From fd9fdba6c366f6a1606572e46eebf770462ff484 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 14 Apr 2014 19:04:46 +1000 Subject: xfs: remove unused bp arg from xfs_iflush_fork() Signed-off-by: Eric Sandeen Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5e7a38fa6ee..b6825d01bed 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3371,9 +3371,9 @@ xfs_iflush_int( } } - xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); + xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); if (XFS_IFORK_Q(ip)) - xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); + xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); xfs_inobp_check(mp, bp); /* -- cgit v1.2.3-70-g09d2 From f37211c336d722805493aec8b13afdbb92bbfd98 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Apr 2014 07:11:51 +1000 Subject: xfs: remove XFS_IFILESTREAM We never test the flag except in xfs_inode_is_filestream, but that function already tests the on-disk flag or filesystem wide flags, and is used to decide if we want to set XFS_IFILESTREAM in the first place. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_filestream.c | 2 -- fs/xfs/xfs_filestream.h | 1 - fs/xfs/xfs_inode.c | 2 -- fs/xfs/xfs_inode.h | 4 +--- 4 files changed, 1 insertion(+), 8 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index dde529fa5eb..c422110c7c4 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -430,8 +430,6 @@ xfs_fstrm_free_func( container_of(mru, fstrm_item_t, mru); xfs_inode_t *ip = item->ip; - xfs_iflags_clear(ip, XFS_IFILESTREAM); - /* Drop the reference taken on the AG when the item was added. */ xfs_filestream_put_ag(ip->i_mount, item->ag); diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index 6d61dbee856..c4fa9a0cd62 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -63,7 +63,6 @@ xfs_inode_is_filestream( struct xfs_inode *ip) { return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) || - xfs_iflags_test(ip, XFS_IFILESTREAM) || (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5e7a38fa6ee..3328320592a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -849,8 +849,6 @@ xfs_ialloc( error = xfs_filestream_associate(pip, ip); if (error < 0) return -error; - if (!error) - xfs_iflags_set(ip, XFS_IFILESTREAM); } *ipp = ip; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 396cc1fafd0..a7c2ebf05e4 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -209,7 +209,6 @@ xfs_get_initial_prid(struct xfs_inode *dp) #define XFS_ISTALE (1 << 1) /* inode has been staled */ #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ #define XFS_INEW (1 << 3) /* inode has just been allocated */ -#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */ #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ @@ -225,8 +224,7 @@ xfs_get_initial_prid(struct xfs_inode *dp) */ #define XFS_IRECLAIM_RESET_FLAGS \ (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ - XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ - XFS_IFILESTREAM); + XFS_IDIRTY_RELEASE | XFS_ITRUNCATED) /* * Synchronize processes attempting to flush the in-core inode back to disk. -- cgit v1.2.3-70-g09d2 From 2cd2ef6a300b1ac912bb515b75451585c3d33ea9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Apr 2014 07:11:51 +1000 Subject: xfs: rewrite the filestream allocator using the dentry cache In Linux we will always be able to find a parent inode for file that are undergoing I/O. Use this to simply the file stream allocator by only keeping track of parent inodes. Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_filestream.c | 660 ++++++++++++------------------------------------ fs/xfs/xfs_filestream.h | 31 +-- fs/xfs/xfs_inode.c | 24 +- 3 files changed, 171 insertions(+), 544 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index c422110c7c4..ff6f90215c8 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006-2007 Silicon Graphics, Inc. + * Copyright (c) 2014 Christoph Hellwig. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -32,101 +33,28 @@ #include "xfs_filestream.h" #include "xfs_trace.h" -#ifdef XFS_FILESTREAMS_TRACE - -ktrace_t *xfs_filestreams_trace_buf; - -STATIC void -xfs_filestreams_trace( - xfs_mount_t *mp, /* mount point */ - int type, /* type of trace */ - const char *func, /* source function */ - int line, /* source line number */ - __psunsigned_t arg0, - __psunsigned_t arg1, - __psunsigned_t arg2, - __psunsigned_t arg3, - __psunsigned_t arg4, - __psunsigned_t arg5) -{ - ktrace_enter(xfs_filestreams_trace_buf, - (void *)(__psint_t)(type | (line << 16)), - (void *)func, - (void *)(__psunsigned_t)current_pid(), - (void *)mp, - (void *)(__psunsigned_t)arg0, - (void *)(__psunsigned_t)arg1, - (void *)(__psunsigned_t)arg2, - (void *)(__psunsigned_t)arg3, - (void *)(__psunsigned_t)arg4, - (void *)(__psunsigned_t)arg5, - NULL, NULL, NULL, NULL, NULL, NULL); -} - -#define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0) -#define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0) -#define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0) -#define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0) -#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) -#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) -#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ - xfs_filestreams_trace(mp, t, __func__, __LINE__, \ - (__psunsigned_t)a0, (__psunsigned_t)a1, \ - (__psunsigned_t)a2, (__psunsigned_t)a3, \ - (__psunsigned_t)a4, (__psunsigned_t)a5) - -#define TRACE_AG_SCAN(mp, ag, ag2) \ - TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2); -#define TRACE_AG_PICK1(mp, max_ag, maxfree) \ - TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree); -#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \ - TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \ - cnt, free, scan, flag) -#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \ - TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2) -#define TRACE_FREE(mp, ip, pip, ag, cnt) \ - TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt) -#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \ - TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt) -#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \ - TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt) -#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \ - TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt) -#define TRACE_ORPHAN(mp, ip, ag) \ - TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag); - - -#else #define TRACE_AG_SCAN(mp, ag, ag2) #define TRACE_AG_PICK1(mp, max_ag, maxfree) #define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) -#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) #define TRACE_FREE(mp, ip, pip, ag, cnt) #define TRACE_LOOKUP(mp, ip, pip, ag, cnt) -#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) -#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) -#define TRACE_ORPHAN(mp, ip, ag) -#endif static kmem_zone_t *item_zone; -/* - * Structure for associating a file or a directory with an allocation group. - * The parent directory pointer is only needed for files, but since there will - * generally be vastly more files than directories in the cache, using the same - * data structure simplifies the code with very little memory overhead. - */ -typedef struct fstrm_item -{ - struct xfs_mru_cache_elem mru; - xfs_agnumber_t ag; /* AG currently in use for the file/directory. */ - xfs_inode_t *ip; /* inode self-pointer. */ - xfs_inode_t *pip; /* Parent directory inode pointer. */ -} fstrm_item_t; +struct xfs_fstrm_item { + struct xfs_mru_cache_elem mru; + struct xfs_inode *ip; + xfs_agnumber_t ag; /* AG in use for this directory */ +}; + +enum xfs_fstrm_alloc { + XFS_PICK_USERDATA = 1, + XFS_PICK_LOWSPACE = 2, +}; /* * Allocation group filestream associations are tracked with per-ag atomic - * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a + * counters. These counters allow xfs_filestream_pick_ag() to tell whether a * particular AG already has active filestreams associated with it. The mount * point's m_peraglock is used to protect these counters from per-ag array * re-allocation during a growfs operation. When xfs_growfs_data_private() is @@ -201,23 +129,42 @@ xfs_filestream_put_ag( xfs_perag_put(pag); } +static void +xfs_fstrm_free_func( + struct xfs_mru_cache_elem *mru) +{ + struct xfs_fstrm_item *item = + container_of(mru, struct xfs_fstrm_item, mru); + + xfs_filestream_put_ag(item->ip->i_mount, item->ag); + + TRACE_FREE(mp, ip, NULL, item->ag, + xfs_filestream_peek_ag(mp, item->ag)); + + kmem_zone_free(item_zone, item); +} + /* * Scan the AGs starting at startag looking for an AG that isn't in use and has * at least minlen blocks free. */ static int -_xfs_filestream_pick_ag( - xfs_mount_t *mp, - xfs_agnumber_t startag, - xfs_agnumber_t *agp, - int flags, - xfs_extlen_t minlen) +xfs_filestream_pick_ag( + struct xfs_inode *ip, + xfs_agnumber_t startag, + xfs_agnumber_t *agp, + int flags, + xfs_extlen_t minlen) { - int streams, max_streams; - int err, trylock, nscan; - xfs_extlen_t longest, free, minfree, maxfree = 0; - xfs_agnumber_t ag, max_ag = NULLAGNUMBER; - struct xfs_perag *pag; + struct xfs_mount *mp = ip->i_mount; + struct xfs_fstrm_item *item; + struct xfs_perag *pag; + xfs_extlen_t longest, free, minfree, maxfree = 0; + xfs_agnumber_t ag, max_ag = NULLAGNUMBER; + int streams, max_streams; + int err, trylock, nscan; + + ASSERT(S_ISDIR(ip->i_d.di_mode)); /* 2% of an AG's blocks must be free for it to be chosen. */ minfree = mp->m_sb.sb_agblocks / 50; @@ -321,205 +268,55 @@ next_ag: TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags); - return 0; -} - -/* - * Set the allocation group number for a file or a directory, updating inode - * references and per-AG references as appropriate. - */ -static int -_xfs_filestream_update_ag( - xfs_inode_t *ip, - xfs_inode_t *pip, - xfs_agnumber_t ag) -{ - int err = 0; - xfs_mount_t *mp; - fstrm_item_t *item; - xfs_agnumber_t old_ag; - xfs_inode_t *old_pip; - struct xfs_mru_cache_elem *mru; - - /* - * Either ip is a regular file and pip is a directory, or ip is a - * directory and pip is NULL. - */ - ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip && - S_ISDIR(pip->i_d.di_mode)) || - (S_ISDIR(ip->i_d.di_mode) && !pip))); - - mp = ip->i_mount; - - mru = xfs_mru_cache_lookup(mp->m_filestream, ip->i_ino); - if (mru) { - item = container_of(mru, fstrm_item_t, mru); - - ASSERT(item->ip == ip); - old_ag = item->ag; - item->ag = ag; - old_pip = item->pip; - item->pip = pip; - xfs_mru_cache_done(mp->m_filestream); - - /* - * If the AG has changed, drop the old ref and take a new one, - * effectively transferring the reference from old to new AG. - */ - if (ag != old_ag) { - xfs_filestream_put_ag(mp, old_ag); - xfs_filestream_get_ag(mp, ag); - } - - /* - * If ip is a file and its pip has changed, drop the old ref and - * take a new one. - */ - if (pip && pip != old_pip) { - IRELE(old_pip); - IHOLD(pip); - } - - TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag), - ag, xfs_filestream_peek_ag(mp, ag)); + if (*agp == NULLAGNUMBER) return 0; - } + err = ENOMEM; item = kmem_zone_zalloc(item_zone, KM_MAYFAIL); if (!item) - return ENOMEM; + goto out_put_ag; - item->ag = ag; + item->ag = *agp; item->ip = ip; - item->pip = pip; err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); if (err) { - kmem_zone_free(item_zone, item); - return err; + if (err == EEXIST) + err = 0; + goto out_free_item; } - /* Take a reference on the AG. */ - xfs_filestream_get_ag(mp, ag); - - /* - * Take a reference on the inode itself regardless of whether it's a - * regular file or a directory. - */ - IHOLD(ip); - - /* - * In the case of a regular file, take a reference on the parent inode - * as well to ensure it remains in-core. - */ - if (pip) - IHOLD(pip); - - TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag), - ag, xfs_filestream_peek_ag(mp, ag)); - return 0; -} - -/* xfs_fstrm_free_func(): callback for freeing cached stream items. */ -STATIC void -xfs_fstrm_free_func( - struct xfs_mru_cache_elem *mru) -{ - fstrm_item_t *item = - container_of(mru, fstrm_item_t, mru); - xfs_inode_t *ip = item->ip; - - /* Drop the reference taken on the AG when the item was added. */ - xfs_filestream_put_ag(ip->i_mount, item->ag); - - TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, - xfs_filestream_peek_ag(ip->i_mount, item->ag)); - - /* - * _xfs_filestream_update_ag() always takes a reference on the inode - * itself, whether it's a file or a directory. Release it here. - * This can result in the inode being freed and so we must - * not hold any inode locks when freeing filesstreams objects - * otherwise we can deadlock here. - */ - IRELE(ip); - /* - * In the case of a regular file, _xfs_filestream_update_ag() also - * takes a ref on the parent inode to keep it in-core. Release that - * too. - */ - if (item->pip) - IRELE(item->pip); - - /* Finally, free the memory allocated for the item. */ +out_free_item: kmem_zone_free(item_zone, item); +out_put_ag: + xfs_filestream_put_ag(mp, *agp); + return err; } -/* - * xfs_filestream_init() is called at xfs initialisation time to set up the - * memory zone that will be used for filestream data structure allocation. - */ -int -xfs_filestream_init(void) -{ - item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); - if (!item_zone) - return -ENOMEM; - - return 0; -} - -/* - * xfs_filestream_uninit() is called at xfs termination time to destroy the - * memory zone that was used for filestream data structure allocation. - */ -void -xfs_filestream_uninit(void) +static struct xfs_inode * +xfs_filestream_get_parent( + struct xfs_inode *ip) { - kmem_zone_destroy(item_zone); -} + struct inode *inode = VFS_I(ip), *dir = NULL; + struct dentry *dentry, *parent; -/* - * xfs_filestream_mount() is called when a file system is mounted with the - * filestream option. It is responsible for allocating the data structures - * needed to track the new file system's file streams. - */ -int -xfs_filestream_mount( - xfs_mount_t *mp) -{ - int err; - unsigned int lifetime, grp_count; + dentry = d_find_alias(inode); + if (!dentry) + goto out; - /* - * The filestream timer tunable is currently fixed within the range of - * one second to four minutes, with five seconds being the default. The - * group count is somewhat arbitrary, but it'd be nice to adhere to the - * timer tunable to within about 10 percent. This requires at least 10 - * groups. - */ - lifetime = xfs_fstrm_centisecs * 10; - grp_count = 10; + parent = dget_parent(dentry); + if (!parent) + goto out_dput; - err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count, - xfs_fstrm_free_func); + dir = igrab(parent->d_inode); + dput(parent); - return err; -} - -/* - * xfs_filestream_unmount() is called when a file system that was mounted with - * the filestream option is unmounted. It drains the data structures created - * to track the file system's file streams and frees all the memory that was - * allocated. - */ -void -xfs_filestream_unmount( - xfs_mount_t *mp) -{ - xfs_mru_cache_destroy(mp->m_filestream); +out_dput: + dput(dentry); +out: + return dir ? XFS_I(dir) : NULL; } /* @@ -528,94 +325,61 @@ xfs_filestream_unmount( */ xfs_agnumber_t xfs_filestream_lookup_ag( - xfs_inode_t *ip) + struct xfs_inode *ip) { - struct xfs_mount *mp = ip->i_mount; + struct xfs_mount *mp = ip->i_mount; + struct xfs_fstrm_item *item; + struct xfs_inode *pip = NULL; + xfs_agnumber_t ag = NULLAGNUMBER; + int ref = 0; struct xfs_mru_cache_elem *mru; - fstrm_item_t *item; - xfs_agnumber_t ag; - int ref; - if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) { - ASSERT(0); - return NULLAGNUMBER; - } + ASSERT(S_ISREG(ip->i_d.di_mode)); - mru = xfs_mru_cache_lookup(mp->m_filestream, ip->i_ino); - if (!mru) { - TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0); - return NULLAGNUMBER; - } + pip = xfs_filestream_get_parent(ip); + if (!pip) + goto out; + + mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); + if (!mru) + goto out; + + item = container_of(mru, struct xfs_fstrm_item, mru); - item = container_of(mru, fstrm_item_t, mru); - ASSERT(ip == item->ip); ag = item->ag; - ref = xfs_filestream_peek_ag(ip->i_mount, ag); xfs_mru_cache_done(mp->m_filestream); - TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref); + ref = xfs_filestream_peek_ag(ip->i_mount, ag); +out: + TRACE_LOOKUP(mp, ip, pip, ag, ref); + IRELE(pip); return ag; } /* - * xfs_filestream_associate() should only be called to associate a regular file - * with its parent directory. Calling it with a child directory isn't - * appropriate because filestreams don't apply to entire directory hierarchies. - * Creating a file in a child directory of an existing filestream directory - * starts a new filestream with its own allocation group association. + * Make sure a directory has a filestream associated with it. * - * Returns < 0 on error, 0 if successful association occurred, > 0 if - * we failed to get an association because of locking issues. + * This is called when creating regular files in an directory that has + * filestreams enabled, so that a stream is ready by the time we need it + * in the allocator for the files inside the directory. */ int xfs_filestream_associate( - xfs_inode_t *pip, - xfs_inode_t *ip) + struct xfs_inode *pip) { + struct xfs_mount *mp = pip->i_mount; struct xfs_mru_cache_elem *mru; - xfs_mount_t *mp; - fstrm_item_t *item; - xfs_agnumber_t ag, rotorstep, startag; - int err = 0; + xfs_agnumber_t startag, ag; ASSERT(S_ISDIR(pip->i_d.di_mode)); - ASSERT(S_ISREG(ip->i_d.di_mode)); - if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode)) - return -EINVAL; - - mp = pip->i_mount; /* - * We have a problem, Houston. - * - * Taking the iolock here violates inode locking order - we already - * hold the ilock. Hence if we block getting this lock we may never - * wake. Unfortunately, that means if we can't get the lock, we're - * screwed in terms of getting a stream association - we can't spin - * waiting for the lock because someone else is waiting on the lock we - * hold and we cannot drop that as we are in a transaction here. - * - * Lucky for us, this inversion is not a problem because it's a - * directory inode that we are trying to lock here. - * - * So, if we can't get the iolock without sleeping then just give up + * If the directory already has a file stream associated we're done. */ - if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) - return 1; - - /* If the parent directory is already in the cache, use its AG. */ mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); if (mru) { - item = container_of(mru, fstrm_item_t, mru); - - ASSERT(item->ip == pip); - ag = item->ag; xfs_mru_cache_done(mp->m_filestream); - - TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag)); - err = _xfs_filestream_update_ag(ip, pip, ag); - - goto exit; + return 0; } /* @@ -623,201 +387,107 @@ xfs_filestream_associate( * use the directory inode's AG. */ if (mp->m_flags & XFS_MOUNT_32BITINODES) { - rotorstep = xfs_rotorstep; + xfs_agnumber_t rotorstep = xfs_rotorstep; startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; mp->m_agfrotor = (mp->m_agfrotor + 1) % (mp->m_sb.sb_agcount * rotorstep); } else startag = XFS_INO_TO_AGNO(mp, pip->i_ino); - /* Pick a new AG for the parent inode starting at startag. */ - err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0); - if (err || ag == NULLAGNUMBER) - goto exit_did_pick; - - /* Associate the parent inode with the AG. */ - err = _xfs_filestream_update_ag(pip, NULL, ag); - if (err) - goto exit_did_pick; - - /* Associate the file inode with the AG. */ - err = _xfs_filestream_update_ag(ip, pip, ag); - if (err) - goto exit_did_pick; - - TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag)); - -exit_did_pick: - /* - * If _xfs_filestream_pick_ag() returned a valid AG, remove the - * reference it took on it, since the file and directory will have taken - * their own now if they were successfully cached. - */ - if (ag != NULLAGNUMBER) - xfs_filestream_put_ag(mp, ag); - -exit: - xfs_iunlock(pip, XFS_IOLOCK_EXCL); - return -err; + return xfs_filestream_pick_ag(pip, startag, &ag, 0, 0); } /* - * Pick a new allocation group for the current file and its file stream. This - * function is called by xfs_bmap_filestreams() with the mount point's per-ag - * lock held. + * Pick a new allocation group for the current file and its file stream. + * + * This is called when the allocator can't find a suitable extent in the + * current AG, and we have to move the stream into a new AG with more space. */ int xfs_filestream_new_ag( struct xfs_bmalloca *ap, xfs_agnumber_t *agp) { - struct xfs_mru_cache_elem *mru, *mru2; - int flags, err; - xfs_inode_t *ip, *pip = NULL; - xfs_mount_t *mp; - xfs_extlen_t minlen; - fstrm_item_t *dir, *file; - xfs_agnumber_t ag = NULLAGNUMBER; - - ip = ap->ip; - mp = ip->i_mount; - minlen = ap->length; - *agp = NULLAGNUMBER; - - /* - * Look for the file in the cache, removing it if it's found. Doing - * this allows it to be held across the dir lookup that follows. - */ - mru = xfs_mru_cache_remove(mp->m_filestream, ip->i_ino); - if (mru) { - file = container_of(mru, fstrm_item_t, mru); - ASSERT(ip == file->ip); - - /* Save the file's parent inode and old AG number for later. */ - pip = file->pip; - ag = file->ag; - - /* Look for the file's directory in the cache. */ - mru2 = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); - if (mru2) { - dir = container_of(mru2, fstrm_item_t, mru); - ASSERT(pip == dir->ip); - - /* - * If the directory has already moved on to a new AG, - * use that AG as the new AG for the file. Don't - * forget to twiddle the AG refcounts to match the - * movement. - */ - if (dir->ag != file->ag) { - xfs_filestream_put_ag(mp, file->ag); - xfs_filestream_get_ag(mp, dir->ag); - *agp = file->ag = dir->ag; - } + struct xfs_inode *ip = ap->ip, *pip; + struct xfs_mount *mp = ip->i_mount; + xfs_extlen_t minlen = ap->length; + xfs_agnumber_t startag = 0; + int flags, err = 0; + struct xfs_mru_cache_elem *mru; - xfs_mru_cache_done(mp->m_filestream); - } + *agp = NULLAGNUMBER; - /* - * Put the file back in the cache. If this fails, the free - * function needs to be called to tidy up in the same way as if - * the item had simply expired from the cache. - */ - err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, mru); - if (err) { - xfs_fstrm_free_func(mru); - return err; - } + pip = xfs_filestream_get_parent(ip); + if (!pip) + goto exit; - /* - * If the file's AG was moved to the directory's new AG, there's - * nothing more to be done. - */ - if (*agp != NULLAGNUMBER) { - TRACE_MOVEAG(mp, ip, pip, - ag, xfs_filestream_peek_ag(mp, ag), - *agp, xfs_filestream_peek_ag(mp, *agp)); - return 0; - } + mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino); + if (mru) { + struct xfs_fstrm_item *item = + container_of(mru, struct xfs_fstrm_item, mru); + startag = (item->ag + 1) % mp->m_sb.sb_agcount; } - /* - * If the file's parent directory is known, take its iolock in exclusive - * mode to prevent two sibling files from racing each other to migrate - * themselves and their parent to different AGs. - * - * Note that we lock the parent directory iolock inside the child - * iolock here. That's fine as we never hold both parent and child - * iolock in any other place. This is different from the ilock, - * which requires locking of the child after the parent for namespace - * operations. - */ - if (pip) - xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); - - /* - * A new AG needs to be found for the file. If the file's parent - * directory is also known, it will be moved to the new AG as well to - * ensure that files created inside it in future use the new AG. - */ - ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount; flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); - err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen); - if (err || *agp == NULLAGNUMBER) - goto exit; + err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); /* - * If the file wasn't found in the file cache, then its parent directory - * inode isn't known. For this to have happened, the file must either - * be pre-existing, or it was created long enough ago that its cache - * entry has expired. This isn't the sort of usage that the filestreams - * allocator is trying to optimise, so there's no point trying to track - * its new AG somehow in the filestream data structures. + * Only free the item here so we skip over the old AG earlier. */ - if (!pip) { - TRACE_ORPHAN(mp, ip, *agp); - goto exit; - } + if (mru) + xfs_fstrm_free_func(mru); - /* Associate the parent inode with the AG. */ - err = _xfs_filestream_update_ag(pip, NULL, *agp); - if (err) - goto exit; - - /* Associate the file inode with the AG. */ - err = _xfs_filestream_update_ag(ip, pip, *agp); - if (err) - goto exit; + IRELE(pip); +exit: + if (*agp == NULLAGNUMBER) + *agp = 0; + return err; +} - TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0, - *agp, xfs_filestream_peek_ag(mp, *agp)); +void +xfs_filestream_deassociate( + struct xfs_inode *ip) +{ + xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); +} -exit: +int +xfs_filestream_mount( + xfs_mount_t *mp) +{ /* - * If _xfs_filestream_pick_ag() returned a valid AG, remove the - * reference it took on it, since the file and directory will have taken - * their own now if they were successfully cached. + * The filestream timer tunable is currently fixed within the range of + * one second to four minutes, with five seconds being the default. The + * group count is somewhat arbitrary, but it'd be nice to adhere to the + * timer tunable to within about 10 percent. This requires at least 10 + * groups. */ - if (*agp != NULLAGNUMBER) - xfs_filestream_put_ag(mp, *agp); - else - *agp = 0; + return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10, + 10, xfs_fstrm_free_func); +} - if (pip) - xfs_iunlock(pip, XFS_IOLOCK_EXCL); +void +xfs_filestream_unmount( + xfs_mount_t *mp) +{ + xfs_mru_cache_destroy(mp->m_filestream); +} - return err; + +/* needs to return a positive errno for the init path */ +int +xfs_filestream_init(void) +{ + item_zone = kmem_zone_init(sizeof(struct xfs_fstrm_item), "fstrm_item"); + if (!item_zone) + return -ENOMEM; + return 0; } -/* - * Remove an association between an inode and a filestream object. - * Typically this is done on last close of an unlinked file. - */ void -xfs_filestream_deassociate( - xfs_inode_t *ip) +xfs_filestream_uninit(void) { - xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); + kmem_zone_destroy(item_zone); } diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index c4fa9a0cd62..e3a25f891d0 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -20,44 +20,17 @@ struct xfs_mount; struct xfs_inode; -struct xfs_perag; struct xfs_bmalloca; -#ifdef XFS_FILESTREAMS_TRACE -#define XFS_FSTRM_KTRACE_INFO 1 -#define XFS_FSTRM_KTRACE_AGSCAN 2 -#define XFS_FSTRM_KTRACE_AGPICK1 3 -#define XFS_FSTRM_KTRACE_AGPICK2 4 -#define XFS_FSTRM_KTRACE_UPDATE 5 -#define XFS_FSTRM_KTRACE_FREE 6 -#define XFS_FSTRM_KTRACE_ITEM_LOOKUP 7 -#define XFS_FSTRM_KTRACE_ASSOCIATE 8 -#define XFS_FSTRM_KTRACE_MOVEAG 9 -#define XFS_FSTRM_KTRACE_ORPHAN 10 - -#define XFS_FSTRM_KTRACE_SIZE 16384 -extern ktrace_t *xfs_filestreams_trace_buf; - -#endif - -/* allocation selection flags */ -typedef enum xfs_fstrm_alloc { - XFS_PICK_USERDATA = 1, - XFS_PICK_LOWSPACE = 2, -} xfs_fstrm_alloc_t; - -/* prototypes for filestream.c */ int xfs_filestream_init(void); void xfs_filestream_uninit(void); int xfs_filestream_mount(struct xfs_mount *mp); void xfs_filestream_unmount(struct xfs_mount *mp); -xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); -int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip); void xfs_filestream_deassociate(struct xfs_inode *ip); +xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); +int xfs_filestream_associate(struct xfs_inode *dip); int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); - -/* filestreams for the inode? */ static inline int xfs_inode_is_filestream( struct xfs_inode *ip) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3328320592a..b9b531f7fa3 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -846,9 +846,9 @@ xfs_ialloc( /* now we have set up the vfs inode we can associate the filestream */ if (filestreams) { - error = xfs_filestream_associate(pip, ip); - if (error < 0) - return -error; + error = xfs_filestream_associate(pip); + if (error) + return error; } *ipp = ip; @@ -1695,16 +1695,6 @@ xfs_release( if (!XFS_FORCED_SHUTDOWN(mp)) { int truncated; - /* - * If we are using filestreams, and we have an unlinked - * file that we are processing the last close on, then nothing - * will be able to reopen and write to this file. Purge this - * inode from the filestreams cache so that it doesn't delay - * teardown of the inode. - */ - if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) - xfs_filestream_deassociate(ip); - /* * If we previously truncated this file and removed old data * in the process, we want to initiate "early" writeout on @@ -2661,13 +2651,7 @@ xfs_remove( if (error) goto std_return; - /* - * If we are using filestreams, kill the stream association. - * If the file is still open it may get a new one but that - * will get killed on last close in xfs_close() so we don't - * have to worry about that. - */ - if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) + if (is_dir && xfs_inode_is_filestream(ip)) xfs_filestream_deassociate(ip); return 0; -- cgit v1.2.3-70-g09d2 From 3b8d90766a85e079fefaee74ca9dde43ce75edea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Apr 2014 07:11:52 +1000 Subject: xfs: remove xfs_filestream_associate There is no good reason to create a filestream when a directory entry is created. Delay it until the first allocation happens to simply the code and reduce the amount of mru cache lookups we do. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_filestream.c | 57 +++++++++++++------------------------------------ fs/xfs/xfs_filestream.h | 1 - fs/xfs/xfs_inode.c | 15 ------------- 3 files changed, 15 insertions(+), 58 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 7b940369001..c8a8840e502 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -318,17 +318,18 @@ out: } /* - * Return the AG of the filestream the file or directory belongs to, or - * NULLAGNUMBER otherwise. + * Find the right allocation group for a file, either by finding an + * existing file stream or creating a new one. + * + * Returns NULLAGNUMBER in case of an error. */ xfs_agnumber_t xfs_filestream_lookup_ag( struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; - struct xfs_fstrm_item *item; struct xfs_inode *pip = NULL; - xfs_agnumber_t ag = NULLAGNUMBER; + xfs_agnumber_t startag, ag = NULLAGNUMBER; int ref = 0; struct xfs_mru_cache_elem *mru; @@ -339,45 +340,13 @@ xfs_filestream_lookup_ag( goto out; mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); - if (!mru) - goto out; - - item = container_of(mru, struct xfs_fstrm_item, mru); - - ag = item->ag; - xfs_mru_cache_done(mp->m_filestream); - - ref = xfs_filestream_peek_ag(ip->i_mount, ag); -out: - TRACE_LOOKUP(mp, ip, pip, ag, ref); - IRELE(pip); - return ag; -} - -/* - * Make sure a directory has a filestream associated with it. - * - * This is called when creating regular files in an directory that has - * filestreams enabled, so that a stream is ready by the time we need it - * in the allocator for the files inside the directory. - */ -int -xfs_filestream_associate( - struct xfs_inode *pip) -{ - struct xfs_mount *mp = pip->i_mount; - struct xfs_mru_cache_elem *mru; - xfs_agnumber_t startag, ag; - - ASSERT(S_ISDIR(pip->i_d.di_mode)); - - /* - * If the directory already has a file stream associated we're done. - */ - mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); if (mru) { + ag = container_of(mru, struct xfs_fstrm_item, mru)->ag; xfs_mru_cache_done(mp->m_filestream); - return 0; + + ref = xfs_filestream_peek_ag(ip->i_mount, ag); + TRACE_LOOKUP(mp, ip, pip, ag, ref); + goto out; } /* @@ -392,7 +361,11 @@ xfs_filestream_associate( } else startag = XFS_INO_TO_AGNO(mp, pip->i_ino); - return xfs_filestream_pick_ag(pip, startag, &ag, 0, 0); + if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0)) + ag = NULLAGNUMBER; +out: + IRELE(pip); + return ag; } /* diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index 578d49e7cff..2de853e03bf 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -26,7 +26,6 @@ int xfs_filestream_mount(struct xfs_mount *mp); void xfs_filestream_unmount(struct xfs_mount *mp); void xfs_filestream_deassociate(struct xfs_inode *ip); xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); -int xfs_filestream_associate(struct xfs_inode *dip); int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); static inline int diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b9b531f7fa3..cec18e9ba1d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -655,7 +655,6 @@ xfs_ialloc( uint flags; int error; timespec_t tv; - int filestreams = 0; /* * Call the space management code to pick @@ -772,13 +771,6 @@ xfs_ialloc( flags |= XFS_ILOG_DEV; break; case S_IFREG: - /* - * we can't set up filestreams until after the VFS inode - * is set up properly. - */ - if (pip && xfs_inode_is_filestream(pip)) - filestreams = 1; - /* fall through */ case S_IFDIR: if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { uint di_flags = 0; @@ -844,13 +836,6 @@ xfs_ialloc( /* now that we have an i_mode we can setup inode ops and unlock */ xfs_setup_inode(ip); - /* now we have set up the vfs inode we can associate the filestream */ - if (filestreams) { - error = xfs_filestream_associate(pip); - if (error) - return error; - } - *ipp = ip; return 0; } -- cgit v1.2.3-70-g09d2 From 9d43b180af67cccd4bd1342f7f54f8131515b0a1 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 24 Apr 2014 16:00:52 +1000 Subject: xfs: update inode allocation/free transaction reservations for finobt Create the xfs_calc_finobt_res() helper to calculate the finobt log reservation for inode allocation and free. Update XFS_IALLOC_SPACE_RES() to reserve blocks for the additional finobt insertion on inode allocation. Create XFS_IFREE_SPACE_RES() to reserve blocks for the potential finobt record insertion on inode free (i.e., if an inode chunk was previously fully allocated). Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode.c | 28 +++++++++++++++++++++++-- fs/xfs/xfs_trans_resv.c | 53 +++++++++++++++++++++++++++++++++++++++++++++--- fs/xfs/xfs_trans_space.h | 7 ++++++- 3 files changed, 82 insertions(+), 6 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5e7a38fa6ee..a9079c14204 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1837,9 +1837,33 @@ xfs_inactive_ifree( int error; tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); + + /* + * The ifree transaction might need to allocate blocks for record + * insertion to the finobt. We don't want to fail here at ENOSPC, so + * allow ifree to dip into the reserved block pool if necessary. + * + * Freeing large sets of inodes generally means freeing inode chunks, + * directory and file data blocks, so this should be relatively safe. + * Only under severe circumstances should it be possible to free enough + * inodes to exhaust the reserve block pool via finobt expansion while + * at the same time not creating free space in the filesystem. + * + * Send a warning if the reservation does happen to fail, as the inode + * now remains allocated and sits on the unlinked list until the fs is + * repaired. + */ + tp->t_flags |= XFS_TRANS_RESERVE; + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, + XFS_IFREE_SPACE_RES(mp), 0); if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); + if (error == ENOSPC) { + xfs_warn_ratelimited(mp, + "Failed to remove inode(s) from unlinked list. " + "Please free space, unmount and run xfs_repair."); + } else { + ASSERT(XFS_FORCED_SHUTDOWN(mp)); + } xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); return error; } diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index ae368165244..52b6c3e3203 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c @@ -105,6 +105,47 @@ xfs_calc_inode_res( 2 * XFS_BMBT_BLOCK_LEN(mp)); } +/* + * The free inode btree is a conditional feature and the log reservation + * requirements differ slightly from that of the traditional inode allocation + * btree. The finobt tracks records for inode chunks with at least one free + * inode. A record can be removed from the tree for an inode allocation + * or free and thus the finobt reservation is unconditional across: + * + * - inode allocation + * - inode free + * - inode chunk allocation + * + * The 'modify' param indicates to include the record modification scenario. The + * 'alloc' param indicates to include the reservation for free space btree + * modifications on behalf of finobt modifications. This is required only for + * transactions that do not already account for free space btree modifications. + * + * the free inode btree: max depth * block size + * the allocation btrees: 2 trees * (max depth - 1) * block size + * the free inode btree entry: block size + */ +STATIC uint +xfs_calc_finobt_res( + struct xfs_mount *mp, + int alloc, + int modify) +{ + uint res; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return 0; + + res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); + if (alloc) + res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); + if (modify) + res += (uint)XFS_FSB_TO_B(mp, 1); + + return res; +} + /* * Various log reservation values. * @@ -302,6 +343,7 @@ xfs_calc_remove_reservation( * the superblock for the nlink flag: sector size * the directory btree: (max depth + v2) * dir block size * the directory inode's bmap btree: (max depth + v2) * block size + * the finobt (record modification and allocation btrees) */ STATIC uint xfs_calc_create_resv_modify( @@ -310,7 +352,8 @@ xfs_calc_create_resv_modify( return xfs_calc_inode_res(mp, 2) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + (uint)XFS_FSB_TO_B(mp, 1) + - xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 1, 1); } /* @@ -348,6 +391,7 @@ __xfs_calc_create_reservation( * the superblock for the nlink flag: sector size * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size + * the finobt (record insertion) */ STATIC uint xfs_calc_icreate_resv_alloc( @@ -357,7 +401,8 @@ xfs_calc_icreate_resv_alloc( mp->m_sb.sb_sectsize + xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)); + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 0, 0); } STATIC uint @@ -425,6 +470,7 @@ xfs_calc_symlink_reservation( * the on disk inode before ours in the agi hash list: inode cluster size * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size + * the finobt (record insertion, removal or modification) */ STATIC uint xfs_calc_ifree_reservation( @@ -439,7 +485,8 @@ xfs_calc_ifree_reservation( xfs_calc_buf_res(2 + mp->m_ialloc_blks + mp->m_in_maxlevels, 0) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)); + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 0, 1); } /* diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h index af5dbe06cb6..df4c1f81884 100644 --- a/fs/xfs/xfs_trans_space.h +++ b/fs/xfs/xfs_trans_space.h @@ -47,7 +47,9 @@ #define XFS_DIRREMOVE_SPACE_RES(mp) \ XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) #define XFS_IALLOC_SPACE_RES(mp) \ - ((mp)->m_ialloc_blks + (mp)->m_in_maxlevels - 1) + ((mp)->m_ialloc_blks + \ + (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \ + ((mp)->m_in_maxlevels - 1))) /* * Space reservation values for various transactions. @@ -82,5 +84,8 @@ (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) #define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) +#define XFS_IFREE_SPACE_RES(mp) \ + (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0) + #endif /* __XFS_TRANS_SPACE_H__ */ -- cgit v1.2.3-70-g09d2 From 263997a6842b27a49f42bd795c5dd12242917b22 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 20 May 2014 07:46:40 +1000 Subject: xfs: turn NLINK feature on by default mkfs has turned on the XFS_SB_VERSION_NLINKBIT feature bit by default since November 2007. It's about time we simply made the kernel code turn it on by default and so always convert v1 inodes to v2 inodes when reading them in from disk or allocating them. This This removes needless version checks and modification when bumping link counts on inodes, and will take code out of a few common code paths. text data bss dec hex filename 783251 100867 616 884734 d7ffe fs/xfs/xfs.o.orig 782664 100867 616 884147 d7db3 fs/xfs/xfs.o.patched Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_fsops.c | 4 +- fs/xfs/xfs_ialloc.c | 4 +- fs/xfs/xfs_inode.c | 111 +++++------------------------------------------- fs/xfs/xfs_inode.h | 1 - fs/xfs/xfs_inode_buf.c | 17 ++++---- fs/xfs/xfs_inode_item.c | 32 +------------- fs/xfs/xfs_ioctl.c | 9 +--- fs/xfs/xfs_mount.c | 6 +++ fs/xfs/xfs_sb.h | 10 ----- 9 files changed, 30 insertions(+), 164 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 02fb943cbf2..d49c67acabf 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -74,11 +74,9 @@ xfs_fs_geometry( } if (new_version >= 3) { geo->version = XFS_FSOP_GEOM_VERSION; - geo->flags = + geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | (xfs_sb_version_hasattr(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_ATTR : 0) | - (xfs_sb_version_hasnlink(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_NLINK : 0) | (xfs_sb_version_hasquota(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_QUOTA : 0) | (xfs_sb_version_hasalign(&mp->m_sb) ? diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 8f711db61a0..449fa7b08b5 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -220,10 +220,8 @@ xfs_ialloc_inode_init( if (tp) xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos, mp->m_sb.sb_inodesize, length, gen); - } else if (xfs_sb_version_hasnlink(&mp->m_sb)) + } else version = 2; - else - version = 1; for (j = 0; j < nbufs; j++) { /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 768087bedba..9731977be52 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -682,6 +682,14 @@ xfs_ialloc( return error; ASSERT(ip != NULL); + /* + * We always convert v1 inodes to v2 now - we only support filesystems + * with >= v2 inode capability, so there is no reason for ever leaving + * an inode in v1 format. + */ + if (ip->i_d.di_version == 1) + ip->i_d.di_version = 2; + ip->i_d.di_mode = mode; ip->i_d.di_onlink = 0; ip->i_d.di_nlink = nlink; @@ -691,27 +699,6 @@ xfs_ialloc( xfs_set_projid(ip, prid); memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - /* - * If the superblock version is up to where we support new format - * inodes and this is currently an old format inode, then change - * the inode version number now. This way we only do the conversion - * here rather than here and in the flush/logging code. - */ - if (xfs_sb_version_hasnlink(&mp->m_sb) && - ip->i_d.di_version == 1) { - ip->i_d.di_version = 2; - /* - * We've already zeroed the old link count, the projid field, - * and the pad field. - */ - } - - /* - * Project ids won't be stored on disk if we are using a version 1 inode. - */ - if ((prid != 0) && (ip->i_d.di_version == 1)) - xfs_bump_ino_vers2(tp, ip); - if (pip && XFS_INHERIT_GID(pip)) { ip->i_d.di_gid = pip->i_d.di_gid; if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) { @@ -1072,40 +1059,6 @@ xfs_droplink( return error; } -/* - * This gets called when the inode's version needs to be changed from 1 to 2. - * Currently this happens when the nlink field overflows the old 16-bit value - * or when chproj is called to change the project for the first time. - * As a side effect the superblock version will also get rev'd - * to contain the NLINK bit. - */ -void -xfs_bump_ino_vers2( - xfs_trans_t *tp, - xfs_inode_t *ip) -{ - xfs_mount_t *mp; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_d.di_version == 1); - - ip->i_d.di_version = 2; - ip->i_d.di_onlink = 0; - memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - mp = tp->t_mountp; - if (!xfs_sb_version_hasnlink(&mp->m_sb)) { - spin_lock(&mp->m_sb_lock); - if (!xfs_sb_version_hasnlink(&mp->m_sb)) { - xfs_sb_version_addnlink(&mp->m_sb); - spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, XFS_SB_VERSIONNUM); - } else { - spin_unlock(&mp->m_sb_lock); - } - } - /* Caller must log the inode */ -} - /* * Increment the link count on an inode & log the change. */ @@ -1116,22 +1069,10 @@ xfs_bumplink( { xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + ASSERT(ip->i_d.di_version > 1); ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE)); ip->i_d.di_nlink++; inc_nlink(VFS_I(ip)); - if ((ip->i_d.di_version == 1) && - (ip->i_d.di_nlink > XFS_MAXLINK_1)) { - /* - * The inode has increased its number of links beyond - * what can fit in an old format inode. It now needs - * to be converted to a version 2 inode with a 32 bit - * link count. If this is the first inode in the file - * system to do this, then we need to bump the superblock - * version number as well. - */ - xfs_bump_ino_vers2(tp, ip); - } - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); return 0; } @@ -3258,6 +3199,7 @@ xfs_iflush_int( ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); ASSERT(iip != NULL && iip->ili_fields != 0); + ASSERT(ip->i_d.di_version > 1); /* set *dip = inode's place in the buffer */ dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); @@ -3318,7 +3260,7 @@ xfs_iflush_int( } /* - * Inode item log recovery for v1/v2 inodes are dependent on the + * Inode item log recovery for v2 inodes are dependent on the * di_flushiter count for correct sequencing. We bump the flush * iteration count so we can detect flushes which postdate a log record * during recovery. This is redundant as we now log every change and @@ -3341,37 +3283,6 @@ xfs_iflush_int( if (ip->i_d.di_flushiter == DI_MAX_FLUSH) ip->i_d.di_flushiter = 0; - /* - * If this is really an old format inode and the superblock version - * has not been updated to support only new format inodes, then - * convert back to the old inode format. If the superblock version - * has been updated, then make the conversion permanent. - */ - ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); - if (ip->i_d.di_version == 1) { - if (!xfs_sb_version_hasnlink(&mp->m_sb)) { - /* - * Convert it back. - */ - ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); - dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); - } else { - /* - * The superblock version has already been bumped, - * so just make the conversion to the new inode - * format permanent. - */ - ip->i_d.di_version = 2; - dip->di_version = 2; - ip->i_d.di_onlink = 0; - dip->di_onlink = 0; - memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - memset(&(dip->di_pad[0]), 0, - sizeof(dip->di_pad)); - ASSERT(xfs_get_projid(ip) == 0); - } - } - xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); if (XFS_IFORK_Q(ip)) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f2fcde52b66..d331a0e040e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -379,7 +379,6 @@ int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, struct xfs_inode **, int *); int xfs_droplink(struct xfs_trans *, struct xfs_inode *); int xfs_bumplink(struct xfs_trans *, struct xfs_inode *); -void xfs_bump_ino_vers2(struct xfs_trans *, struct xfs_inode *); /* from xfs_file.c */ int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c index 24e993996bd..cb35ae41d4a 100644 --- a/fs/xfs/xfs_inode_buf.c +++ b/fs/xfs/xfs_inode_buf.c @@ -437,17 +437,16 @@ xfs_iread( } /* - * The inode format changed when we moved the link count and - * made it 32 bits long. If this is an old format inode, - * convert it in memory to look like a new one. If it gets - * flushed to disk we will convert back before flushing or - * logging it. We zero out the new projid field and the old link - * count field. We'll handle clearing the pad field (the remains - * of the old uuid field) when we actually convert the inode to - * the new format. We don't change the version number so that we - * can distinguish this from a real new format inode. + * Automatically convert version 1 inode formats in memory to version 2 + * inode format. If the inode is modified, it will get logged and + * rewritten as a version 2 inode. We can do this because we set the + * superblock feature bit for v2 inodes unconditionally during mount + * and it means the reast of the code can assume the inode version is 2 + * or higher. */ if (ip->i_d.di_version == 1) { + ip->i_d.di_version = 2; + memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); ip->i_d.di_nlink = ip->i_d.di_onlink; ip->i_d.di_onlink = 0; xfs_set_projid(ip, 0); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 686889b4a1e..a640137b357 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -145,34 +145,6 @@ xfs_inode_item_size( xfs_inode_item_attr_fork_size(iip, nvecs, nbytes); } -/* - * If this is a v1 format inode, then we need to log it as such. This means - * that we have to copy the link count from the new field to the old. We - * don't have to worry about the new fields, because nothing trusts them as - * long as the old inode version number is there. - */ -STATIC void -xfs_inode_item_format_v1_inode( - struct xfs_inode *ip) -{ - if (!xfs_sb_version_hasnlink(&ip->i_mount->m_sb)) { - /* - * Convert it back. - */ - ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); - ip->i_d.di_onlink = ip->i_d.di_nlink; - } else { - /* - * The superblock version has already been bumped, - * so just make the conversion to the new inode - * format permanent. - */ - ip->i_d.di_version = 2; - ip->i_d.di_onlink = 0; - memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - } -} - STATIC void xfs_inode_item_format_data_fork( struct xfs_inode_log_item *iip, @@ -370,6 +342,8 @@ xfs_inode_item_format( struct xfs_inode_log_format *ilf; struct xfs_log_iovec *vecp = NULL; + ASSERT(ip->i_d.di_version > 1); + ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); ilf->ilf_type = XFS_LI_INODE; ilf->ilf_ino = ip->i_ino; @@ -380,8 +354,6 @@ xfs_inode_item_format( ilf->ilf_size = 2; /* format + core */ xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format)); - if (ip->i_d.di_version == 1) - xfs_inode_item_format_v1_inode(ip); xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE, &ip->i_d, xfs_icdinode_size(ip->i_d.di_version)); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0b18776b075..f920d420020 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1227,15 +1227,8 @@ xfs_ioctl_setattr( olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_pdquot, pdqp); } + ASSERT(ip->i_d.di_version > 1); xfs_set_projid(ip, fa->fsx_projid); - - /* - * We may have to rev the inode as well as - * the superblock version number since projids didn't - * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. - */ - if (ip->i_d.di_version == 1) - xfs_bump_ino_vers2(tp, ip); } } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 944f3d9456a..3f097825eff 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -697,6 +697,12 @@ xfs_mountfs( mp->m_update_flags |= XFS_SB_VERSIONNUM; } + /* always use v2 inodes by default now */ + if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { + mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT; + mp->m_update_flags |= XFS_SB_VERSIONNUM; + } + /* * Check if sb_agblocks is aligned at stripe boundary * If sb_agblocks is NOT aligned turn off m_dalign since diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index a6a76f41aad..a2826cf57d7 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -376,16 +376,6 @@ static inline void xfs_sb_version_addattr(struct xfs_sb *sbp) sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; } -static inline bool xfs_sb_version_hasnlink(struct xfs_sb *sbp) -{ - return (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT); -} - -static inline void xfs_sb_version_addnlink(struct xfs_sb *sbp) -{ - sbp->sb_versionnum |= XFS_SB_VERSION_NLINKBIT; -} - static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp) { return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); -- cgit v1.2.3-70-g09d2