From 75c68f411b1242c8fdaf731078fdd4e77b14981d Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:06:28 +1100 Subject: [XFS] Remove xfs_iflush_all and clean up xfs_finish_reclaim_all() xfs_iflush_all() walks the m_inodes list to find inodes that need reclaiming. We already have such a list - the m_del_inodes list. Replace xfs_iflush_all() with a call to xfs_finish_reclaim_all() and clean up xfs_finish_reclaim_all() to handle the different flush modes now needed. Originally based on a patch from Christoph Hellwig. Version 3 o rediff against new linux-2.6/xfs_sync.c code Version 2 o revert xfs_syncsub() inode reclaim behaviour back to original code o xfs_quiesce_fs() should use XFS_IFLUSH_DELWRI_ELSE_ASYNC, not XFS_IFLUSH_ASYNC, to prevent change of behaviour. SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32284a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_vnodeops.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 8b6812f66a1..a6714579a41 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2918,36 +2918,30 @@ xfs_finish_reclaim( } int -xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) +xfs_finish_reclaim_all( + xfs_mount_t *mp, + int noblock, + int mode) { - int purged; xfs_inode_t *ip, *n; - int done = 0; - while (!done) { - purged = 0; - XFS_MOUNT_ILOCK(mp); - list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { - if (noblock) { - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) - continue; - if (xfs_ipincount(ip) || - !xfs_iflock_nowait(ip)) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - continue; - } +restart: + XFS_MOUNT_ILOCK(mp); + list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { + if (noblock) { + if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) + continue; + if (xfs_ipincount(ip) || + 
!xfs_iflock_nowait(ip)) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; } - XFS_MOUNT_IUNLOCK(mp); - if (xfs_finish_reclaim(ip, noblock, - XFS_IFLUSH_DELWRI_ELSE_ASYNC)) - delay(1); - purged = 1; - break; } - - done = !purged; + XFS_MOUNT_IUNLOCK(mp); + if (xfs_finish_reclaim(ip, noblock, mode)) + delay(1); + goto restart; } - XFS_MOUNT_IUNLOCK(mp); return 0; } -- cgit v1.2.3-70-g09d2 From bf904248a2adb3f3be4eb4fb1837ce3bb28cca76 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:36:14 +1100 Subject: [XFS] Combine the XFS and Linux inodes To avoid issues with different lifecycles of XFS and Linux inodes, embed the linux inode inside the XFS inode. This means that the linux inode has the same lifecycle as the XFS inode, even when it has been released by the OS. XFS inodes don't live much longer than this (a short stint in reclaim at most), so there aren't significant memory usage penalties here. Version 3 o kill xfs_icount() Version 2 o remove unused commented out code from xfs_iget(). 
o kill useless cast in VFS_I() SGI-PV: 988141 SGI-Modid: xfs-linux-melb:xfs-kern:32323a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_iops.c | 17 +++-- fs/xfs/linux-2.6/xfs_super.c | 47 +++++------- fs/xfs/linux-2.6/xfs_vnode.c | 15 +--- fs/xfs/xfs_iget.c | 167 +++++++++---------------------------------- fs/xfs/xfs_inode.c | 43 ++++++++--- fs/xfs/xfs_inode.h | 9 ++- fs/xfs/xfs_vnodeops.c | 13 +--- 7 files changed, 108 insertions(+), 203 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 3bfb3c0f8e2..37bb1012aff 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -64,14 +64,14 @@ xfs_synchronize_atime( { struct inode *inode = VFS_I(ip); - if (inode) { + if (!(inode->i_state & I_CLEAR)) { ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; } } /* - * If the linux inode exists, mark it dirty. + * If the linux inode is valid, mark it dirty. * Used when commiting a dirty inode into a transaction so that * the inode will get written back by the linux code */ @@ -81,7 +81,7 @@ xfs_mark_inode_dirty_sync( { struct inode *inode = VFS_I(ip); - if (inode) + if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) mark_inode_dirty_sync(inode); } @@ -766,12 +766,21 @@ xfs_diflags_to_iflags( * When reading existing inodes from disk this is called directly * from xfs_iget, when creating a new inode it is called from * xfs_ialloc after setting up the inode. + * + * We are always called with an uninitialised linux inode here. + * We need to initialise the necessary fields and take a reference + * on it. 
*/ void xfs_setup_inode( struct xfs_inode *ip) { - struct inode *inode = ip->i_vnode; + struct inode *inode = &ip->i_vnode; + + inode->i_ino = ip->i_ino; + inode->i_state = I_NEW|I_LOCK; + inode_add_to_lists(ip->i_mount->m_super, inode); + ASSERT(atomic_read(&inode->i_count) == 1); inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index b87e45577a5..c6ef684bf2e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -72,7 +72,6 @@ static struct quotactl_ops xfs_quotactl_operations; static struct super_operations xfs_super_operations; -static kmem_zone_t *xfs_vnode_zone; static kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; @@ -867,29 +866,24 @@ xfsaild_stop( } - +/* Catch misguided souls that try to use this interface on XFS */ STATIC struct inode * xfs_fs_alloc_inode( struct super_block *sb) { - return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); + BUG(); } +/* + * we need to provide an empty inode free function to prevent + * the generic code from trying to free our combined inode. + */ STATIC void xfs_fs_destroy_inode( - struct inode *inode) -{ - kmem_zone_free(xfs_vnode_zone, inode); -} - -STATIC void -xfs_fs_inode_init_once( - void *vnode) + struct inode *inode) { - inode_init_once((struct inode *)vnode); } - /* * Slab object creation initialisation for the XFS inode. * This covers only the idempotent fields in the XFS inode; @@ -898,13 +892,18 @@ xfs_fs_inode_init_once( * fields in the xfs inode that left in the initialise state * when freeing the inode. 
*/ -void -xfs_inode_init_once( +STATIC void +xfs_fs_inode_init_once( void *inode) { struct xfs_inode *ip = inode; memset(ip, 0, sizeof(struct xfs_inode)); + + /* vfs inode */ + inode_init_once(VFS_I(ip)); + + /* xfs inode */ atomic_set(&ip->i_iocount, 0); atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); @@ -975,8 +974,6 @@ xfs_fs_clear_inode( if (xfs_reclaim(ip)) panic("%s: cannot reclaim 0x%p\n", __func__, inode); } - - ASSERT(XFS_I(inode) == NULL); } STATIC void @@ -1829,16 +1826,10 @@ xfs_free_trace_bufs(void) STATIC int __init xfs_init_zones(void) { - xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, - xfs_fs_inode_init_once); - if (!xfs_vnode_zone) - goto out; xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); if (!xfs_ioend_zone) - goto out_destroy_vnode_zone; + goto out; xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, xfs_ioend_zone); @@ -1854,6 +1845,7 @@ xfs_init_zones(void) "xfs_bmap_free_item"); if (!xfs_bmap_free_item_zone) goto out_destroy_log_ticket_zone; + xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), "xfs_btree_cur"); if (!xfs_btree_cur_zone) @@ -1901,8 +1893,8 @@ xfs_init_zones(void) xfs_inode_zone = kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, xfs_inode_init_once); + KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, + xfs_fs_inode_init_once); if (!xfs_inode_zone) goto out_destroy_efi_zone; @@ -1950,8 +1942,6 @@ xfs_init_zones(void) mempool_destroy(xfs_ioend_pool); out_destroy_ioend_zone: kmem_zone_destroy(xfs_ioend_zone); - out_destroy_vnode_zone: - kmem_zone_destroy(xfs_vnode_zone); out: return -ENOMEM; } @@ -1976,7 +1966,6 @@ xfs_destroy_zones(void) kmem_zone_destroy(xfs_log_ticket_zone); mempool_destroy(xfs_ioend_pool); kmem_zone_destroy(xfs_ioend_zone); - kmem_zone_destroy(xfs_vnode_zone); } diff --git a/fs/xfs/linux-2.6/xfs_vnode.c 
b/fs/xfs/linux-2.6/xfs_vnode.c index ac827d23149..ad18262d651 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -84,25 +84,12 @@ vn_ioerror( #ifdef XFS_INODE_TRACE -/* - * Reference count of Linux inode if present, -1 if the xfs_inode - * has no associated Linux inode. - */ -static inline int xfs_icount(struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - - if (inode) - return atomic_read(&inode->i_count); - return -1; -} - #define KTRACE_ENTER(ip, vk, s, line, ra) \ ktrace_enter( (ip)->i_trace, \ /* 0 */ (void *)(__psint_t)(vk), \ /* 1 */ (void *)(s), \ /* 2 */ (void *)(__psint_t) line, \ -/* 3 */ (void *)(__psint_t)xfs_icount(ip), \ +/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \ /* 4 */ (void *)(ra), \ /* 5 */ NULL, \ /* 6 */ (void *)(__psint_t)current_cpu(), \ diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b2539b17c95..c4414e8bce8 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -44,77 +44,65 @@ */ static int xfs_iget_cache_hit( - struct inode *inode, struct xfs_perag *pag, struct xfs_inode *ip, int flags, int lock_flags) __releases(pag->pag_ici_lock) { struct xfs_mount *mp = ip->i_mount; - struct inode *old_inode; int error = 0; /* * If INEW is set this inode is being set up + * If IRECLAIM is set this inode is being torn down * Pause and try again. */ - if (xfs_iflags_test(ip, XFS_INEW)) { + if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { error = EAGAIN; XFS_STATS_INC(xs_ig_frecycle); goto out_error; } - old_inode = ip->i_vnode; - if (old_inode == NULL) { + /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ + if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { + /* - * If IRECLAIM is set this inode is - * on its way out of the system, - * we need to pause and try again. + * If lookup is racing with unlink, then we should return an + * error immediately so we don't remove it from the reclaim + * list and potentially leak the inode. 
*/ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { - error = EAGAIN; - XFS_STATS_INC(xs_ig_frecycle); + + if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { + error = ENOENT; goto out_error; } - ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + + xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); /* - * If lookup is racing with unlink, then we - * should return an error immediately so we - * don't remove it from the reclaim list and - * potentially leak the inode. + * We need to re-initialise the VFS inode as it has been + * 'freed' by the VFS. Do this here so we can deal with + * errors cleanly, then tag it so it can be set up correctly + * later. */ - if ((ip->i_d.di_mode == 0) && - !(flags & XFS_IGET_CREATE)) { - error = ENOENT; + if (!inode_init_always(mp->m_super, VFS_I(ip))) { + error = ENOMEM; goto out_error; } - xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); - + xfs_iflags_set(ip, XFS_INEW); xfs_iflags_clear(ip, XFS_IRECLAIMABLE); read_unlock(&pag->pag_ici_lock); XFS_MOUNT_ILOCK(mp); list_del_init(&ip->i_reclaim); XFS_MOUNT_IUNLOCK(mp); - - } else if (inode != old_inode) { - /* The inode is being torn down, pause and - * try again. - */ - if (old_inode->i_state & (I_FREEING | I_CLEAR)) { - error = EAGAIN; - XFS_STATS_INC(xs_ig_frecycle); - goto out_error; - } -/* Chances are the other vnode (the one in the inode) is being torn -* down right now, and we landed on top of it. Question is, what do -* we do? Unhook the old inode and hook up the new one? -*/ - cmn_err(CE_PANIC, - "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", - old_inode, inode); + } else if (!igrab(VFS_I(ip))) { + /* If the VFS inode is being torn down, pause and try again. */ + error = EAGAIN; + XFS_STATS_INC(xs_ig_frecycle); + goto out_error; } else { + /* we've got a live one */ read_unlock(&pag->pag_ici_lock); } @@ -215,11 +203,11 @@ out_destroy: /* * Look up an inode by number in the given file system. * The inode is looked up in the cache held in each AG. 
- * If the inode is found in the cache, attach it to the provided - * vnode. + * If the inode is found in the cache, initialise the vfs inode + * if necessary. * * If it is not in core, read it in from the file system's device, - * add it to the cache and attach the provided vnode. + * add it to the cache and initialise the vfs inode. * * The inode is locked according to the value of the lock_flags parameter. * This flag parameter indicates how and if the inode's IO lock and inode lock @@ -236,9 +224,8 @@ out_destroy: * bno -- the block number starting the buffer containing the inode, * if known (as by bulkstat), else 0. */ -STATIC int -xfs_iget_core( - struct inode *inode, +int +xfs_iget( xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, @@ -269,7 +256,7 @@ again: ip = radix_tree_lookup(&pag->pag_ici_root, agino); if (ip) { - error = xfs_iget_cache_hit(inode, pag, ip, flags, lock_flags); + error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); if (error) goto out_error_or_again; } else { @@ -283,23 +270,16 @@ again: } xfs_put_perag(mp, pag); - ASSERT(ip->i_df.if_ext_max == - XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); - xfs_iflags_set(ip, XFS_IMODIFIED); *ipp = ip; - /* - * Set up the Linux with the Linux inode. - */ - ip->i_vnode = inode; - inode->i_private = ip; - + ASSERT(ip->i_df.if_ext_max == + XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); /* * If we have a real type for an on-disk inode, we can set ops(&unlock) * now. If it's a new inode being created, xfs_ialloc will handle it. */ - if (ip->i_d.di_mode != 0) + if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) xfs_setup_inode(ip); return 0; @@ -313,75 +293,6 @@ out_error_or_again: } -/* - * The 'normal' internal xfs_iget, if needed it will - * 'allocate', or 'get', the vnode. 
- */ -int -xfs_iget( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ino_t ino, - uint flags, - uint lock_flags, - xfs_inode_t **ipp, - xfs_daddr_t bno) -{ - struct inode *inode; - xfs_inode_t *ip; - int error; - - XFS_STATS_INC(xs_ig_attempts); - -retry: - inode = iget_locked(mp->m_super, ino); - if (!inode) - /* If we got no inode we are out of memory */ - return ENOMEM; - - if (inode->i_state & I_NEW) { - XFS_STATS_INC(vn_active); - XFS_STATS_INC(vn_alloc); - - error = xfs_iget_core(inode, mp, tp, ino, flags, - lock_flags, ipp, bno); - if (error) { - make_bad_inode(inode); - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - iput(inode); - } - return error; - } - - /* - * If the inode is not fully constructed due to - * filehandle mismatches wait for the inode to go - * away and try again. - * - * iget_locked will call __wait_on_freeing_inode - * to wait for the inode to go away. - */ - if (is_bad_inode(inode)) { - iput(inode); - delay(1); - goto retry; - } - - ip = XFS_I(inode); - if (!ip) { - iput(inode); - delay(1); - goto retry; - } - - if (lock_flags != 0) - xfs_ilock(ip, lock_flags); - XFS_STATS_INC(xs_ig_found); - *ipp = ip; - return 0; -} - /* * Look for the inode corresponding to the given ino in the hash table. * If it is there and its i_transp pointer matches tp, return it. @@ -481,14 +392,6 @@ xfs_ireclaim(xfs_inode_t *ip) */ XFS_QM_DQDETACH(ip->i_mount, ip); - /* - * Pull our behavior descriptor from the vnode chain. - */ - if (ip->i_vnode) { - ip->i_vnode->i_private = NULL; - ip->i_vnode = NULL; - } - /* * Free all memory associated with the inode. */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bc33762abc4..99d9118c4a4 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -813,6 +813,16 @@ xfs_inode_alloc( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(list_empty(&ip->i_reclaim)); + /* + * initialise the VFS inode here to get failures + * out of the way early. 
+ */ + if (!inode_init_always(mp->m_super, VFS_I(ip))) { + kmem_zone_free(xfs_inode_zone, ip); + return NULL; + } + + /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; ip->i_blkno = 0; @@ -1086,6 +1096,7 @@ xfs_ialloc( uint flags; int error; timespec_t tv; + int filestreams = 0; /* * Call the space management code to pick @@ -1093,9 +1104,8 @@ xfs_ialloc( */ error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, ialloc_context, call_again, &ino); - if (error != 0) { + if (error) return error; - } if (*call_again || ino == NULLFSINO) { *ipp = NULL; return 0; @@ -1109,9 +1119,8 @@ xfs_ialloc( */ error = xfs_trans_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); - if (error != 0) { + if (error) return error; - } ASSERT(ip != NULL); ip->i_d.di_mode = (__uint16_t)mode; @@ -1192,13 +1201,12 @@ xfs_ialloc( flags |= XFS_ILOG_DEV; break; case S_IFREG: - if (pip && xfs_inode_is_filestream(pip)) { - error = xfs_filestream_associate(pip, ip); - if (error < 0) - return -error; - if (!error) - xfs_iflags_set(ip, XFS_IFILESTREAM); - } + /* + * we can't set up filestreams until after the VFS inode + * is set up properly. + */ + if (pip && xfs_inode_is_filestream(pip)) + filestreams = 1; /* fall through */ case S_IFDIR: if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { @@ -1264,6 +1272,15 @@ xfs_ialloc( /* now that we have an i_mode we can setup inode ops and unlock */ xfs_setup_inode(ip); + /* now we have set up the vfs inode we can associate the filestream */ + if (filestreams) { + error = xfs_filestream_associate(pip, ip); + if (error < 0) + return -error; + if (!error) + xfs_iflags_set(ip, XFS_IFILESTREAM); + } + *ipp = ip; return 0; } @@ -2650,6 +2667,10 @@ xfs_idestroy_fork( * It must free the inode itself and any buffers allocated for * if_extents/if_data and if_broot. It must also free the lock * associated with the inode. 
+ * + * Note: because we don't initialise everything on reallocation out + * of the zone, we must ensure we nullify everything correctly before + * freeing the structure. */ void xfs_idestroy( diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 6fd20fc179a..345b43a90eb 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -236,7 +236,6 @@ typedef struct xfs_inode { /* Inode linking and identification information. */ struct xfs_mount *i_mount; /* fs mount struct ptr */ struct list_head i_reclaim; /* reclaim list */ - struct inode *i_vnode; /* vnode backpointer */ struct xfs_dquot *i_udquot; /* user dquot */ struct xfs_dquot *i_gdquot; /* group dquot */ @@ -271,6 +270,10 @@ typedef struct xfs_inode { xfs_fsize_t i_size; /* in-memory size */ xfs_fsize_t i_new_size; /* size when write completes */ atomic_t i_iocount; /* outstanding I/O count */ + + /* VFS inode */ + struct inode i_vnode; /* embedded VFS inode */ + /* Trace buffers per inode. */ #ifdef XFS_INODE_TRACE struct ktrace *i_trace; /* general inode trace */ @@ -298,13 +301,13 @@ typedef struct xfs_inode { /* Convert from vfs inode to xfs inode */ static inline struct xfs_inode *XFS_I(struct inode *inode) { - return (struct xfs_inode *)inode->i_private; + return container_of(inode, struct xfs_inode, i_vnode); } /* convert from xfs inode to vfs inode */ static inline struct inode *VFS_I(struct xfs_inode *ip) { - return (struct inode *)ip->i_vnode; + return &ip->i_vnode; } /* diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index a6714579a41..7fb577c9f9d 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2833,6 +2833,7 @@ xfs_reclaim( if (!ip->i_update_core && (ip->i_itemp == NULL)) { xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_iflock(ip); + xfs_iflags_set(ip, XFS_IRECLAIMABLE); return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); } else { xfs_mount_t *mp = ip->i_mount; @@ -2841,8 +2842,6 @@ xfs_reclaim( XFS_MOUNT_ILOCK(mp); spin_lock(&ip->i_flags_lock); 
__xfs_iflags_set(ip, XFS_IRECLAIMABLE); - VFS_I(ip)->i_private = NULL; - ip->i_vnode = NULL; spin_unlock(&ip->i_flags_lock); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); XFS_MOUNT_IUNLOCK(mp); @@ -2857,10 +2856,6 @@ xfs_finish_reclaim( int sync_mode) { xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); - struct inode *vp = VFS_I(ip); - - if (vp && VN_BAD(vp)) - goto reclaim; /* The hash lock here protects a thread in xfs_iget_core from * racing with us on linking the inode back with a vnode. @@ -2870,7 +2865,7 @@ xfs_finish_reclaim( write_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); if (__xfs_iflags_test(ip, XFS_IRECLAIM) || - (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { + !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { spin_unlock(&ip->i_flags_lock); write_unlock(&pag->pag_ici_lock); if (locked) { @@ -2904,15 +2899,13 @@ xfs_finish_reclaim( * In the case of a forced shutdown we rely on xfs_iflush() to * wait for the inode to be unpinned before returning an error. */ - if (xfs_iflush(ip, sync_mode) == 0) { + if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { /* synchronize with xfs_iflush_done */ xfs_iflock(ip); xfs_ifunlock(ip); } xfs_iunlock(ip, XFS_ILOCK_EXCL); - - reclaim: xfs_ireclaim(ip); return 0; } -- cgit v1.2.3-70-g09d2 From fce08f2f3bd0d08feeb4cea70e44aa3471d9bb4c Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:03 +1100 Subject: [XFS] move inode reclaim functions to xfs_sync.c Background inode reclaim is run by the xfssyncd. Move the reclaim worker functions to be close to the sync code as they are very similar in structure and are both run from the same background thread. 
SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32329a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 91 +++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_sync.h | 3 ++ fs/xfs/xfs_inode.h | 2 - fs/xfs/xfs_vnodeops.c | 90 -------------------------------------------- 4 files changed, 94 insertions(+), 92 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index b2b708254ae..79038ea55b0 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -583,3 +583,94 @@ xfs_syncd_stop( kthread_stop(mp->m_sync_task); } +int +xfs_finish_reclaim( + xfs_inode_t *ip, + int locked, + int sync_mode) +{ + xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); + + /* The hash lock here protects a thread in xfs_iget_core from + * racing with us on linking the inode back with a vnode. + * Once we have the XFS_IRECLAIM flag set it will not touch + * us. + */ + write_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + if (__xfs_iflags_test(ip, XFS_IRECLAIM) || + !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + if (locked) { + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } + return 1; + } + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + xfs_put_perag(ip->i_mount, pag); + + /* + * If the inode is still dirty, then flush it out. If the inode + * is not in the AIL, then it will be OK to flush it delwri as + * long as xfs_iflush() does not keep any references to the inode. + * We leave that decision up to xfs_iflush() since it has the + * knowledge of whether it's OK to simply do a delwri flush of + * the inode or whether we need to wait until the inode is + * pulled from the AIL. 
+ * We get the flush lock regardless, though, just to make sure + * we don't free it while it is being flushed. + */ + if (!locked) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_iflock(ip); + } + + /* + * In the case of a forced shutdown we rely on xfs_iflush() to + * wait for the inode to be unpinned before returning an error. + */ + if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { + /* synchronize with xfs_iflush_done */ + xfs_iflock(ip); + xfs_ifunlock(ip); + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_ireclaim(ip); + return 0; +} + +int +xfs_finish_reclaim_all( + xfs_mount_t *mp, + int noblock, + int mode) +{ + xfs_inode_t *ip, *n; + +restart: + XFS_MOUNT_ILOCK(mp); + list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { + if (noblock) { + if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) + continue; + if (xfs_ipincount(ip) || + !xfs_iflock_nowait(ip)) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; + } + } + XFS_MOUNT_IUNLOCK(mp); + if (xfs_finish_reclaim(ip, noblock, mode)) + delay(1); + goto restart; + } + XFS_MOUNT_IUNLOCK(mp); + return 0; +} + + diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 3b49aa3bb5f..23117a17fde 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -45,4 +45,7 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); +int xfs_finish_reclaim(struct xfs_inode *ip, int locked, int sync_mode); +int xfs_finish_reclaim_all(struct xfs_mount *mp, int noblock, int mode); + #endif diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 345b43a90eb..64e50ff9ad2 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -496,8 +496,6 @@ int xfs_isilocked(xfs_inode_t *, uint); uint xfs_ilock_map_shared(xfs_inode_t *); void xfs_iunlock_map_shared(xfs_inode_t *, uint); void xfs_ireclaim(xfs_inode_t *); -int xfs_finish_reclaim(xfs_inode_t *, int, int); -int xfs_finish_reclaim_all(struct xfs_mount 
*, int, int); /* * xfs_inode.c prototypes. diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 7fb577c9f9d..cdcc835bc5a 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2849,96 +2849,6 @@ xfs_reclaim( return 0; } -int -xfs_finish_reclaim( - xfs_inode_t *ip, - int locked, - int sync_mode) -{ - xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); - - /* The hash lock here protects a thread in xfs_iget_core from - * racing with us on linking the inode back with a vnode. - * Once we have the XFS_IRECLAIM flag set it will not touch - * us. - */ - write_lock(&pag->pag_ici_lock); - spin_lock(&ip->i_flags_lock); - if (__xfs_iflags_test(ip, XFS_IRECLAIM) || - !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - if (locked) { - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } - return 1; - } - __xfs_iflags_set(ip, XFS_IRECLAIM); - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - xfs_put_perag(ip->i_mount, pag); - - /* - * If the inode is still dirty, then flush it out. If the inode - * is not in the AIL, then it will be OK to flush it delwri as - * long as xfs_iflush() does not keep any references to the inode. - * We leave that decision up to xfs_iflush() since it has the - * knowledge of whether it's OK to simply do a delwri flush of - * the inode or whether we need to wait until the inode is - * pulled from the AIL. - * We get the flush lock regardless, though, just to make sure - * we don't free it while it is being flushed. - */ - if (!locked) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_iflock(ip); - } - - /* - * In the case of a forced shutdown we rely on xfs_iflush() to - * wait for the inode to be unpinned before returning an error. 
- */ - if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { - /* synchronize with xfs_iflush_done */ - xfs_iflock(ip); - xfs_ifunlock(ip); - } - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_ireclaim(ip); - return 0; -} - -int -xfs_finish_reclaim_all( - xfs_mount_t *mp, - int noblock, - int mode) -{ - xfs_inode_t *ip, *n; - -restart: - XFS_MOUNT_ILOCK(mp); - list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { - if (noblock) { - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) - continue; - if (xfs_ipincount(ip) || - !xfs_iflock_nowait(ip)) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - continue; - } - } - XFS_MOUNT_IUNLOCK(mp); - if (xfs_finish_reclaim(ip, noblock, mode)) - delay(1); - goto restart; - } - XFS_MOUNT_IUNLOCK(mp); - return 0; -} - /* * xfs_alloc_file_space() * This routine allocates disk space for the given file. -- cgit v1.2.3-70-g09d2 From 1dc3318ae1c1cc11f9fb8279a806de448e2b90e8 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:15 +1100 Subject: [XFS] rename inode reclaim functions The function names xfs_finish_reclaim and xfs_finish_reclaim_all are not very descriptive of what they are reclaiming. Rename to xfs_reclaim_inode[s] to match the xfs_sync_inodes() function. 
SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32330a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 10 +++++----- fs/xfs/linux-2.6/xfs_sync.h | 4 ++-- fs/xfs/xfs_mount.c | 2 +- fs/xfs/xfs_vnodeops.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 79038ea55b0..34413ceaea9 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -364,7 +364,7 @@ xfs_quiesce_fs( int count = 0, pincount; xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* * This loop must run at least twice. The first instance of the loop @@ -505,7 +505,7 @@ xfs_sync_worker( if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* dgc: errors ignored here */ error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); error = xfs_sync_fsdata(mp, SYNC_BDFLUSH); @@ -584,7 +584,7 @@ xfs_syncd_stop( } int -xfs_finish_reclaim( +xfs_reclaim_inode( xfs_inode_t *ip, int locked, int sync_mode) @@ -645,7 +645,7 @@ xfs_finish_reclaim( } int -xfs_finish_reclaim_all( +xfs_reclaim_inodes( xfs_mount_t *mp, int noblock, int mode) @@ -665,7 +665,7 @@ restart: } } XFS_MOUNT_IUNLOCK(mp); - if (xfs_finish_reclaim(ip, noblock, mode)) + if (xfs_reclaim_inode(ip, noblock, mode)) delay(1); goto restart; } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 23117a17fde..c1bcd500509 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -45,7 +45,7 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); -int xfs_finish_reclaim(struct 
xfs_inode *ip, int locked, int sync_mode); -int xfs_finish_reclaim_all(struct xfs_mount *mp, int noblock, int mode); +int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); +int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); #endif diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 43e5917465a..3704baefe2e 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1235,7 +1235,7 @@ xfs_unmountfs( * need to force the log first. */ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); - xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC); XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index cdcc835bc5a..07945634923 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2834,7 +2834,7 @@ xfs_reclaim( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_iflock(ip); xfs_iflags_set(ip, XFS_IRECLAIMABLE); - return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); + return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); } else { xfs_mount_t *mp = ip->i_mount; -- cgit v1.2.3-70-g09d2 From 396beb85311689e38634926058d9a3bb0576ca8a Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:26 +1100 Subject: [XFS] mark inodes for reclaim via a tag in the inode radix tree Prepare for removing the deleted inode list by marking inodes for reclaim in the inode radix trees so that we can use the radix trees to find reclaimable inodes. 
SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32331a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 41 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_sync.h | 4 ++++ fs/xfs/xfs_ag.h | 5 +++++ fs/xfs/xfs_iget.c | 3 +++ fs/xfs/xfs_vnodeops.c | 1 + 5 files changed, 54 insertions(+) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 34413ceaea9..9e7f4dccab7 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -644,6 +644,47 @@ xfs_reclaim_inode( return 0; } +void +xfs_inode_set_reclaim_tag( + xfs_inode_t *ip) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + + read_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + radix_tree_tag_set(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); + xfs_put_perag(mp, pag); +} + +void +__xfs_inode_clear_reclaim_tag( + xfs_mount_t *mp, + xfs_perag_t *pag, + xfs_inode_t *ip) +{ + radix_tree_tag_clear(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); +} + +void +xfs_inode_clear_reclaim_tag( + xfs_inode_t *ip) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + + read_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + __xfs_inode_clear_reclaim_tag(mp, pag, ip); + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); + xfs_put_perag(mp, pag); +} + int xfs_reclaim_inodes( xfs_mount_t *mp, diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index c1bcd500509..5f6de1efe1f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -48,4 +48,8 @@ void xfs_flush_device(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); int xfs_reclaim_inodes(struct 
xfs_mount *mp, int noblock, int mode); +void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); +void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); +void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, + struct xfs_inode *ip); #endif diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 729ee3eb39a..2bfd8632914 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -204,6 +204,11 @@ typedef struct xfs_perag #endif } xfs_perag_t; +/* + * tags for inode radix tree + */ +#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ + #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) #define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp))) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index c4414e8bce8..a0387f14c20 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -91,6 +91,9 @@ xfs_iget_cache_hit( } xfs_iflags_set(ip, XFS_INEW); xfs_iflags_clear(ip, XFS_IRECLAIMABLE); + + /* clear the radix tree reclaim flag as well. */ + __xfs_inode_clear_reclaim_tag(mp, pag, ip); read_unlock(&pag->pag_ici_lock); XFS_MOUNT_ILOCK(mp); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 07945634923..f89a73eb016 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2845,6 +2845,7 @@ xfs_reclaim( spin_unlock(&ip->i_flags_lock); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); XFS_MOUNT_IUNLOCK(mp); + xfs_inode_set_reclaim_tag(ip); } return 0; } -- cgit v1.2.3-70-g09d2 From 116545130cbc5214523c2f994a11c81ef9eb9186 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:49 +1100 Subject: [XFS] kill deleted inodes list Now that the deleted inodes list is unused, kill it. This also removes the i_reclaim list head from the xfs_inode, shrinking it by two pointers. 
SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32334a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 2 -- fs/xfs/linux-2.6/xfs_sync.c | 6 ++++++ fs/xfs/xfs_iget.c | 8 -------- fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_inode.h | 1 - fs/xfs/xfs_mount.c | 1 - fs/xfs/xfs_mount.h | 5 +---- fs/xfs/xfs_vnodeops.c | 12 +----------- 8 files changed, 10 insertions(+), 29 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 1d67d7f92a4..206a949e387 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -913,7 +913,6 @@ xfs_fs_inode_init_once( atomic_set(&ip->i_iocount, 0); atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); - INIT_LIST_HEAD(&ip->i_reclaim); init_waitqueue_head(&ip->i_ipin_wait); /* * Because we want to use a counting completion, complete @@ -1546,7 +1545,6 @@ xfs_fs_fill_super( goto out_free_args; spin_lock_init(&mp->m_sb_lock); - mutex_init(&mp->m_ilock); mutex_init(&mp->m_growlock); atomic_set(&mp->m_active_trans, 0); INIT_LIST_HEAD(&mp->m_sync_list); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index bbb40e27840..22006b5733c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -644,6 +644,11 @@ xfs_reclaim_inode( return 0; } +/* + * We set the inode flag atomically with the radix tree tag. + * Once we get tag lookups on the radix tree, this inode flag + * can go away. 
+ */ void xfs_inode_set_reclaim_tag( xfs_inode_t *ip) @@ -655,6 +660,7 @@ xfs_inode_set_reclaim_tag( spin_lock(&ip->i_flags_lock); radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + __xfs_iflags_set(ip, XFS_IRECLAIMABLE); spin_unlock(&ip->i_flags_lock); read_unlock(&pag->pag_ici_lock); xfs_put_perag(mp, pag); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index a0387f14c20..800133805ca 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -95,10 +95,6 @@ xfs_iget_cache_hit( /* clear the radix tree reclaim flag as well. */ __xfs_inode_clear_reclaim_tag(mp, pag, ip); read_unlock(&pag->pag_ici_lock); - - XFS_MOUNT_ILOCK(mp); - list_del_init(&ip->i_reclaim); - XFS_MOUNT_IUNLOCK(mp); } else if (!igrab(VFS_I(ip))) { /* If the VFS inode is being torn down, pause and try again. */ error = EAGAIN; @@ -419,11 +415,7 @@ xfs_iextract( write_unlock(&pag->pag_ici_lock); xfs_put_perag(mp, pag); - /* Deal with the deleted inodes list */ - XFS_MOUNT_ILOCK(mp); - list_del_init(&ip->i_reclaim); mp->m_ireclaims++; - XFS_MOUNT_IUNLOCK(mp); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 99d9118c4a4..4eb629f0513 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -811,7 +811,7 @@ xfs_inode_alloc( ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(list_empty(&ip->i_reclaim)); + ASSERT(completion_done(&ip->i_flush)); /* * initialise the VFS inode here to get failures @@ -2729,7 +2729,7 @@ xfs_idestroy( ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(list_empty(&ip->i_reclaim)); + ASSERT(completion_done(&ip->i_flush)); kmem_zone_free(xfs_inode_zone, ip); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 64e50ff9ad2..a5aeb9cfeae 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -235,7 +235,6 @@ typedef struct dm_attrs_s 
{ typedef struct xfs_inode { /* Inode linking and identification information. */ struct xfs_mount *i_mount; /* fs mount struct ptr */ - struct list_head i_reclaim; /* reclaim list */ struct xfs_dquot *i_udquot; /* user dquot */ struct xfs_dquot *i_gdquot; /* group dquot */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 3704baefe2e..177976dfea0 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -580,7 +580,6 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; - INIT_LIST_HEAD(&mp->m_del_inodes); /* * Setup for attributes, in case they get created. diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4e62802b6ab..67cf0b2bb84 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -248,8 +248,6 @@ typedef struct xfs_mount { xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ spinlock_t m_agirotor_lock;/* .. and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ - struct list_head m_del_inodes; /* inodes to reclaim */ - mutex_t m_ilock; /* inode list mutex */ uint m_ireclaims; /* count of calls to reclaim*/ uint m_readio_log; /* min read size log bytes */ uint m_readio_blocks; /* min read size blocks */ @@ -312,8 +310,7 @@ typedef struct xfs_mount { int m_attr_magicpct;/* 37% of the blocksize */ int m_dir_magicpct; /* 37% of the dir blocksize */ __uint8_t m_mk_sharedro; /* mark shared ro on unmount */ - __uint8_t m_inode_quiesce;/* call quiesce on new inodes. - field governed by m_ilock */ + __uint8_t m_inode_quiesce;/* call quiesce on new inodes. 
*/ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ int m_dirblksize; /* directory block sz--bytes */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index f89a73eb016..1d15a320b9a 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2835,18 +2835,8 @@ xfs_reclaim( xfs_iflock(ip); xfs_iflags_set(ip, XFS_IRECLAIMABLE); return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); - } else { - xfs_mount_t *mp = ip->i_mount; - - /* Protect sync and unpin from us */ - XFS_MOUNT_ILOCK(mp); - spin_lock(&ip->i_flags_lock); - __xfs_iflags_set(ip, XFS_IRECLAIMABLE); - spin_unlock(&ip->i_flags_lock); - list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); - XFS_MOUNT_IUNLOCK(mp); - xfs_inode_set_reclaim_tag(ip); } + xfs_inode_set_reclaim_tag(ip); return 0; } -- cgit v1.2.3-70-g09d2 From 2b7035fd7473c799ca3372092d72c768c7db329d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:55:18 +1100 Subject: [XFS] Trivial xfs_remove comment fixup The dp to ip comment should be for the unconditional xfs_droplink call, and the "." link obviously only exists for directories, so it should be in the is_dir conditional. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32374a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_vnodeops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1d15a320b9a..1c890113ab3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2009,7 +2009,7 @@ xfs_remove( goto out_bmap_cancel; /* - * Drop the link from dp to ip. + * Drop the "." link from ip to self. */ error = xfs_droplink(tp, ip); if (error) @@ -2024,7 +2024,7 @@ xfs_remove( } /* - * Drop the "." link from ip to self. + * Drop the link from dp to ip. 
*/ error = xfs_droplink(tp, ip); if (error) -- cgit v1.2.3-70-g09d2 From ea5a3dc8356bf1cf27bab9a5a0da5dfbbb82013d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 18:27:48 +1100 Subject: [XFS] kill sys_cred capable_cred has been unused for a while so we can kill it and sys_cred. That also means the cred argument to xfs_setattr and xfs_change_file_space can be removed now. SGI-PV: 988918 SGI-Modid: xfs-linux-melb:xfs-kern:32412a Signed-off-by: Christoph Hellwig Signed-off-by: Tim Shimmin Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_cred.h | 8 -------- fs/xfs/linux-2.6/xfs_globals.c | 7 ------- fs/xfs/linux-2.6/xfs_globals.h | 1 - fs/xfs/linux-2.6/xfs_ioctl.c | 3 +-- fs/xfs/linux-2.6/xfs_iops.c | 6 +++--- fs/xfs/xfs_acl.c | 2 +- fs/xfs/xfs_vnodeops.c | 6 ++---- fs/xfs/xfs_vnodeops.h | 6 ++---- 8 files changed, 9 insertions(+), 30 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index 652721ce0ea..98da2199bc2 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -27,12 +27,4 @@ typedef struct cred { /* EMPTY */ } cred_t; -extern struct cred *sys_cred; - -/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ -static inline int capable_cred(cred_t *cr, int cid) -{ - return (cr == sys_cred) ? 1 : capable(cid); -} - #endif /* __XFS_CRED_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index ef90e64641e..46e862b004e 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -43,10 +43,3 @@ xfs_param_t xfs_params = { .inherit_nodfrg = { 0, 1, 1 }, .fstrm_timer = { 1, 30*100, 3600*100}, }; - -/* - * Global system credential structure. 
- */ -static cred_t sys_cred_val; -cred_t *sys_cred = &sys_cred_val; - diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h index 2770b0085ee..69f71caf061 100644 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ b/fs/xfs/linux-2.6/xfs_globals.h @@ -19,6 +19,5 @@ #define __XFS_GLOBALS_H__ extern uint64_t xfs_panic_mask; /* set to cause more panics */ -extern struct cred *sys_cred; #endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d3438c72dca..b5ea3f2afdc 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -691,8 +691,7 @@ xfs_ioc_space( if (ioflags & IO_INVIS) attr_flags |= XFS_ATTR_DMI; - error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, - NULL, attr_flags); + error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, attr_flags); return -error; } diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 37bb1012aff..f78bc221576 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -601,7 +601,7 @@ xfs_vn_setattr( struct dentry *dentry, struct iattr *iattr) { - return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL); + return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); } /* @@ -642,7 +642,7 @@ xfs_vn_fallocate( xfs_ilock(ip, XFS_IOLOCK_EXCL); error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, - 0, NULL, XFS_ATTR_NOLOCK); + 0, XFS_ATTR_NOLOCK); if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) new_size = offset + len; @@ -653,7 +653,7 @@ xfs_vn_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL); + error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); } xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index b2f639a1416..8b3d1bdeb44 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -758,7 +758,7 @@ xfs_acl_setmode( if (gap && nomask) iattr.ia_mode |= gap->ae_perm << 3; - 
return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred); + return xfs_setattr(XFS_I(vp), &iattr, 0); } /* diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1c890113ab3..34a1982ed6d 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -79,8 +79,7 @@ int xfs_setattr( struct xfs_inode *ip, struct iattr *iattr, - int flags, - cred_t *credp) + int flags) { xfs_mount_t *mp = ip->i_mount; struct inode *inode = VFS_I(ip); @@ -3362,7 +3361,6 @@ xfs_change_file_space( int cmd, xfs_flock64_t *bf, xfs_off_t offset, - cred_t *credp, int attr_flags) { xfs_mount_t *mp = ip->i_mount; @@ -3450,7 +3448,7 @@ xfs_change_file_space( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = startoffset; - error = xfs_setattr(ip, &iattr, attr_flags, credp); + error = xfs_setattr(ip, &iattr, attr_flags); if (error) return error; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index e932a96bec5..b1ae8e3f404 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -15,8 +15,7 @@ struct xfs_iomap; int xfs_open(struct xfs_inode *ip); -int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags, - struct cred *credp); +int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ @@ -44,8 +43,7 @@ int xfs_inode_flush(struct xfs_inode *ip, int flags); int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); int xfs_reclaim(struct xfs_inode *ip); int xfs_change_file_space(struct xfs_inode *ip, int cmd, - xfs_flock64_t *bf, xfs_off_t offset, - struct cred *credp, int attr_flags); + xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, struct xfs_inode *target_ip); -- cgit 
v1.2.3-70-g09d2 From e0b8e8b65d578f5d5538465dff8392cf02e1cc5d Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Thu, 30 Oct 2008 18:30:48 +1100 Subject: [XFS] remove restricted chown parameter from xfs linux On Linux all filesystems are supposed to be operating under Posix' restricted chown. Restricted chown means it restricts chown to the owner unless you have CAP_FOWNER. NOTE: that 2 files outside of fs/xfs have been modified too for this change. Reviewed-by: Dave Chinner SGI-PV: 988919 SGI-Modid: xfs-linux-melb:xfs-kern:32413a Signed-off-by: Tim Shimmin Signed-off-by: Christoph Hellwig Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_globals.c | 1 - fs/xfs/linux-2.6/xfs_ioctl.c | 4 ---- fs/xfs/linux-2.6/xfs_linux.h | 1 - fs/xfs/linux-2.6/xfs_sysctl.c | 11 ----------- fs/xfs/linux-2.6/xfs_sysctl.h | 3 +-- fs/xfs/xfs_vnodeops.c | 13 ++----------- 6 files changed, 3 insertions(+), 30 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 46e862b004e..2ae8b1ccb02 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -26,7 +26,6 @@ */ xfs_param_t xfs_params = { /* MIN DFLT MAX */ - .restrict_chown = { 0, 1, 1 }, .sgid_inherit = { 0, 0, 1 }, .symlink_mode = { 0, 0, 1 }, .panic_mask = { 0, 0, 255 }, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index b5ea3f2afdc..d25694e8cd6 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1103,10 +1103,6 @@ xfs_ioctl_setattr( /* * Change file ownership. Must be the owner or privileged. - * If the system was configured with the "restricted_chown" - * option, the owner is not permitted to give away the file, - * and can change the group id only to a group of which he - * or she is a member. 
*/ if (mask & FSX_PROJID) { /* diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index cc0f7b3a979..214717650b2 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -107,7 +107,6 @@ #undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ #endif -#define restricted_chown xfs_params.restrict_chown.val #define irix_sgid_inherit xfs_params.sgid_inherit.val #define irix_symlink_mode xfs_params.symlink_mode.val #define xfs_panic_mask xfs_params.panic_mask.val diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 7dacb5bbde3..916c0ffb608 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -55,17 +55,6 @@ xfs_stats_clear_proc_handler( #endif /* CONFIG_PROC_FS */ static ctl_table xfs_table[] = { - { - .ctl_name = XFS_RESTRICT_CHOWN, - .procname = "restrict_chown", - .data = &xfs_params.restrict_chown.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xfs_params.restrict_chown.min, - .extra2 = &xfs_params.restrict_chown.max - }, { .ctl_name = XFS_SGID_INHERIT, .procname = "irix_sgid_inherit", diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h index 4aadb8056c3..b9937d450f8 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.h +++ b/fs/xfs/linux-2.6/xfs_sysctl.h @@ -31,7 +31,6 @@ typedef struct xfs_sysctl_val { } xfs_sysctl_val_t; typedef struct xfs_param { - xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/ xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is * not a member of parent dir GID. 
*/ xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ @@ -68,7 +67,7 @@ typedef struct xfs_param { enum { /* XFS_REFCACHE_SIZE = 1 */ /* XFS_REFCACHE_PURGE = 2 */ - XFS_RESTRICT_CHOWN = 3, + /* XFS_RESTRICT_CHOWN = 3 */ XFS_SGID_INHERIT = 4, XFS_SYMLINK_MODE = 5, XFS_PANIC_MASK = 6, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 34a1982ed6d..c45ea278ef4 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -232,10 +232,6 @@ xfs_setattr( /* * Change file ownership. Must be the owner or privileged. - * If the system was configured with the "restricted_chown" - * option, the owner is not permitted to give away the file, - * and can change the group id only to a group of which he - * or she is a member. */ if (mask & (ATTR_UID|ATTR_GID)) { /* @@ -259,9 +255,8 @@ xfs_setattr( * shall be equal to either the group ID or one of the * supplementary group IDs of the calling process. */ - if (restricted_chown && - (iuid != uid || (igid != gid && - !in_group_p((gid_t)gid))) && + if ((iuid != uid || + (igid != gid && !in_group_p((gid_t)gid))) && !capable(CAP_CHOWN)) { code = XFS_ERROR(EPERM); goto error_return; @@ -455,10 +450,6 @@ xfs_setattr( /* * Change file ownership. Must be the owner or privileged. - * If the system was configured with the "restricted_chown" - * option, the owner is not permitted to give away the file, - * and can change the group id only to a group of which he - * or she is a member. */ if (mask & (ATTR_UID|ATTR_GID)) { /* -- cgit v1.2.3-70-g09d2 From cc09c0dc57de7f7d2ed89d480b5653e5f6a32f2c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 17 Nov 2008 17:37:10 +1100 Subject: [XFS] Fix double free of log tickets When an I/O error occurs during an intermediate commit on a rolling transaction, xfs_trans_commit() will free the transaction structure and the related ticket. However, the duplicate transaction that gets used as the transaction continues still contains a pointer to the ticket. 
Hence when the duplicate transaction is cancelled and freed, we free the ticket a second time. Add reference counting to the ticket so that we hold an extra reference to the ticket over the transaction commit. We drop the extra reference once we have checked that the transaction commit did not return an error, thus avoiding a double free on commit error. Credit to Nick Piggin for tripping over the problem. SGI-PV: 989741 Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_bmap.c | 10 ++++++++-- fs/xfs/xfs_inode.c | 10 ++++++++-- fs/xfs/xfs_log.c | 39 +++++++++++++++++++++++++-------------- fs/xfs/xfs_log.h | 4 ++++ fs/xfs/xfs_log_priv.h | 1 + fs/xfs/xfs_trans.c | 9 ++++++++- fs/xfs/xfs_utils.c | 6 ++++++ fs/xfs/xfs_vnodeops.c | 6 ++++++ 8 files changed, 66 insertions(+), 19 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index db289050692..c3912213645 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4292,9 +4292,15 @@ xfs_bmap_finish( * We have a new transaction, so we should return committed=1, * even though we're returning an error. 
*/ - if (error) { + if (error) return error; - } + + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(ntp->t_ticket); + if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES, logcount))) return error; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index cd522827f99..b9771004706 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1782,8 +1782,14 @@ xfs_itruncate_finish( xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_trans_ihold(ntp, ip); - if (!error) - error = xfs_trans_reserve(ntp, 0, + if (error) + return error; + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(ntp->t_ticket); + error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 92c20a8d9e6..4bf44aef644 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -100,12 +100,11 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, /* local ticket functions */ -STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, +STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log, int unit_bytes, int count, char clientid, uint flags); -STATIC void xlog_ticket_put(xlog_t *log, xlog_ticket_t *ticket); #if defined(DEBUG) STATIC void xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr); @@ -360,7 +359,7 @@ xfs_log_done(xfs_mount_t *mp, */ xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); xlog_ungrant_log_space(log, ticket); - xlog_ticket_put(log, ticket); + xfs_log_ticket_put(ticket); } else { xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); xlog_regrant_reserve_log_space(log, ticket); @@ -514,7 +513,7 @@ xfs_log_reserve(xfs_mount_t *mp, retval = xlog_regrant_write_log_space(log, internal_ticket); } else { /* may sleep if need to allocate more tickets */ - 
internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, + internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, client, flags); if (!internal_ticket) return XFS_ERROR(ENOMEM); @@ -749,7 +748,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (tic) { xlog_trace_loggrant(log, tic, "unmount rec"); xlog_ungrant_log_space(log, tic); - xlog_ticket_put(log, tic); + xfs_log_ticket_put(tic); } } else { /* @@ -3222,22 +3221,33 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) */ /* - * Free a used ticket. + * Free a used ticket when it's refcount falls to zero. */ -STATIC void -xlog_ticket_put(xlog_t *log, - xlog_ticket_t *ticket) +void +xfs_log_ticket_put( + xlog_ticket_t *ticket) { - sv_destroy(&ticket->t_wait); - kmem_zone_free(xfs_log_ticket_zone, ticket); -} /* xlog_ticket_put */ + ASSERT(atomic_read(&ticket->t_ref) > 0); + if (atomic_dec_and_test(&ticket->t_ref)) { + sv_destroy(&ticket->t_wait); + kmem_zone_free(xfs_log_ticket_zone, ticket); + } +} +xlog_ticket_t * +xfs_log_ticket_get( + xlog_ticket_t *ticket) +{ + ASSERT(atomic_read(&ticket->t_ref) > 0); + atomic_inc(&ticket->t_ref); + return ticket; +} /* * Allocate and initialise a new log ticket. 
*/ STATIC xlog_ticket_t * -xlog_ticket_get(xlog_t *log, +xlog_ticket_alloc(xlog_t *log, int unit_bytes, int cnt, char client, @@ -3308,6 +3318,7 @@ xlog_ticket_get(xlog_t *log, unit_bytes += 2*BBSIZE; } + atomic_set(&tic->t_ref, 1); tic->t_unit_res = unit_bytes; tic->t_curr_res = unit_bytes; tic->t_cnt = cnt; @@ -3323,7 +3334,7 @@ xlog_ticket_get(xlog_t *log, xlog_tic_reset_res(tic); return tic; -} /* xlog_ticket_get */ +} /****************************************************************************** diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index d47b91f1082..8a3e84e900a 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -134,6 +134,7 @@ typedef struct xfs_log_callback { #ifdef __KERNEL__ /* Log manager interfaces */ struct xfs_mount; +struct xlog_ticket; xfs_lsn_t xfs_log_done(struct xfs_mount *mp, xfs_log_ticket_t ticket, void **iclog, @@ -177,6 +178,9 @@ int xfs_log_need_covered(struct xfs_mount *mp); void xlog_iodone(struct xfs_buf *); +struct xlog_ticket * xfs_log_ticket_get(struct xlog_ticket *ticket); +void xfs_log_ticket_put(struct xlog_ticket *ticket); + #endif diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index de7ef6ca920..b39a1980e82 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -245,6 +245,7 @@ typedef struct xlog_ticket { struct xlog_ticket *t_next; /* :4|8 */ struct xlog_ticket *t_prev; /* :4|8 */ xlog_tid_t t_tid; /* transaction identifier : 4 */ + atomic_t t_ref; /* ticket reference count : 4 */ int t_curr_res; /* current reservation in bytes : 4 */ int t_unit_res; /* unit reservation in bytes : 4 */ char t_ocnt; /* original count : 1 */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index ad137efc870..8570b826fed 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -290,7 +290,7 @@ xfs_trans_dup( ASSERT(tp->t_ticket != NULL); ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE); - ntp->t_ticket = tp->t_ticket; + ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); 
ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; tp->t_blk_res = tp->t_blk_res_used; ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; @@ -1259,6 +1259,13 @@ xfs_trans_roll( trans = *tpp; + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(trans->t_ticket); + + /* * Reserve space in the log for th next transaction. * This also pushes items in the "AIL", the list of logged items, diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 35d4d414bcc..771144932ab 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -172,6 +172,12 @@ xfs_dir_ialloc( *ipp = NULL; return code; } + + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(tp->t_ticket); code = xfs_trans_reserve(tp, 0, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); /* diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c45ea278ef4..0574aadc4d3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1018,6 +1018,12 @@ xfs_inactive_symlink_rmt( ASSERT(XFS_FORCED_SHUTDOWN(mp)); goto error0; } + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(tp->t_ticket); + /* * Remove the memory for extent descriptions (just bookkeeping). */ -- cgit v1.2.3-70-g09d2 From f999a5bf3fa6b3d11334c3ba1e9dcfed5ff9f8a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:32 +1100 Subject: [XFS] wire up ->open for directories Currently there's no ->open method set for directories on XFS. That means we don't perform any check for opening too large directories without O_LARGEFILE, we don't check for shut down filesystems, and we don't actually do the readahead for the first block in the directory. 
Instead of just setting the directories open routine to xfs_file_open we merge the shutdown check directly into xfs_file_open and create a new xfs_dir_open that first calls xfs_file_open and then performs the readahead for block 0. (First sent on September 29th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_file.c | 34 +++++++++++++++++++++++++++++++--- fs/xfs/xfs_vnodeops.c | 22 ---------------------- fs/xfs/xfs_vnodeops.h | 1 - 3 files changed, 31 insertions(+), 26 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 3fee790f138..72fc8d8c8bc 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -38,6 +38,7 @@ #include "xfs_rw.h" #include "xfs_ioctl32.h" #include "xfs_vnodeops.h" +#include "xfs_da_btree.h" #include #include @@ -169,11 +170,37 @@ xfs_file_splice_write_invis( STATIC int xfs_file_open( struct inode *inode, - struct file *filp) + struct file *file) { - if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) + if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EFBIG; - return -xfs_open(XFS_I(inode)); + if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) + return -EIO; + return 0; +} + +STATIC int +xfs_dir_open( + struct inode *inode, + struct file *file) +{ + struct xfs_inode *ip = XFS_I(inode); + int mode; + int error; + + error = xfs_file_open(inode, file); + if (error) + return error; + + /* + * If there are any blocks, read-ahead block 0 as we're almost + * certain to have the next operation be a read there. 
+ */ + mode = xfs_ilock_map_shared(ip); + if (ip->i_d.di_nextents > 0) + xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); + xfs_iunlock(ip, mode); + return 0; } STATIC int @@ -345,6 +372,7 @@ const struct file_operations xfs_invis_file_operations = { const struct file_operations xfs_dir_file_operations = { + .open = xfs_dir_open, .read = generic_read_dir, .readdir = xfs_file_readdir, .llseek = generic_file_llseek, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0574aadc4d3..c055bdb11cb 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -53,28 +53,6 @@ #include "xfs_filestream.h" #include "xfs_vnodeops.h" -int -xfs_open( - xfs_inode_t *ip) -{ - int mode; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return XFS_ERROR(EIO); - - /* - * If it's a directory with any blocks, read-ahead block 0 - * as we're almost certain to have the next operation be a read there. - */ - if (S_ISDIR(ip->i_d.di_mode) && ip->i_d.di_nextents > 0) { - mode = xfs_ilock_map_shared(ip); - if (ip->i_d.di_nextents > 0) - (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); - xfs_iunlock(ip, mode); - } - return 0; -} - int xfs_setattr( struct xfs_inode *ip, diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index b1ae8e3f404..a559400aeae 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -14,7 +14,6 @@ struct xfs_inode; struct xfs_iomap; -int xfs_open(struct xfs_inode *ip); int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ -- cgit v1.2.3-70-g09d2 From 2e6560929d8ab4b650fecc3a87013852b34f0922 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 28 Nov 2008 14:23:33 +1100 Subject: [XFS] fix error inversion problems with data flushing XFS gets the sign of the error wrong in several places when gathering the error from generic linux functions. 
These functions return negative error values, while the core XFS code returns positive error values. Hence when XFS inverts the error to be returned to the VFS, it can incorrectly invert a negative error and this error will be ignored by the syscall return. Fix all the problems related to calling filemap_* functions. Problem initially identified by Nick Piggin in xfs_fsync(). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_fs_subr.c | 23 ++++++++++++++++++++--- fs/xfs/linux-2.6/xfs_lrw.c | 2 +- fs/xfs/linux-2.6/xfs_super.c | 13 +++++++++---- fs/xfs/xfs_vnodeops.c | 2 +- fs/xfs/xfs_vnodeops.h | 1 + 5 files changed, 32 insertions(+), 9 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 36caa6d957d..5aeb7777696 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -24,6 +24,10 @@ int fs_noerr(void) { return 0; } int fs_nosys(void) { return ENOSYS; } void fs_noval(void) { return; } +/* + * note: all filemap functions return negative error codes. These + * need to be inverted before returning to the xfs core functions. 
+ */ void xfs_tosspages( xfs_inode_t *ip, @@ -53,7 +57,7 @@ xfs_flushinval_pages( if (!ret) truncate_inode_pages(mapping, first); } - return ret; + return -ret; } int @@ -72,10 +76,23 @@ xfs_flush_pages( xfs_iflags_clear(ip, XFS_ITRUNCATED); ret = filemap_fdatawrite(mapping); if (flags & XFS_B_ASYNC) - return ret; + return -ret; ret2 = filemap_fdatawait(mapping); if (!ret) ret = ret2; } - return ret; + return -ret; +} + +int +xfs_wait_on_pages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last) +{ + struct address_space *mapping = VFS_I(ip)->i_mapping; + + if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) + return -filemap_fdatawait(mapping); + return 0; } diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 1957e5357d0..4959c874499 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -243,7 +243,7 @@ xfs_read( if (unlikely(ioflags & IO_ISDIRECT)) { if (inode->i_mapping->nrpages) - ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), + ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); mutex_unlock(&inode->i_mutex); if (ret) { diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index cc5e07e3e7a..ae92290e3c1 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -990,21 +990,26 @@ xfs_fs_write_inode( struct inode *inode, int sync) { + struct xfs_inode *ip = XFS_I(inode); int error = 0; int flags = 0; - xfs_itrace_entry(XFS_I(inode)); + xfs_itrace_entry(ip); if (sync) { - filemap_fdatawait(inode->i_mapping); + error = xfs_wait_on_pages(ip, 0, -1); + if (error) + goto out_error; flags |= FLUSH_SYNC; } - error = xfs_inode_flush(XFS_I(inode), flags); + error = xfs_inode_flush(ip, flags); + +out_error: /* * if we failed to write out the inode then mark * it dirty again so we'll try again later. 
*/ if (error) - xfs_mark_inode_dirty_sync(XFS_I(inode)); + xfs_mark_inode_dirty_sync(ip); return -error; } diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c055bdb11cb..f26b038004a 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -681,7 +681,7 @@ xfs_fsync( return XFS_ERROR(EIO); /* capture size updates in I/O completion before writing the inode. */ - error = filemap_fdatawait(VFS_I(ip)->i_mapping); + error = xfs_wait_on_pages(ip, 0, -1); if (error) return XFS_ERROR(error); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index a559400aeae..2a45b00ad32 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -75,5 +75,6 @@ int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last, int fiopt); int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last, uint64_t flags, int fiopt); +int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last); #endif /* _XFS_VNODEOPS_H */ -- cgit v1.2.3-70-g09d2 From 26c5295135d10fc90cbf160adfda392d91f58279 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 28 Nov 2008 14:23:37 +1100 Subject: [XFS] remove i_gen from incore inode i_gen is incremented in directory operations when the directory is changed. It is never read or otherwise used so it should be removed to help reduce the size of the struct xfs_inode. The patch also removes a duplicate logging of the directory inode core. We only need to do this once per transaction so kill the one associated with the i_gen increment. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi --- fs/xfs/xfs_inode.h | 1 - fs/xfs/xfs_rename.c | 12 ++---------- fs/xfs/xfs_vnodeops.c | 29 ++--------------------------- 3 files changed, 4 insertions(+), 38 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7f007ef4bbb..ea691c738f2 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -261,7 +261,6 @@ typedef struct xfs_inode { unsigned short i_flags; /* see defined flags below */ unsigned char i_update_core; /* timestamps/size is dirty */ unsigned char i_update_size; /* di_size field is dirty */ - unsigned int i_gen; /* generation count */ unsigned int i_delayed_blks; /* count of delay alloc blks */ xfs_icdinode_t i_d; /* most of ondisk inode */ diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index d700dacdb10..02f0e8f53a9 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -367,19 +367,11 @@ xfs_rename( &first_block, &free_list, spaceres); if (error) goto abort_return; - xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* - * Update the generation counts on all the directory inodes - * that we're modifying. - */ - src_dp->i_gen++; + xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); - - if (new_parent) { - target_dp->i_gen++; + if (new_parent) xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); - } /* * If this is a synchronous mount, make sure that the diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index f26b038004a..b29a0eb9c0f 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1599,8 +1599,6 @@ xfs_create( xfs_trans_set_sync(tp); } - dp->i_gen++; - /* * Attach the dquot(s) to the inodes and modify them incore. 
* These ids of the inode couldn't have changed since the new @@ -1967,13 +1965,6 @@ xfs_remove( } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* - * Bump the in memory generation count on the parent - * directory so that other can know that it has changed. - */ - dp->i_gen++; - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - if (is_dir) { /* * Drop the link from ip's "..". @@ -1991,8 +1982,8 @@ xfs_remove( } else { /* * When removing a non-directory we need to log the parent - * inode here for the i_gen update. For a directory this is - * done implicitly by the xfs_droplink call for the ".." entry. + * inode here. For a directory this is done implicitly + * by the xfs_droplink call for the ".." entry. */ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); } @@ -2152,7 +2143,6 @@ xfs_link( if (error) goto abort_return; xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - tdp->i_gen++; xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); error = xfs_bumplink(tp, sip); @@ -2329,18 +2319,10 @@ xfs_mkdir( } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* - * Bump the in memory version number of the parent directory - * so that other processes accessing it will recognize that - * the directory has changed. - */ - dp->i_gen++; - error = xfs_dir_init(tp, cdp, dp); if (error) goto error2; - cdp->i_gen = 1; error = xfs_bumplink(tp, dp); if (error) goto error2; @@ -2626,13 +2608,6 @@ xfs_symlink( xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - /* - * Bump the in memory version number of the parent directory - * so that other processes accessing it will recognize that - * the directory has changed. 
- */ - dp->i_gen++; - /* * If this is a synchronous mount, make sure that the * symlink transaction goes to disk before returning to -- cgit v1.2.3-70-g09d2 From 25e41b3d521f52771354a718042a753a3e77df0a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:39 +0100 Subject: move vn_iowait / vn_iowake into xfs_aops.c The whole machinery to wait on I/O completion is related to the I/O path and should be there instead of in xfs_vnode.c. Also give the functions more descriptive names. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_aops.c | 38 ++++++++++++++++++++++++++++++++++++-- fs/xfs/linux-2.6/xfs_aops.h | 3 +++ fs/xfs/linux-2.6/xfs_super.c | 2 +- fs/xfs/linux-2.6/xfs_sync.c | 2 +- fs/xfs/linux-2.6/xfs_vnode.c | 34 ---------------------------------- fs/xfs/linux-2.6/xfs_vnode.h | 10 ---------- fs/xfs/xfs_inode.c | 6 +++--- fs/xfs/xfs_vnodeops.c | 7 ++++--- 8 files changed, 48 insertions(+), 54 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index f35dba9bf1d..de3a198f771 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -42,6 +42,40 @@ #include #include + +/* + * Prime number of hash buckets since address is used as the key. 
+ */ +#define NVSYNC 37 +#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC]) +static wait_queue_head_t xfs_ioend_wq[NVSYNC]; + +void __init +xfs_ioend_init(void) +{ + int i; + + for (i = 0; i < NVSYNC; i++) + init_waitqueue_head(&xfs_ioend_wq[i]); +} + +void +xfs_ioend_wait( + xfs_inode_t *ip) +{ + wait_queue_head_t *wq = to_ioend_wq(ip); + + wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); +} + +STATIC void +xfs_ioend_wake( + xfs_inode_t *ip) +{ + if (atomic_dec_and_test(&ip->i_iocount)) + wake_up(to_ioend_wq(ip)); +} + STATIC void xfs_count_page_state( struct page *page, @@ -164,7 +198,7 @@ xfs_destroy_ioend( __FILE__, __LINE__); } - vn_iowake(ip); + xfs_ioend_wake(ip); mempool_free(ioend, xfs_ioend_pool); } @@ -516,7 +550,7 @@ xfs_cancel_ioend( unlock_buffer(bh); } while ((bh = next_bh) != NULL); - vn_iowake(XFS_I(ioend->io_inode)); + xfs_ioend_wake(XFS_I(ioend->io_inode)); mempool_free(ioend, xfs_ioend_pool); } while ((ioend = next) != NULL); } diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 3ba0631a381..7b26f5ff969 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -43,4 +43,7 @@ typedef struct xfs_ioend { extern const struct address_space_operations xfs_address_space_operations; extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); +extern void xfs_ioend_init(void); +extern void xfs_ioend_wait(struct xfs_inode *); + #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 4ebbd6820e7..36f6cc703ef 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1822,7 +1822,7 @@ init_xfs_fs(void) XFS_BUILD_OPTIONS " enabled\n"); ktrace_init(64); - vn_init(); + xfs_ioend_init(); xfs_dir_startup(); error = xfs_init_zones(); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index d12d31b86fa..ca5bd2951a8 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c 
@@ -133,7 +133,7 @@ xfs_sync_inodes_ag( lock_flags |= XFS_IOLOCK_SHARED; error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); if (flags & SYNC_IOWAIT) - vn_iowait(ip); + xfs_ioend_wait(ip); } xfs_ilock(ip, XFS_ILOCK_SHARED); diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index a8cf97a4319..f6d14112279 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -32,40 +32,6 @@ #include "xfs_mount.h" -/* - * Dedicated vnode inactive/reclaim sync wait queues. - * Prime number of hash buckets since address is used as the key. - */ -#define NVSYNC 37 -#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) -static wait_queue_head_t vsync[NVSYNC]; - -void __init -vn_init(void) -{ - int i; - - for (i = 0; i < NVSYNC; i++) - init_waitqueue_head(&vsync[i]); -} - -void -vn_iowait( - xfs_inode_t *ip) -{ - wait_queue_head_t *wq = vptosync(ip); - - wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); -} - -void -vn_iowake( - xfs_inode_t *ip) -{ - if (atomic_dec_and_test(&ip->i_iocount)) - wake_up(vptosync(ip)); -} - #ifdef XFS_INODE_TRACE #define KTRACE_ENTER(ip, vk, s, line, ra) \ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 07fed8837db..bd3e05c4790 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -54,16 +54,6 @@ struct attrlist_cursor_kern; Prevent VM access to the pages until the operation completes. */ - -extern void vn_init(void); - -/* - * Yeah, these don't take vnode anymore at all, all this should be - * cleaned up at some point. - */ -extern void vn_iowait(struct xfs_inode *ip); -extern void vn_iowake(struct xfs_inode *ip); - #define IHOLD(ip) \ do { \ ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4e664f57860..063da344e18 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1322,8 +1322,8 @@ xfs_itrunc_trace( * direct I/O with the truncate operation. 
Also, because we hold * the IOLOCK in exclusive mode, we prevent new direct I/Os from being * started until the truncate completes and drops the lock. Essentially, - * the vn_iowait() call forms an I/O barrier that provides strict ordering - * between direct I/Os and the truncate operation. + * the xfs_ioend_wait() call forms an I/O barrier that provides strict + * ordering between direct I/Os and the truncate operation. * * The flags parameter can have either the value XFS_ITRUNC_DEFINITE * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used @@ -1354,7 +1354,7 @@ xfs_itruncate_start( /* wait for the completion of any pending DIOs */ if (new_size == 0 || new_size < ip->i_size) - vn_iowait(ip); + xfs_ioend_wait(ip); /* * Call toss_pages or flushinval_pages to get rid of pages diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index b29a0eb9c0f..2d57aae0e31 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -338,7 +338,7 @@ xfs_setattr( } /* wait for all I/O to complete */ - vn_iowait(ip); + xfs_ioend_wait(ip); if (!code) code = xfs_itruncate_data(ip, iattr->ia_size); @@ -2758,7 +2758,7 @@ xfs_reclaim( return 0; } - vn_iowait(ip); + xfs_ioend_wait(ip); ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); @@ -3149,7 +3149,8 @@ xfs_free_file_space( need_iolock = 0; if (need_iolock) { xfs_ilock(ip, XFS_IOLOCK_EXCL); - vn_iowait(ip); /* wait for the completion of any pending DIOs */ + /* wait for the completion of any pending DIOs */ + xfs_ioend_wait(ip); } rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); -- cgit v1.2.3-70-g09d2 From c6422617a1c0d7787e515748b01f594fe43aea98 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Fri, 5 Dec 2008 13:16:15 +1100 Subject: [XFS] Check return value of xfs_buf_get_noaddr() We check the return value of all other calls to xfs_buf_get_noaddr(). Make sense to do it here too. 
Signed-off-by: Lachlan McIlroy Reviewed-by: Christoph Hellwig Reviewed-by: Eric Sandeen --- fs/xfs/xfs_vnodeops.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 2d57aae0e31..4547608b46c 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -3034,6 +3034,8 @@ xfs_zero_remaining_bytes( bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp); + if (!bp) + return XFS_ERROR(ENOMEM); for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); -- cgit v1.2.3-70-g09d2 From c4cd747ee6c3ba1e7727878e3fce482d0d8c0136 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:34 -0500 Subject: [XFS] use inode_change_ok for setattr permission checking Instead of implementing our own checks use inode_change_ok to check for necessary permission in setattr. There is a slight change in behaviour as inode_change_ok doesn't allow i_mode updates to add the suid or sgid without superuser privileges while the old XFS code just stripped away those bits from the file mode.
(First sent on September 29th) Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_vnodeops.c | 149 ++++++++++++-------------------------------------- 1 file changed, 36 insertions(+), 113 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 4547608b46c..f07bf8768c3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -70,7 +70,6 @@ xfs_setattr( gid_t gid=0, igid=0; int timeflags = 0; struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; - int file_owner; int need_iolock = 1; xfs_itrace_entry(ip); @@ -81,6 +80,10 @@ xfs_setattr( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); + code = -inode_change_ok(inode, iattr); + if (code) + return code; + olddquot1 = olddquot2 = NULL; udqp = gdqp = NULL; @@ -158,56 +161,6 @@ xfs_setattr( xfs_ilock(ip, lock_flags); - /* boolean: are we the file owner? */ - file_owner = (current_fsuid() == ip->i_d.di_uid); - - /* - * Change various properties of a file. - * Only the owner or users with CAP_FOWNER - * capability may do these things. - */ - if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) { - /* - * CAP_FOWNER overrides the following restrictions: - * - * The user ID of the calling process must be equal - * to the file owner ID, except in cases where the - * CAP_FSETID capability is applicable. - */ - if (!file_owner && !capable(CAP_FOWNER)) { - code = XFS_ERROR(EPERM); - goto error_return; - } - - /* - * CAP_FSETID overrides the following restrictions: - * - * The effective user ID of the calling process shall match - * the file owner when setting the set-user-ID and - * set-group-ID bits on that file.
- * - * The effective group ID or one of the supplementary group - * IDs of the calling process shall match the group owner of - * the file when setting the set-group-ID bit on that file - */ - if (mask & ATTR_MODE) { - mode_t m = 0; - - if ((iattr->ia_mode & S_ISUID) && !file_owner) - m |= S_ISUID; - if ((iattr->ia_mode & S_ISGID) && - !in_group_p((gid_t)ip->i_d.di_gid)) - m |= S_ISGID; -#if 0 - /* Linux allows this, Irix doesn't. */ - if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode)) - m |= S_ISVTX; -#endif - if (m && !capable(CAP_FSETID)) - iattr->ia_mode &= ~m; - } - } - /* * Change file ownership. Must be the owner or privileged. */ @@ -223,22 +176,6 @@ xfs_setattr( gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; - /* - * CAP_CHOWN overrides the following restrictions: - * - * If _POSIX_CHOWN_RESTRICTED is defined, this capability - * shall override the restriction that a process cannot - * change the user ID of a file it owns and the restriction - * that the group ID supplied to the chown() function - * shall be equal to either the group ID or one of the - * supplementary group IDs of the calling process. - */ - if ((iuid != uid || - (igid != gid && !in_group_p((gid_t)gid))) && - !capable(CAP_CHOWN)) { - code = XFS_ERROR(EPERM); - goto error_return; - } /* * Do a quota reservation only if uid/gid is actually * going to change. @@ -276,36 +213,22 @@ xfs_setattr( code = XFS_ERROR(EINVAL); goto error_return; } + /* * Make sure that the dquots are attached to the inode. */ - if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED))) + code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); + if (code) goto error_return; - } - /* - * Change file access or modified times. - */ - if (mask & (ATTR_ATIME|ATTR_MTIME)) { - if (!file_owner) { - if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) && - !capable(CAP_FOWNER)) { - code = XFS_ERROR(EPERM); - goto error_return; - } - } - } - - /* - * Now we can make the changes. 
Before we join the inode - * to the transaction, if ATTR_SIZE is set then take care of - * the part of the truncation that must be done without the - * inode lock. This needs to be done before joining the inode - * to the transaction, because the inode cannot be unlocked - * once it is a part of the transaction. - */ - if (mask & ATTR_SIZE) { - code = 0; + /* + * Now we can make the changes. Before we join the inode + * to the transaction, if ATTR_SIZE is set then take care of + * the part of the truncation that must be done without the + * inode lock. This needs to be done before joining the inode + * to the transaction, because the inode cannot be unlocked + * once it is a part of the transaction. + */ if (iattr->ia_size > ip->i_size) { /* * Do the first part of growing a file: zero any data @@ -360,17 +283,10 @@ xfs_setattr( } commit_flags = XFS_TRANS_RELEASE_LOG_RES; xfs_ilock(ip, XFS_ILOCK_EXCL); - } - if (tp) { xfs_trans_ijoin(tp, ip, lock_flags); xfs_trans_ihold(tp, ip); - } - /* - * Truncate file. Must have write permission and not be a directory. - */ - if (mask & ATTR_SIZE) { /* * Only change the c/mtime if we are changing the size * or we are explicitly asked to change it. This handles @@ -410,20 +326,9 @@ xfs_setattr( */ xfs_iflags_set(ip, XFS_ITRUNCATED); } - } - - /* - * Change file access modes. - */ - if (mask & ATTR_MODE) { - ip->i_d.di_mode &= S_IFMT; - ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT; - - inode->i_mode &= S_IFMT; - inode->i_mode |= iattr->ia_mode & ~S_IFMT; - - xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); - timeflags |= XFS_ICHGTIME_CHG; + } else if (tp) { + xfs_trans_ijoin(tp, ip, lock_flags); + xfs_trans_ihold(tp, ip); } /* @@ -471,6 +376,24 @@ xfs_setattr( timeflags |= XFS_ICHGTIME_CHG; } + /* + * Change file access modes. 
+ */ + if (mask & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + + ip->i_d.di_mode &= S_IFMT; + ip->i_d.di_mode |= mode & ~S_IFMT; + + inode->i_mode &= S_IFMT; + inode->i_mode |= mode & ~S_IFMT; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + timeflags |= XFS_ICHGTIME_CHG; + } /* * Change file access or modified times. -- cgit v1.2.3-70-g09d2