diff options
Diffstat (limited to 'fs/xfs')
85 files changed, 2303 insertions, 3184 deletions
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 index d1491aa7a0e..97316451fc6 100644 --- a/fs/xfs/Makefile-linux-2.6 +++ b/fs/xfs/Makefile-linux-2.6 @@ -70,7 +70,6 @@ xfs-y += xfs_alloc.o \ xfs_iget.o \ xfs_inode.o \ xfs_inode_item.o \ - xfs_iocore.o \ xfs_iomap.o \ xfs_itable.o \ xfs_dfrag.o \ diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h deleted file mode 100644 index 50a6191178f..00000000000 --- a/fs/xfs/linux-2.6/spin.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_SPIN_H__ -#define __XFS_SUPPORT_SPIN_H__ - -#include <linux/sched.h> /* preempt needs this */ -#include <linux/spinlock.h> - -/* - * Map lock_t from IRIX to Linux spinlocks. - * - * We do not make use of lock_t from interrupt context, so we do not - * have to worry about disabling interrupts at all (unlike IRIX). - */ - -typedef spinlock_t lock_t; - -#define SPLDECL(s) unsigned long s -#ifndef DEFINE_SPINLOCK -#define DEFINE_SPINLOCK(s) spinlock_t s = SPIN_LOCK_UNLOCKED -#endif - -#define spinlock_init(lock, name) spin_lock_init(lock) -#define spinlock_destroy(lock) -#define mutex_spinlock(lock) ({ spin_lock(lock); 0; }) -#define mutex_spinunlock(lock, s) do { spin_unlock(lock); (void)s; } while (0) -#define nested_spinlock(lock) spin_lock(lock) -#define nested_spinunlock(lock) spin_unlock(lock) - -#endif /* __XFS_SUPPORT_SPIN_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 2e34b104107..e0519529c26 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -107,6 +107,18 @@ xfs_page_trace( #define xfs_page_trace(tag, inode, page, pgoff) #endif +STATIC struct block_device * +xfs_find_bdev_for_inode( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + if (XFS_IS_REALTIME_INODE(ip)) + return mp->m_rtdev_targp->bt_bdev; + else + return mp->m_ddev_targp->bt_bdev; +} + /* * Schedule IO completion handling on a xfsdatad if this was * the final hold on this ioend. If we are asked to wait, @@ -151,7 +163,7 @@ xfs_destroy_ioend( /* * Update on-disk file size now that data has been written to disk. * The current in-memory file size is i_size. If a write is beyond - * eof io_new_size will be the intended file size until i_size is + * eof i_new_size will be the intended file size until i_size is * updated. If this write does not extend all the way to the valid * file size then restrict this update to the end of the write. */ @@ -173,7 +185,7 @@ xfs_setfilesize( xfs_ilock(ip, XFS_ILOCK_EXCL); - isize = MAX(ip->i_size, ip->i_iocore.io_new_size); + isize = MAX(ip->i_size, ip->i_new_size); isize = MIN(isize, bsize); if (ip->i_d.di_size < isize) { @@ -226,12 +238,13 @@ xfs_end_bio_unwritten( { xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); + struct xfs_inode *ip = XFS_I(ioend->io_inode); xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; if (likely(!ioend->io_error)) { - xfs_bmap(XFS_I(ioend->io_inode), offset, size, - BMAPI_UNWRITTEN, NULL, NULL); + if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) + xfs_iomap_write_unwritten(ip, offset, size); xfs_setfilesize(ioend); } xfs_destroy_ioend(ioend); @@ -304,7 +317,7 @@ xfs_map_blocks( xfs_inode_t *ip = XFS_I(inode); int error, nmaps = 1; - error = xfs_bmap(ip, offset, count, + error = xfs_iomap(ip, offset, count, flags, mapp, &nmaps); if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE))) xfs_iflags_set(ip, XFS_IMODIFIED); @@ -1323,7 +1336,7 @@ __xfs_get_blocks( offset = (xfs_off_t)iblock << inode->i_blkbits; ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); size = bh_result->b_size; - error = xfs_bmap(XFS_I(inode), offset, size, + error = xfs_iomap(XFS_I(inode), offset, size, create ? flags : BMAPI_READ, &iomap, &niomap); if (error) return -error; @@ -1471,28 +1484,21 @@ xfs_vm_direct_IO( { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - xfs_iomap_t iomap; - int maps = 1; - int error; + struct block_device *bdev; ssize_t ret; - error = xfs_bmap(XFS_I(inode), offset, 0, - BMAPI_DEVICE, &iomap, &maps); - if (error) - return -error; + bdev = xfs_find_bdev_for_inode(XFS_I(inode)); if (rw == WRITE) { iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); ret = blockdev_direct_IO_own_locking(rw, iocb, inode, - iomap.iomap_target->bt_bdev, - iov, offset, nr_segs, + bdev, iov, offset, nr_segs, xfs_get_blocks_direct, xfs_end_io_direct); } else { iocb->private = xfs_alloc_ioend(inode, IOMAP_READ); ret = blockdev_direct_IO_no_locking(rw, iocb, inode, - iomap.iomap_target->bt_bdev, - iov, offset, nr_segs, + bdev, iov, offset, nr_segs, xfs_get_blocks_direct, xfs_end_io_direct); } @@ -1525,8 +1531,7 @@ xfs_vm_bmap( struct inode *inode = (struct inode *)mapping->host; struct xfs_inode *ip = XFS_I(inode); - vn_trace_entry(XFS_I(inode), __FUNCTION__, - (inst_t *)__return_address); + xfs_itrace_entry(XFS_I(inode)); xfs_rwlock(ip, VRWLOCK_READ); xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); xfs_rwunlock(ip, VRWLOCK_READ); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 0382c19d652..e347bfd47c9 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -387,8 +387,6 @@ _xfs_buf_lookup_pages( if (unlikely(page == NULL)) { if (flags & XBF_READ_AHEAD) { bp->b_page_count = i; - for (i = 0; i < bp->b_page_count; i++) - unlock_page(bp->b_pages[i]); return -ENOMEM; } @@ -418,24 +416,17 @@ _xfs_buf_lookup_pages( ASSERT(!PagePrivate(page)); if (!PageUptodate(page)) { page_count--; - if (blocksize >= PAGE_CACHE_SIZE) { - if (flags & XBF_READ) - bp->b_locked = 1; - } else if (!PagePrivate(page)) { + if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) { if (test_page_region(page, offset, nbytes)) page_count++; } } + unlock_page(page); bp->b_pages[i] = page; offset = 0; } - if (!bp->b_locked) { - for (i = 0; i < bp->b_page_count; i++) - unlock_page(bp->b_pages[i]); - } - if (page_count == bp->b_page_count) bp->b_flags |= XBF_DONE; @@ -751,7 +742,6 @@ xfs_buf_associate_memory( bp->b_pages[i] = mem_to_page((void *)pageaddr); pageaddr += PAGE_CACHE_SIZE; } - bp->b_locked = 0; bp->b_count_desired = len; bp->b_buffer_length = buflen; @@ -1098,25 +1088,13 @@ xfs_buf_iostart( return status; } -STATIC_INLINE int -_xfs_buf_iolocked( - xfs_buf_t *bp) -{ - ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE)); - if (bp->b_flags & XBF_READ) - return bp->b_locked; - return 0; -} - STATIC_INLINE void _xfs_buf_ioend( xfs_buf_t *bp, int schedule) { - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { - bp->b_locked = 0; + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) xfs_buf_ioend(bp, schedule); - } } STATIC void @@ -1147,10 +1125,6 @@ xfs_buf_bio_end_io( if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - - if (_xfs_buf_iolocked(bp)) { - unlock_page(page); - } } while (bvec >= bio->bi_io_vec); _xfs_buf_ioend(bp, 1); @@ -1161,13 +1135,12 @@ STATIC void _xfs_buf_ioapply( xfs_buf_t *bp) { - int i, rw, map_i, total_nr_pages, nr_pages; + int rw, map_i, total_nr_pages, nr_pages; struct bio *bio; int offset = bp->b_offset; int size = bp->b_count_desired; sector_t sector = bp->b_bn; unsigned int blocksize = bp->b_target->bt_bsize; - int locking = _xfs_buf_iolocked(bp); total_nr_pages = bp->b_page_count; map_i = 0; @@ -1190,7 +1163,7 @@ _xfs_buf_ioapply( * filesystem block size is not smaller than the page size. */ if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && - (bp->b_flags & XBF_READ) && locking && + (bp->b_flags & XBF_READ) && (blocksize >= PAGE_CACHE_SIZE)) { bio = bio_alloc(GFP_NOIO, 1); @@ -1207,24 +1180,6 @@ _xfs_buf_ioapply( goto submit_io; } - /* Lock down the pages which we need to for the request */ - if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) { - for (i = 0; size; i++) { - int nbytes = PAGE_CACHE_SIZE - offset; - struct page *page = bp->b_pages[i]; - - if (nbytes > size) - nbytes = size; - - lock_page(page); - - size -= nbytes; - offset = 0; - } - offset = bp->b_offset; - size = bp->b_count_desired; - } - next_chunk: atomic_inc(&bp->b_io_remaining); nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); @@ -1571,7 +1526,7 @@ xfs_alloc_delwrite_queue( INIT_LIST_HEAD(&btp->bt_list); INIT_LIST_HEAD(&btp->bt_delwrite_queue); - spinlock_init(&btp->bt_delwrite_lock, "delwri_lock"); + spin_lock_init(&btp->bt_delwrite_lock); btp->bt_flags = 0; btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd"); if (IS_ERR(btp->bt_task)) { diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index b5908a34b15..a3d207de48b 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -143,7 +143,6 @@ typedef struct xfs_buf { void *b_fspriv2; void *b_fspriv3; unsigned short b_error; /* error code on I/O */ - unsigned short b_locked; /* page array is locked */ unsigned int b_page_count; /* size of page array */ unsigned int b_offset; /* page offset in first page */ struct page **b_pages; /* array of page pointers */ diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 15bd4948832..ca4f66c4de1 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -118,20 +118,29 @@ xfs_nfs_get_inode( u64 ino, u32 generation) { - xfs_fid_t xfid; - bhv_vnode_t *vp; + xfs_mount_t *mp = XFS_M(sb); + xfs_inode_t *ip; int error; - xfid.fid_len = sizeof(xfs_fid_t) - sizeof(xfid.fid_len); - xfid.fid_pad = 0; - xfid.fid_ino = ino; - xfid.fid_gen = generation; + /* + * NFS can sometimes send requests for ino 0. Fail them gracefully. + */ + if (ino == 0) + return ERR_PTR(-ESTALE); - error = xfs_vget(XFS_M(sb), &vp, &xfid); + error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); if (error) return ERR_PTR(-error); + if (!ip) + return ERR_PTR(-EIO); + + if (!ip->i_d.di_mode || ip->i_d.di_gen != generation) { + xfs_iput_new(ip, XFS_ILOCK_SHARED); + return ERR_PTR(-ENOENT); + } - return vp ? vn_to_inode(vp) : NULL; + xfs_iunlock(ip, XFS_ILOCK_SHARED); + return ip->i_vnode; } STATIC struct dentry * diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 21a1c2b1c5f..edab1ffbb16 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -350,8 +350,8 @@ xfs_file_readdir( size = buf.used; de = (struct hack_dirent *)buf.dirent; - curr_offset = de->offset /* & 0x7fffffff */; while (size > 0) { + curr_offset = de->offset /* & 0x7fffffff */; if (filldir(dirent, de->name, de->namlen, curr_offset & 0x7fffffff, de->ino, de->d_type)) { @@ -362,7 +362,6 @@ xfs_file_readdir( sizeof(u64)); size -= reclen; de = (struct hack_dirent *)((char *)de + reclen); - curr_offset = de->offset /* & 0x7fffffff */; } } diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 9febf9dc999..ef90e64641e 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -47,5 +47,6 @@ xfs_param_t xfs_params = { /* * Global system credential structure. */ -cred_t sys_cred_val, *sys_cred = &sys_cred_val; +static cred_t sys_cred_val; +cred_t *sys_cred = &sys_cred_val; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 98a56568bb2..4c82a050a3a 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -75,7 +75,6 @@ xfs_find_handle( xfs_handle_t handle; xfs_fsop_handlereq_t hreq; struct inode *inode; - bhv_vnode_t *vp; if (copy_from_user(&hreq, arg, sizeof(hreq))) return -XFS_ERROR(EFAULT); @@ -134,21 +133,16 @@ xfs_find_handle( return -XFS_ERROR(EBADF); } - /* we need the vnode */ - vp = vn_from_inode(inode); - /* now we can grab the fsid */ memcpy(&handle.ha_fsid, XFS_I(inode)->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); hsize = sizeof(xfs_fsid_t); if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { - xfs_inode_t *ip; + xfs_inode_t *ip = XFS_I(inode); int lock_mode; /* need to get access to the xfs_inode to read the generation */ - ip = xfs_vtoi(vp); - ASSERT(ip); lock_mode = xfs_ilock_map_shared(ip); /* fill in fid section of handle from inode */ @@ -176,21 +170,19 @@ xfs_find_handle( /* - * Convert userspace handle data into vnode (and inode). - * We [ab]use the fact that all the fsop_handlereq ioctl calls - * have a data structure argument whose first component is always - * a xfs_fsop_handlereq_t, so we can cast to and from this type. - * This allows us to optimise the copy_from_user calls and gives - * a handy, shared routine. + * Convert userspace handle data into inode. + * + * We use the fact that all the fsop_handlereq ioctl calls have a data + * structure argument whose first component is always a xfs_fsop_handlereq_t, + * so we can pass that sub structure into this handy, shared routine. * - * If no error, caller must always VN_RELE the returned vp. + * If no error, caller must always iput the returned inode. */ STATIC int xfs_vget_fsop_handlereq( xfs_mount_t *mp, struct inode *parinode, /* parent inode pointer */ xfs_fsop_handlereq_t *hreq, - bhv_vnode_t **vp, struct inode **inode) { void __user *hanp; @@ -199,8 +191,6 @@ xfs_vget_fsop_handlereq( xfs_handle_t *handlep; xfs_handle_t handle; xfs_inode_t *ip; - struct inode *inodep; - bhv_vnode_t *vpp; xfs_ino_t ino; __u32 igen; int error; @@ -241,7 +231,7 @@ xfs_vget_fsop_handlereq( } /* - * Get the XFS inode, building a vnode to go with it. + * Get the XFS inode, building a Linux inode to go with it. */ error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); if (error) @@ -253,12 +243,9 @@ xfs_vget_fsop_handlereq( return XFS_ERROR(ENOENT); } - vpp = XFS_ITOV(ip); - inodep = vn_to_inode(vpp); xfs_iunlock(ip, XFS_ILOCK_SHARED); - *vp = vpp; - *inode = inodep; + *inode = XFS_ITOV(ip); return 0; } @@ -275,7 +262,6 @@ xfs_open_by_handle( struct file *filp; struct inode *inode; struct dentry *dentry; - bhv_vnode_t *vp; xfs_fsop_handlereq_t hreq; if (!capable(CAP_SYS_ADMIN)) @@ -283,7 +269,7 @@ xfs_open_by_handle( if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); if (error) return -error; @@ -385,7 +371,6 @@ xfs_readlink_by_handle( { struct inode *inode; xfs_fsop_handlereq_t hreq; - bhv_vnode_t *vp; __u32 olen; void *link; int error; @@ -395,7 +380,7 @@ xfs_readlink_by_handle( if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); if (error) return -error; @@ -438,34 +423,32 @@ xfs_fssetdm_by_handle( struct fsdmidata fsd; xfs_fsop_setdm_handlereq_t dmhreq; struct inode *inode; - bhv_vnode_t *vp; if (!capable(CAP_MKNOD)) return -XFS_ERROR(EPERM); if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &vp, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &inode); if (error) return -error; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { - VN_RELE(vp); - return -XFS_ERROR(EPERM); + error = -XFS_ERROR(EPERM); + goto out; } if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { - VN_RELE(vp); - return -XFS_ERROR(EFAULT); + error = -XFS_ERROR(EFAULT); + goto out; } - error = xfs_set_dmattrs(xfs_vtoi(vp), - fsd.fsd_dmevmask, fsd.fsd_dmstate); + error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask, + fsd.fsd_dmstate); - VN_RELE(vp); - if (error) - return -error; - return 0; + out: + iput(inode); + return error; } STATIC int @@ -478,7 +461,6 @@ xfs_attrlist_by_handle( attrlist_cursor_kern_t *cursor; xfs_fsop_attrlist_handlereq_t al_hreq; struct inode *inode; - bhv_vnode_t *vp; char *kbuf; if (!capable(CAP_SYS_ADMIN)) @@ -488,8 +470,7 @@ xfs_attrlist_by_handle( if (al_hreq.buflen > XATTR_LIST_MAX) return -XFS_ERROR(EINVAL); - error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, - &vp, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode); if (error) goto out; @@ -509,7 +490,7 @@ xfs_attrlist_by_handle( out_kfree: kfree(kbuf); out_vn_rele: - VN_RELE(vp); + iput(inode); out: return -error; } @@ -531,7 +512,7 @@ xfs_attrmulti_attr_get( if (!kbuf) return ENOMEM; - error = xfs_attr_get(XFS_I(inode), name, kbuf, len, flags, NULL); + error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags, NULL); if (error) goto out_kfree; @@ -598,7 +579,6 @@ xfs_attrmulti_by_handle( xfs_attr_multiop_t *ops; xfs_fsop_attrmulti_handlereq_t am_hreq; struct inode *inode; - bhv_vnode_t *vp; unsigned int i, size; char *attr_name; @@ -607,7 +587,7 @@ xfs_attrmulti_by_handle( if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &vp, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &inode); if (error) goto out; @@ -666,7 +646,7 @@ xfs_attrmulti_by_handle( out_kfree_ops: kfree(ops); out_vn_rele: - VN_RELE(vp); + iput(inode); out: return -error; } @@ -702,7 +682,6 @@ xfs_ioc_fsgeometry( STATIC int xfs_ioc_xattr( - bhv_vnode_t *vp, xfs_inode_t *ip, struct file *filp, unsigned int cmd, @@ -735,12 +714,10 @@ xfs_ioctl( void __user *arg) { struct inode *inode = filp->f_path.dentry->d_inode; - bhv_vnode_t *vp = vn_from_inode(inode); xfs_mount_t *mp = ip->i_mount; int error; - vn_trace_entry(XFS_I(inode), "xfs_ioctl", (inst_t *)__return_address); - + xfs_itrace_entry(XFS_I(inode)); switch (cmd) { case XFS_IOC_ALLOCSP: @@ -764,7 +741,7 @@ xfs_ioctl( case XFS_IOC_DIOINFO: { struct dioattr da; xfs_buftarg_t *target = - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? + XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; da.d_mem = da.d_miniosz = 1 << target->bt_sshift; @@ -796,7 +773,7 @@ xfs_ioctl( case XFS_IOC_GETXFLAGS: case XFS_IOC_SETXFLAGS: case XFS_IOC_FSSETXATTR: - return xfs_ioc_xattr(vp, ip, filp, cmd, arg); + return xfs_ioc_xattr(ip, filp, cmd, arg); case XFS_IOC_FSSETDM: { struct fsdmidata dmi; @@ -1203,7 +1180,6 @@ xfs_ioc_fsgetxattr( STATIC int xfs_ioc_xattr( - bhv_vnode_t *vp, xfs_inode_t *ip, struct file *filp, unsigned int cmd, @@ -1237,7 +1213,7 @@ xfs_ioc_xattr( error = xfs_setattr(ip, vattr, attr_flags, NULL); if (likely(!error)) - __vn_revalidate(vp, vattr); /* update flags */ + vn_revalidate(XFS_ITOV(ip)); /* update flags */ error = -error; break; } @@ -1272,7 +1248,7 @@ xfs_ioc_xattr( error = xfs_setattr(ip, vattr, attr_flags, NULL); if (likely(!error)) - __vn_revalidate(vp, vattr); /* update flags */ + vn_revalidate(XFS_ITOV(ip)); /* update flags */ error = -error; break; } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index bf2a956b63c..a4b254eb43b 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -44,6 +44,7 @@ #include "xfs_error.h" #include "xfs_dfrag.h" #include "xfs_vnodeops.h" +#include "xfs_ioctl32.h" #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) @@ -379,9 +380,6 @@ xfs_compat_ioctl( switch (cmd) { case XFS_IOC_DIOINFO: case XFS_IOC_FSGEOMETRY: - case XFS_IOC_GETVERSION: - case XFS_IOC_GETXFLAGS: - case XFS_IOC_SETXFLAGS: case XFS_IOC_FSGETXATTR: case XFS_IOC_FSSETXATTR: case XFS_IOC_FSGETXATTRA: @@ -407,6 +405,11 @@ xfs_compat_ioctl( case XFS_IOC_ERROR_CLEARALL: break; + case XFS_IOC32_GETXFLAGS: + case XFS_IOC32_SETXFLAGS: + case XFS_IOC32_GETVERSION: + cmd = _NATIVE_IOC(cmd, long); + break; #ifdef BROKEN_X86_ALIGNMENT /* xfs_flock_t has wrong u32 vs u64 alignment */ case XFS_IOC_ALLOCSP_32: diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 5e8bb7f71b5..cc4abd3daa4 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -52,6 +52,7 @@ #include <linux/xattr.h> #include <linux/namei.h> #include <linux/security.h> +#include <linux/falloc.h> /* * Bring the atime in the XFS inode uptodate. @@ -71,6 +72,22 @@ xfs_synchronize_atime( } /* + * If the linux inode exists, mark it dirty. + * Used when commiting a dirty inode into a transaction so that + * the inode will get written back by the linux code + */ +void +xfs_mark_inode_dirty_sync( + xfs_inode_t *ip) +{ + bhv_vnode_t *vp; + + vp = XFS_ITOV_NULL(ip); + if (vp) + mark_inode_dirty_sync(vn_to_inode(vp)); +} + +/* * Change the requested timestamp in the given inode. * We don't lock across timestamp updates, and we don't log them but * we do record the fact that there is dirty information in core. @@ -184,10 +201,6 @@ xfs_validate_fields( struct xfs_inode *ip = XFS_I(inode); loff_t size; - inode->i_nlink = ip->i_d.di_nlink; - inode->i_blocks = - XFS_FSB_TO_BB(ip->i_mount, ip->i_d.di_nblocks + - ip->i_delayed_blks); /* we're under i_sem so i_size can't change under us */ size = XFS_ISIZE(ip); if (i_size_read(inode) != size) @@ -542,12 +555,31 @@ xfs_vn_put_link( #ifdef CONFIG_XFS_POSIX_ACL STATIC int +xfs_check_acl( + struct inode *inode, + int mask) +{ + struct xfs_inode *ip = XFS_I(inode); + int error; + + xfs_itrace_entry(ip); + + if (XFS_IFORK_Q(ip)) { + error = xfs_acl_iaccess(ip, mask, NULL); + if (error != -1) + return -error; + } + + return -EAGAIN; +} + +STATIC int xfs_vn_permission( - struct inode *inode, - int mode, - struct nameidata *nd) + struct inode *inode, + int mask, + struct nameidata *nd) { - return -xfs_access(XFS_I(inode), mode << 6, NULL); + return generic_permission(inode, mask, xfs_check_acl); } #else #define xfs_vn_permission NULL @@ -555,33 +587,61 @@ xfs_vn_permission( STATIC int xfs_vn_getattr( - struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) + struct vfsmount *mnt, + struct dentry *dentry, + struct kstat *stat) { - struct inode *inode = dentry->d_inode; - bhv_vattr_t vattr = { .va_mask = XFS_AT_STAT }; - int error; - - error = xfs_getattr(XFS_I(inode), &vattr, ATTR_LAZY); - if (likely(!error)) { - stat->size = i_size_read(inode); - stat->dev = inode->i_sb->s_dev; - stat->rdev = (vattr.va_rdev == 0) ? 0 : - MKDEV(sysv_major(vattr.va_rdev) & 0x1ff, - sysv_minor(vattr.va_rdev)); - stat->mode = vattr.va_mode; - stat->nlink = vattr.va_nlink; - stat->uid = vattr.va_uid; - stat->gid = vattr.va_gid; - stat->ino = vattr.va_nodeid; - stat->atime = vattr.va_atime; - stat->mtime = vattr.va_mtime; - stat->ctime = vattr.va_ctime; - stat->blocks = vattr.va_nblocks; - stat->blksize = vattr.va_blocksize; + struct inode *inode = dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + xfs_itrace_entry(ip); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + stat->size = XFS_ISIZE(ip); + stat->dev = inode->i_sb->s_dev; + stat->mode = ip->i_d.di_mode; + stat->nlink = ip->i_d.di_nlink; + stat->uid = ip->i_d.di_uid; + stat->gid = ip->i_d.di_gid; + stat->ino = ip->i_ino; +#if XFS_BIG_INUMS + stat->ino += mp->m_inoadd; +#endif + stat->atime = inode->i_atime; + stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec; + stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; + stat->ctime.tv_sec = ip->i_d.di_ctime.t_sec; + stat->ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + stat->blocks = + XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); + + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + stat->blksize = BLKDEV_IOSIZE; + stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, + sysv_minor(ip->i_df.if_u2.if_rdev)); + break; + default: + if (XFS_IS_REALTIME_INODE(ip)) { + /* + * If the file blocks are being allocated from a + * realtime volume, then return the inode's realtime + * extent size or the realtime volume's extent size. + */ + stat->blksize = + xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; + } else + stat->blksize = xfs_preferred_iosize(mp); + stat->rdev = 0; + break; } - return -error; + + return 0; } STATIC int @@ -636,7 +696,7 @@ xfs_vn_setattr( error = xfs_setattr(XFS_I(inode), &vattr, flags, NULL); if (likely(!error)) - __vn_revalidate(vn_from_inode(inode), &vattr); + vn_revalidate(vn_from_inode(inode)); return -error; } @@ -750,6 +810,47 @@ xfs_vn_removexattr( return namesp->attr_remove(vp, attr, xflags); } +STATIC long +xfs_vn_fallocate( + struct inode *inode, + int mode, + loff_t offset, + loff_t len) +{ + long error; + loff_t new_size = 0; + xfs_flock64_t bf; + xfs_inode_t *ip = XFS_I(inode); + + /* preallocation on directories not yet supported */ + error = -ENODEV; + if (S_ISDIR(inode->i_mode)) + goto out_error; + + bf.l_whence = 0; + bf.l_start = offset; + bf.l_len = len; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, + 0, NULL, ATTR_NOLOCK); + if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) + new_size = offset + len; + + /* Change file size if needed */ + if (new_size) { + bhv_vattr_t va; + + va.va_mask = XFS_AT_SIZE; + va.va_size = new_size; + error = xfs_setattr(ip, &va, ATTR_NOLOCK, NULL); + } + + xfs_iunlock(ip, XFS_IOLOCK_EXCL); +out_error: + return error; +} const struct inode_operations xfs_inode_operations = { .permission = xfs_vn_permission, @@ -760,6 +861,7 @@ const struct inode_operations xfs_inode_operations = { .getxattr = xfs_vn_getxattr, .listxattr = xfs_vn_listxattr, .removexattr = xfs_vn_removexattr, + .fallocate = xfs_vn_fallocate, }; const struct inode_operations xfs_dir_inode_operations = { diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index dc3752de22d..3ca39c4e5d2 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -43,7 +43,6 @@ #include <kmem.h> #include <mrlock.h> -#include <spin.h> #include <sv.h> #include <mutex.h> #include <sema.h> @@ -75,6 +74,7 @@ #include <linux/notifier.h> #include <linux/delay.h> #include <linux/log2.h> +#include <linux/spinlock.h> #include <asm/page.h> #include <asm/div64.h> @@ -136,43 +136,19 @@ #define current_restore_flags_nested(sp, f) \ (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) -#define NBPP PAGE_SIZE -#define NDPP (1 << (PAGE_SHIFT - 9)) +#define spinlock_destroy(lock) #define NBBY 8 /* number of bits per byte */ -#define NBPC PAGE_SIZE /* Number of bytes per click */ -#define BPCSHIFT PAGE_SHIFT /* LOG2(NBPC) if exact */ /* * Size of block device i/o is parameterized here. * Currently the system supports page-sized i/o. */ -#define BLKDEV_IOSHIFT BPCSHIFT +#define BLKDEV_IOSHIFT PAGE_CACHE_SHIFT #define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT) /* number of BB's per block device block */ #define BLKDEV_BB BTOBB(BLKDEV_IOSIZE) -/* bytes to clicks */ -#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) -#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) -#define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) -#define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT) - -/* off_t bytes to clicks */ -#define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) -#define offtoct(x) ((xfs_off_t)(x)>>BPCSHIFT) - -/* clicks to off_t bytes */ -#define ctooff(x) ((xfs_off_t)(x)<<BPCSHIFT) - -/* clicks to bytes */ -#define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT) -#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) -#define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT) - -/* bytes to clicks */ -#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) - #define ENOATTR ENODATA /* Attribute not found */ #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ @@ -205,10 +181,6 @@ #define xfs_stack_trace() dump_stack() #define xfs_itruncate_data(ip, off) \ (-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off))) -#define xfs_statvfs_fsid(statp, mp) \ - ({ u64 id = huge_encode_dev((mp)->m_ddev_targp->bt_dev); \ - __kernel_fsid_t *fsid = &(statp)->f_fsid; \ - (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); }) /* Move the kernel do_div definition off to one side */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 6f614f35f65..16635338849 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -58,14 +58,12 @@ void xfs_rw_enter_trace( int tag, - xfs_iocore_t *io, + xfs_inode_t *ip, void *data, size_t segs, loff_t offset, int ioflags) { - xfs_inode_t *ip = XFS_IO_INODE(io); - if (ip->i_rwtrace == NULL) return; ktrace_enter(ip->i_rwtrace, @@ -78,8 +76,8 @@ xfs_rw_enter_trace( (void *)((unsigned long)((offset >> 32) & 0xffffffff)), (void *)((unsigned long)(offset & 0xffffffff)), (void *)((unsigned long)ioflags), - (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)), - (void *)((unsigned long)(io->io_new_size & 0xffffffff)), + (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(ip->i_new_size & 0xffffffff)), (void *)((unsigned long)current_pid()), (void *)NULL, (void *)NULL, @@ -89,13 +87,12 @@ xfs_rw_enter_trace( void xfs_inval_cached_trace( - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, xfs_off_t len, xfs_off_t first, xfs_off_t last) { - xfs_inode_t *ip = XFS_IO_INODE(io); if (ip->i_rwtrace == NULL) return; @@ -131,7 +128,7 @@ xfs_inval_cached_trace( */ STATIC int xfs_iozero( - struct inode *ip, /* inode */ + struct xfs_inode *ip, /* inode */ loff_t pos, /* offset in file */ size_t count) /* size of data to zero */ { @@ -139,7 +136,7 @@ xfs_iozero( struct address_space *mapping; int status; - mapping = ip->i_mapping; + mapping = ip->i_vnode->i_mapping; do { unsigned offset, bytes; void *fsdata; @@ -205,7 +202,7 @@ xfs_read( if (unlikely(ioflags & IO_ISDIRECT)) { xfs_buftarg_t *target = - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? + XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->bt_smask) || (size & target->bt_smask)) { @@ -246,9 +243,8 @@ xfs_read( if (unlikely(ioflags & IO_ISDIRECT)) { if (VN_CACHED(vp)) - ret = xfs_flushinval_pages(ip, - ctooff(offtoct(*offset)), - -1, FI_REMAPF_LOCKED); + ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), + -1, FI_REMAPF_LOCKED); mutex_unlock(&inode->i_mutex); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -256,7 +252,7 @@ xfs_read( } } - xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, + xfs_rw_enter_trace(XFS_READ_ENTER, ip, (void *)iovp, segs, *offset, ioflags); iocb->ki_pos = *offset; @@ -301,7 +297,7 @@ xfs_splice_read( return -error; } } - xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore, + xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip, pipe, count, *ppos, ioflags); ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); if (ret > 0) @@ -323,7 +319,6 @@ xfs_splice_write( { bhv_vnode_t *vp = XFS_ITOV(ip); xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; ssize_t ret; struct inode *inode = outfilp->f_mapping->host; xfs_fsize_t isize, new_size; @@ -350,10 +345,10 @@ xfs_splice_write( xfs_ilock(ip, XFS_ILOCK_EXCL); if (new_size > ip->i_size) - io->io_new_size = new_size; + ip->i_new_size = new_size; xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore, + xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip, pipe, count, *ppos, ioflags); ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); if (ret > 0) @@ -370,9 +365,9 @@ xfs_splice_write( xfs_iunlock(ip, XFS_ILOCK_EXCL); } - if (io->io_new_size) { + if (ip->i_new_size) { xfs_ilock(ip, XFS_ILOCK_EXCL); - io->io_new_size = 0; + ip->i_new_size = 0; if (ip->i_d.di_size > ip->i_size) ip->i_d.di_size = ip->i_size; xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -389,20 +384,19 @@ xfs_splice_write( */ STATIC int /* error (positive) */ xfs_zero_last_block( - struct inode *ip, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_fsize_t offset, xfs_fsize_t isize) { xfs_fileoff_t last_fsb; - xfs_mount_t *mp = io->io_mount; + xfs_mount_t *mp = ip->i_mount; int nimaps; int zero_offset; int zero_len; int error = 0; xfs_bmbt_irec_t imap; - ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); zero_offset = XFS_B_FSB_OFFSET(mp, isize); if (zero_offset == 0) { @@ -415,7 +409,7 @@ xfs_zero_last_block( last_fsb = XFS_B_TO_FSBT(mp, isize); nimaps = 1; - error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap, + error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, &nimaps, NULL, NULL); if (error) { return error; @@ -433,14 +427,14 @@ xfs_zero_last_block( * out sync. We need to drop the ilock while we do this so we * don't deadlock when the buffer cache calls back to us. */ - XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); + xfs_iunlock(ip, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); zero_len = mp->m_sb.sb_blocksize - zero_offset; if (isize + zero_len > offset) zero_len = offset - isize; error = xfs_iozero(ip, isize, zero_len); - XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); ASSERT(error >= 0); return error; } @@ -458,35 +452,33 @@ xfs_zero_last_block( int /* error (positive) */ xfs_zero_eof( - bhv_vnode_t *vp, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, /* starting I/O offset */ xfs_fsize_t isize) /* current inode size */ { - struct inode *ip = vn_to_inode(vp); + xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t start_zero_fsb; xfs_fileoff_t end_zero_fsb; xfs_fileoff_t zero_count_fsb; xfs_fileoff_t last_fsb; xfs_fileoff_t zero_off; xfs_fsize_t zero_len; - xfs_mount_t *mp = io->io_mount; int nimaps; int error = 0; xfs_bmbt_irec_t imap; - ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); - ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); ASSERT(offset > isize); /* * First handle zeroing the block on which isize resides. * We only zero a part of that block so it is handled specially. */ - error = xfs_zero_last_block(ip, io, offset, isize); + error = xfs_zero_last_block(ip, offset, isize); if (error) { - ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); - ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); return error; } @@ -514,11 +506,11 @@ xfs_zero_eof( while (start_zero_fsb <= end_zero_fsb) { nimaps = 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; - error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb, + error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 0, NULL, 0, &imap, &nimaps, NULL, NULL); if (error) { - ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); - ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); return error; } ASSERT(nimaps > 0); @@ -542,7 +534,7 @@ xfs_zero_eof( * Drop the inode lock while we're doing the I/O. * We'll still have the iolock to protect us. */ - XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_iunlock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); @@ -558,14 +550,13 @@ xfs_zero_eof( start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); - XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); } return 0; out_lock: - - XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); ASSERT(error >= 0); return error; } @@ -587,7 +578,6 @@ xfs_write( xfs_mount_t *mp; ssize_t ret = 0, error = 0; xfs_fsize_t isize, new_size; - xfs_iocore_t *io; int iolock; int eventsent = 0; bhv_vrwlock_t locktype; @@ -607,8 +597,7 @@ xfs_write( if (count == 0) return 0; - io = &xip->i_iocore; - mp = io->io_mount; + mp = xip->i_mount; xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); @@ -667,7 +656,7 @@ start: if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = - (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? + XFS_IS_REALTIME_INODE(xip) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((pos & target->bt_smask) || (count & target->bt_smask)) { @@ -688,7 +677,7 @@ start: new_size = pos + count; if (new_size > xip->i_size) - io->io_new_size = new_size; + xip->i_new_size = new_size; if (likely(!(ioflags & IO_INVIS))) { file_update_time(file); @@ -706,7 +695,7 @@ start: */ if (pos > xip->i_size) { - error = xfs_zero_eof(vp, io, pos, xip->i_size); + error = xfs_zero_eof(xip, pos, xip->i_size); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL); goto out_unlock_internal; @@ -740,10 +729,10 @@ retry: if ((ioflags & IO_ISDIRECT)) { if (VN_CACHED(vp)) { WARN_ON(need_i_mutex == 0); - xfs_inval_cached_trace(io, pos, -1, - ctooff(offtoct(pos)), -1); + xfs_inval_cached_trace(xip, pos, -1, + (pos & PAGE_CACHE_MASK), -1); error = xfs_flushinval_pages(xip, - ctooff(offtoct(pos)), + (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); if (error) goto out_unlock_internal; @@ -751,7 +740,7 @@ retry: if (need_i_mutex) { /* demote the lock now the cached pages are gone */ - XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); + xfs_ilock_demote(xip, XFS_IOLOCK_EXCL); mutex_unlock(&inode->i_mutex); iolock = XFS_IOLOCK_SHARED; @@ -759,7 +748,7 @@ retry: need_i_mutex = 0; } - xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs, + xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs, *offset, ioflags); ret = generic_file_direct_write(iocb, iovp, &segs, pos, offset, count, ocount); @@ -779,7 +768,7 @@ retry: goto relock; } } else { - xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs, + xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, *offset, ioflags); ret = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); @@ -843,9 +832,9 @@ retry: } out_unlock_internal: - if (io->io_new_size) { + if (xip->i_new_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); - io->io_new_size = 0; + xip->i_new_size = 0; /* * If this was a direct or synchronous I/O that failed (such * as ENOSPC) then part of the I/O may have been written to @@ -894,25 +883,6 @@ xfs_bdstrat_cb(struct xfs_buf *bp) } } - -int -xfs_bmap( - xfs_inode_t *ip, - xfs_off_t offset, - ssize_t count, - int flags, - xfs_iomap_t *iomapp, - int *niomaps) -{ - xfs_iocore_t *io = &ip->i_iocore; - - ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); - ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == - ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); - - return xfs_iomap(io, offset, count, flags, iomapp, niomaps); -} - /* * Wrapper around bdstrat so that we can stop data * from going to disk in case we are shutting down the filesystem. diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index 4b7747a828d..e200253139c 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -19,7 +19,6 @@ #define __XFS_LRW_H__ struct xfs_mount; -struct xfs_iocore; struct xfs_inode; struct xfs_bmbt_irec; struct xfs_buf; @@ -60,20 +59,19 @@ struct xfs_iomap; #define XFS_IOMAP_UNWRITTEN 27 #define XFS_SPLICE_READ_ENTER 28 #define XFS_SPLICE_WRITE_ENTER 29 -extern void xfs_rw_enter_trace(int, struct xfs_iocore *, - void *, size_t, loff_t, int); -extern void xfs_inval_cached_trace(struct xfs_iocore *, - xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t); +extern void xfs_rw_enter_trace(int, struct xfs_inode *, + void *, size_t, loff_t, int); +extern void xfs_inval_cached_trace(struct xfs_inode *, + xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t); #else -#define xfs_rw_enter_trace(tag, io, data, size, offset, ioflags) -#define xfs_inval_cached_trace(io, offset, len, first, last) +#define xfs_rw_enter_trace(tag, ip, data, size, offset, ioflags) +#define xfs_inval_cached_trace(ip, offset, len, first, last) #endif extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); extern int xfs_bdstrat_cb(struct xfs_buf *); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); -extern int xfs_zero_eof(struct inode *, struct xfs_iocore *, xfs_off_t, - xfs_fsize_t); +extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); #endif /* __XFS_LRW_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 8cb63c60c04..21dfc9da235 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -41,6 +41,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_itable.h" +#include "xfs_fsops.h" #include "xfs_rw.h" #include "xfs_acl.h" #include "xfs_attr.h" @@ -49,6 +50,8 @@ #include "xfs_vnodeops.h" #include "xfs_vfsops.h" #include "xfs_version.h" +#include "xfs_log_priv.h" +#include "xfs_trans_priv.h" #include <linux/namei.h> #include <linux/init.h> @@ -87,6 +90,435 @@ xfs_args_allocate( return args; } +#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ +#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ +#define MNTOPT_LOGDEV "logdev" /* log device */ +#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */ +#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */ +#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */ +#define MNTOPT_INO64 "ino64" /* force inodes into 64-bit range */ +#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */ +#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */ +#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */ +#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ +#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ +#define MNTOPT_MTPT "mtpt" /* filesystem mount point */ +#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ +#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ +#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ +#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ +#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ +#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ +#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and + * unwritten extent conversion */ +#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ +#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */ +#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ +#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ +#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ +#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ +#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes + * in stat(). */ +#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ +#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ +#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */ +#define MNTOPT_QUOTA "quota" /* disk quotas (user) */ +#define MNTOPT_NOQUOTA "noquota" /* no quotas */ +#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */ +#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */ +#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */ +#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */ +#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */ +#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */ +#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */ +#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ +#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ +#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ +#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ +#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ +#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ + +STATIC unsigned long +suffix_strtoul(char *s, char **endp, unsigned int base) +{ + int last, shift_left_factor = 0; + char *value = s; + + last = strlen(value) - 1; + if (value[last] == 'K' || value[last] == 'k') { + shift_left_factor = 10; + value[last] = '\0'; + } + if (value[last] == 'M' || value[last] == 'm') { + shift_left_factor = 20; + value[last] = '\0'; + } + if (value[last] == 'G' || value[last] == 'g') { + shift_left_factor = 30; + value[last] = '\0'; + } + + return simple_strtoul((const char *)s, endp, base) << shift_left_factor; +} + +STATIC int +xfs_parseargs( + struct xfs_mount *mp, + char *options, + struct xfs_mount_args *args, + int update) +{ + char *this_char, *value, *eov; + int dsunit, dswidth, vol_dsunit, vol_dswidth; + int iosize; + int ikeep = 0; + + args->flags |= XFSMNT_BARRIER; + args->flags2 |= XFSMNT2_COMPAT_IOSIZE; + + if (!options) + goto done; + + iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0; + + while ((this_char = strsep(&options, ",")) != NULL) { + if (!*this_char) + continue; + if ((value = strchr(this_char, '=')) != NULL) + *value++ = 0; + + if (!strcmp(this_char, MNTOPT_LOGBUFS)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + args->logbufs = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + args->logbufsize = suffix_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + strncpy(args->logname, value, MAXNAMELEN); + } else if (!strcmp(this_char, MNTOPT_MTPT)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + strncpy(args->mtpt, value, MAXNAMELEN); + } else if (!strcmp(this_char, MNTOPT_RTDEV)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + strncpy(args->rtname, value, MAXNAMELEN); + } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + iosize = simple_strtoul(value, &eov, 10); + args->flags |= XFSMNT_IOSIZE; + args->iosizelog = (uint8_t) iosize; + } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + iosize = suffix_strtoul(value, &eov, 10); + args->flags |= XFSMNT_IOSIZE; + args->iosizelog = ffs(iosize) - 1; + } else if (!strcmp(this_char, MNTOPT_GRPID) || + !strcmp(this_char, MNTOPT_BSDGROUPS)) { + mp->m_flags |= XFS_MOUNT_GRPID; + } else if (!strcmp(this_char, MNTOPT_NOGRPID) || + !strcmp(this_char, MNTOPT_SYSVGROUPS)) { + mp->m_flags &= ~XFS_MOUNT_GRPID; + } else if (!strcmp(this_char, MNTOPT_WSYNC)) { + args->flags |= XFSMNT_WSYNC; + } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { + args->flags |= XFSMNT_OSYNCISOSYNC; + } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { + args->flags |= XFSMNT_NORECOVERY; + } else if (!strcmp(this_char, MNTOPT_INO64)) { + args->flags |= XFSMNT_INO64; +#if !XFS_BIG_INUMS + cmn_err(CE_WARN, + "XFS: %s option not allowed on this system", + this_char); + return EINVAL; +#endif + } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { + args->flags |= XFSMNT_NOALIGN; + } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { + args->flags |= XFSMNT_SWALLOC; + } else if (!strcmp(this_char, MNTOPT_SUNIT)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + dsunit = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { + if (!value || !*value) { + cmn_err(CE_WARN, + "XFS: %s option requires an argument", + this_char); + return EINVAL; + } + dswidth = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { + args->flags &= ~XFSMNT_32BITINODES; +#if !XFS_BIG_INUMS + cmn_err(CE_WARN, + "XFS: %s option not allowed on this system", + this_char); + return EINVAL; +#endif + } else if (!strcmp(this_char, MNTOPT_NOUUID)) { + args->flags |= XFSMNT_NOUUID; + } else if (!strcmp(this_char, MNTOPT_BARRIER)) { + args->flags |= XFSMNT_BARRIER; + } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { + args->flags &= ~XFSMNT_BARRIER; + } else if (!strcmp(this_char, MNTOPT_IKEEP)) { + ikeep = 1; + args->flags &= ~XFSMNT_IDELETE; + } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { + args->flags |= XFSMNT_IDELETE; + } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { + args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE; + } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { + args->flags2 |= XFSMNT2_COMPAT_IOSIZE; + } else if (!strcmp(this_char, MNTOPT_ATTR2)) { + args->flags |= XFSMNT_ATTR2; + } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { + args->flags &= ~XFSMNT_ATTR2; + } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { + args->flags2 |= XFSMNT2_FILESTREAMS; + } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { + args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA); + args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA); + } else if (!strcmp(this_char, MNTOPT_QUOTA) || + !strcmp(this_char, MNTOPT_UQUOTA) || + !strcmp(this_char, MNTOPT_USRQUOTA)) { + args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || + !strcmp(this_char, MNTOPT_UQUOTANOENF)) { + args->flags |= XFSMNT_UQUOTA; + args->flags &= ~XFSMNT_UQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_PQUOTA) || + !strcmp(this_char, MNTOPT_PRJQUOTA)) { + args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { + args->flags |= XFSMNT_PQUOTA; + args->flags &= ~XFSMNT_PQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_GQUOTA) || + !strcmp(this_char, MNTOPT_GRPQUOTA)) { + args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { + args->flags |= XFSMNT_GQUOTA; + args->flags &= ~XFSMNT_GQUOTAENF; + } else if (!strcmp(this_char, MNTOPT_DMAPI)) { + args->flags |= XFSMNT_DMAPI; + } else if (!strcmp(this_char, MNTOPT_XDSM)) { + args->flags |= XFSMNT_DMAPI; + } else if (!strcmp(this_char, MNTOPT_DMI)) { + args->flags |= XFSMNT_DMAPI; + } else if (!strcmp(this_char, "ihashsize")) { + cmn_err(CE_WARN, + "XFS: ihashsize no longer used, option is deprecated."); + } else if (!strcmp(this_char, "osyncisdsync")) { + /* no-op, this is now the default */ + cmn_err(CE_WARN, + "XFS: osyncisdsync is now the default, option is deprecated."); + } else if (!strcmp(this_char, "irixsgid")) { + cmn_err(CE_WARN, + "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); + } else { + cmn_err(CE_WARN, + "XFS: unknown mount option [%s].", this_char); + return EINVAL; + } + } + + if (args->flags & XFSMNT_NORECOVERY) { + if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) { + cmn_err(CE_WARN, + "XFS: no-recovery mounts must be read-only."); + return EINVAL; + } + } + + if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { + cmn_err(CE_WARN, + "XFS: sunit and swidth options incompatible with the noalign option"); + return EINVAL; + } + + if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) { + cmn_err(CE_WARN, + "XFS: cannot mount with both project and group quota"); + return EINVAL; + } + + if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') { + printk("XFS: %s option needs the mount point option as well\n", + MNTOPT_DMAPI); + return EINVAL; + } + + if ((dsunit && !dswidth) || (!dsunit && dswidth)) { + cmn_err(CE_WARN, + "XFS: sunit and swidth must be specified together"); + return EINVAL; + } + + if (dsunit && (dswidth % dsunit != 0)) { + cmn_err(CE_WARN, + "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", + dswidth, dsunit); + return EINVAL; + } + + /* + * Applications using DMI filesystems often expect the + * inode generation number to be monotonically increasing. + * If we delete inode chunks we break this assumption, so + * keep unused inode chunks on disk for DMI filesystems + * until we come up with a better solution. + * Note that if "ikeep" or "noikeep" mount options are + * supplied, then they are honored. + */ + if (!(args->flags & XFSMNT_DMAPI) && !ikeep) + args->flags |= XFSMNT_IDELETE; + + if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { + if (dsunit) { + args->sunit = dsunit; + args->flags |= XFSMNT_RETERR; + } else { + args->sunit = vol_dsunit; + } + dswidth ? (args->swidth = dswidth) : + (args->swidth = vol_dswidth); + } else { + args->sunit = args->swidth = 0; + } + +done: + if (args->flags & XFSMNT_32BITINODES) + mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + if (args->flags2) + args->flags |= XFSMNT_FLAGS2; + return 0; +} + +struct proc_xfs_info { + int flag; + char *str; +}; + +STATIC int +xfs_showargs( + struct xfs_mount *mp, + struct seq_file *m) +{ + static struct proc_xfs_info xfs_info_set[] = { + /* the few simple ones we can get from the mount struct */ + { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, + { XFS_MOUNT_INO64, "," MNTOPT_INO64 }, + { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, + { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, + { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, + { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, + { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC }, + { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, + { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, + { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI }, + { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, + { 0, NULL } + }; + static struct proc_xfs_info xfs_info_unset[] = { + /* the few simple ones we can get from the mount struct */ + { XFS_MOUNT_IDELETE, "," MNTOPT_IKEEP }, + { XFS_MOUNT_COMPAT_IOSIZE, "," MNTOPT_LARGEIO }, + { XFS_MOUNT_BARRIER, "," MNTOPT_NOBARRIER }, + { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_64BITINODE }, + { 0, NULL } + }; + struct proc_xfs_info *xfs_infop; + + for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { + if (mp->m_flags & xfs_infop->flag) + seq_puts(m, xfs_infop->str); + } + for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) { + if (!(mp->m_flags & xfs_infop->flag)) + seq_puts(m, xfs_infop->str); + } + + if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) + seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", + (int)(1 << mp->m_writeio_log) >> 10); + + if (mp->m_logbufs > 0) + seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs); + if (mp->m_logbsize > 0) + seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); + + if (mp->m_logname) + seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); + if (mp->m_rtname) + seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); + + if (mp->m_dalign > 0) + seq_printf(m, "," MNTOPT_SUNIT "=%d", + (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); + if (mp->m_swidth > 0) + seq_printf(m, "," MNTOPT_SWIDTH "=%d", + (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); + + if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD)) + seq_puts(m, "," MNTOPT_USRQUOTA); + else if (mp->m_qflags & XFS_UQUOTA_ACCT) + seq_puts(m, "," MNTOPT_UQUOTANOENF); + + if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) + seq_puts(m, "," MNTOPT_PRJQUOTA); + else if (mp->m_qflags & XFS_PQUOTA_ACCT) + seq_puts(m, "," MNTOPT_PQUOTANOENF); + + if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD)) + seq_puts(m, "," MNTOPT_GRPQUOTA); + else if (mp->m_qflags & XFS_GQUOTA_ACCT) + seq_puts(m, "," MNTOPT_GQUOTANOENF); + + if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) + seq_puts(m, "," MNTOPT_NOQUOTA); + + return 0; +} __uint64_t xfs_max_file_offset( unsigned int blockshift) @@ -137,7 +569,7 @@ xfs_set_inodeops( break; case S_IFLNK: inode->i_op = &xfs_symlink_inode_operations; - if (inode->i_blocks) + if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE)) inode->i_mapping->a_ops = &xfs_address_space_operations; break; default: @@ -174,8 +606,6 @@ xfs_revalidate_inode( inode->i_generation = ip->i_d.di_gen; i_size_write(inode, ip->i_d.di_size); - inode->i_blocks = - XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; @@ -334,6 +764,64 @@ xfs_blkdev_issue_flush( blkdev_issue_flush(buftarg->bt_bdev, NULL); } +/* + * XFS AIL push thread support + */ +void +xfsaild_wakeup( + xfs_mount_t *mp, + xfs_lsn_t threshold_lsn) +{ + mp->m_ail.xa_target = threshold_lsn; + wake_up_process(mp->m_ail.xa_task); +} + +int +xfsaild( + void *data) +{ + xfs_mount_t *mp = (xfs_mount_t *)data; + xfs_lsn_t last_pushed_lsn = 0; + long tout = 0; + + while (!kthread_should_stop()) { + if (tout) + schedule_timeout_interruptible(msecs_to_jiffies(tout)); + tout = 1000; + + /* swsusp */ + try_to_freeze(); + + ASSERT(mp->m_log); + if (XFS_FORCED_SHUTDOWN(mp)) + continue; + + tout = xfsaild_push(mp, &last_pushed_lsn); + } + + return 0; +} /* xfsaild */ + +int +xfsaild_start( + xfs_mount_t *mp) +{ + mp->m_ail.xa_target = 0; + mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild"); + if (IS_ERR(mp->m_ail.xa_task)) + return -PTR_ERR(mp->m_ail.xa_task); + return 0; +} + +void +xfsaild_stop( + xfs_mount_t *mp) +{ + kthread_stop(mp->m_ail.xa_task); +} + + + STATIC struct inode * xfs_fs_alloc_inode( struct super_block *sb) @@ -361,7 +849,7 @@ xfs_fs_inode_init_once( inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); } -STATIC int +STATIC int __init xfs_init_zones(void) { xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode", @@ -410,8 +898,7 @@ xfs_fs_write_inode( { int error = 0, flags = FLUSH_INODE; - vn_trace_entry(XFS_I(inode), __FUNCTION__, - (inst_t *)__return_address); + xfs_itrace_entry(XFS_I(inode)); if (sync) { filemap_fdatawait(inode->i_mapping); flags |= FLUSH_SYNC; @@ -438,8 +925,7 @@ xfs_fs_clear_inode( * find an inode with di_mode == 0 but without IGET_CREATE set. */ if (ip) { - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - + xfs_itrace_entry(ip); XFS_STATS_INC(vn_rele); XFS_STATS_INC(vn_remove); XFS_STATS_INC(vn_reclaim); @@ -683,8 +1169,44 @@ xfs_fs_statfs( struct dentry *dentry, struct kstatfs *statp) { - return -xfs_statvfs(XFS_M(dentry->d_sb), statp, - vn_from_inode(dentry->d_inode)); + struct xfs_mount *mp = XFS_M(dentry->d_sb); + xfs_sb_t *sbp = &mp->m_sb; + __uint64_t fakeinos, id; + xfs_extlen_t lsize; + + statp->f_type = XFS_SB_MAGIC; + statp->f_namelen = MAXNAMELEN - 1; + + id = huge_encode_dev(mp->m_ddev_targp->bt_dev); + statp->f_fsid.val[0] = (u32)id; + statp->f_fsid.val[1] = (u32)(id >> 32); + + xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); + + spin_lock(&mp->m_sb_lock); + statp->f_bsize = sbp->sb_blocksize; + lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; + statp->f_blocks = sbp->sb_dblocks - lsize; + statp->f_bfree = statp->f_bavail = + sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); + fakeinos = statp->f_bfree << sbp->sb_inopblog; +#if XFS_BIG_INUMS + fakeinos += mp->m_inoadd; +#endif + statp->f_files = + MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); + if (mp->m_maxicount) +#if XFS_BIG_INUMS + if (!mp->m_inoadd) +#endif + statp->f_files = min_t(typeof(statp->f_files), + statp->f_files, + mp->m_maxicount); + statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); + spin_unlock(&mp->m_sb_lock); + + XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp); + return 0; } STATIC int @@ -704,11 +1226,19 @@ xfs_fs_remount( return -error; } +/* + * Second stage of a freeze. The data is already frozen so we only + * need to take care of themetadata. Once that's done write a dummy + * record to dirty the log in case of a crash while frozen. + */ STATIC void xfs_fs_lockfs( struct super_block *sb) { - xfs_freeze(XFS_M(sb)); + struct xfs_mount *mp = XFS_M(sb); + + xfs_attr_quiesce(mp); + xfs_fs_log_dummy(mp); } STATIC int @@ -779,7 +1309,6 @@ xfs_fs_fill_super( struct inode *rootvp; struct xfs_mount *mp = NULL; struct xfs_mount_args *args = xfs_args_allocate(sb, silent); - struct kstatfs statvfs; int error; mp = xfs_mount_init(); @@ -807,21 +1336,19 @@ xfs_fs_fill_super( if (error) goto fail_vfsop; - error = xfs_statvfs(mp, &statvfs, NULL); - if (error) - goto fail_unmount; - sb->s_dirt = 1; - sb->s_magic = statvfs.f_type; - sb->s_blocksize = statvfs.f_bsize; - sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1; + sb->s_magic = XFS_SB_MAGIC; + sb->s_blocksize = mp->m_sb.sb_blocksize; + sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); sb->s_time_gran = 1; set_posix_acl_flag(sb); - error = xfs_root(mp, &rootvp); - if (error) + rootvp = igrab(mp->m_rootip->i_vnode); + if (!rootvp) { + error = ENOENT; goto fail_unmount; + } sb->s_root = d_alloc_root(vn_to_inode(rootvp)); if (!sb->s_root) { @@ -841,8 +1368,7 @@ xfs_fs_fill_super( goto fail_vnrele; } - vn_trace_exit(XFS_I(sb->s_root->d_inode), __FUNCTION__, - (inst_t *)__return_address); + xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); kmem_free(args, sizeof(*args)); return 0; diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 814169fd7e1..bc7afe00733 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -40,7 +40,7 @@ #define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) static wait_queue_head_t vsync[NVSYNC]; -void +void __init vn_init(void) { int i; @@ -82,84 +82,55 @@ vn_ioerror( xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); } -bhv_vnode_t * -vn_initialize( - struct inode *inode) -{ - bhv_vnode_t *vp = vn_from_inode(inode); - - XFS_STATS_INC(vn_active); - XFS_STATS_INC(vn_alloc); - - ASSERT(VN_CACHED(vp) == 0); - - return vp; -} - /* - * Revalidate the Linux inode from the vattr. + * Revalidate the Linux inode from the XFS inode. * Note: i_size _not_ updated; we must hold the inode * semaphore when doing that - callers responsibility. */ -void -vn_revalidate_core( - bhv_vnode_t *vp, - bhv_vattr_t *vap) +int +vn_revalidate( + bhv_vnode_t *vp) { - struct inode *inode = vn_to_inode(vp); - - inode->i_mode = vap->va_mode; - inode->i_nlink = vap->va_nlink; - inode->i_uid = vap->va_uid; - inode->i_gid = vap->va_gid; - inode->i_blocks = vap->va_nblocks; - inode->i_mtime = vap->va_mtime; - inode->i_ctime = vap->va_ctime; - if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) + struct inode *inode = vn_to_inode(vp); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + unsigned long xflags; + + xfs_itrace_entry(ip); + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + inode->i_mode = ip->i_d.di_mode; + inode->i_uid = ip->i_d.di_uid; + inode->i_gid = ip->i_d.di_gid; + inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; + inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; + inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; + inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + + xflags = xfs_ip2xflags(ip); + if (xflags & XFS_XFLAG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; - if (vap->va_xflags & XFS_XFLAG_APPEND) + if (xflags & XFS_XFLAG_APPEND) inode->i_flags |= S_APPEND; else inode->i_flags &= ~S_APPEND; - if (vap->va_xflags & XFS_XFLAG_SYNC) + if (xflags & XFS_XFLAG_SYNC) inode->i_flags |= S_SYNC; else inode->i_flags &= ~S_SYNC; - if (vap->va_xflags & XFS_XFLAG_NOATIME) + if (xflags & XFS_XFLAG_NOATIME) inode->i_flags |= S_NOATIME; else inode->i_flags &= ~S_NOATIME; -} - -/* - * Revalidate the Linux inode from the vnode. - */ -int -__vn_revalidate( - bhv_vnode_t *vp, - bhv_vattr_t *vattr) -{ - int error; - - vn_trace_entry(xfs_vtoi(vp), __FUNCTION__, (inst_t *)__return_address); - vattr->va_mask = XFS_AT_STAT | XFS_AT_XFLAGS; - error = xfs_getattr(xfs_vtoi(vp), vattr, 0); - if (likely(!error)) { - vn_revalidate_core(vp, vattr); - xfs_iflags_clear(xfs_vtoi(vp), XFS_IMODIFIED); - } - return -error; -} - -int -vn_revalidate( - bhv_vnode_t *vp) -{ - bhv_vattr_t vattr; + xfs_iunlock(ip, XFS_ILOCK_SHARED); - return __vn_revalidate(vp, &vattr); + xfs_iflags_clear(ip, XFS_IMODIFIED); + return 0; } /* @@ -179,7 +150,7 @@ vn_hold( return vp; } -#ifdef XFS_VNODE_TRACE +#ifdef XFS_INODE_TRACE /* * Reference count of Linux inode if present, -1 if the xfs_inode @@ -211,32 +182,32 @@ static inline int xfs_icount(struct xfs_inode *ip) * Vnode tracing code. */ void -vn_trace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) +_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) { - KTRACE_ENTER(ip, VNODE_KTRACE_ENTRY, func, 0, ra); + KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra); } void -vn_trace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) +_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) { - KTRACE_ENTER(ip, VNODE_KTRACE_EXIT, func, 0, ra); + KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra); } void -vn_trace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) +xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) { - KTRACE_ENTER(ip, VNODE_KTRACE_HOLD, file, line, ra); + KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra); } void -vn_trace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) +_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) { - KTRACE_ENTER(ip, VNODE_KTRACE_REF, file, line, ra); + KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra); } void -vn_trace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) +xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) { - KTRACE_ENTER(ip, VNODE_KTRACE_RELE, file, line, ra); + KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra); } -#endif /* XFS_VNODE_TRACE */ +#endif /* XFS_INODE_TRACE */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 55fb4694858..b5ea418693b 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -187,10 +187,7 @@ typedef struct bhv_vattr { (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) extern void vn_init(void); -extern bhv_vnode_t *vn_initialize(struct inode *); extern int vn_revalidate(bhv_vnode_t *); -extern int __vn_revalidate(bhv_vnode_t *, bhv_vattr_t *); -extern void vn_revalidate_core(bhv_vnode_t *, bhv_vattr_t *); /* * Yeah, these don't take vnode anymore at all, all this should be @@ -210,12 +207,12 @@ static inline int vn_count(bhv_vnode_t *vp) */ extern bhv_vnode_t *vn_hold(bhv_vnode_t *); -#if defined(XFS_VNODE_TRACE) +#if defined(XFS_INODE_TRACE) #define VN_HOLD(vp) \ ((void)vn_hold(vp), \ - vn_trace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address)) + xfs_itrace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address)) #define VN_RELE(vp) \ - (vn_trace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \ + (xfs_itrace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \ iput(vn_to_inode(vp))) #else #define VN_HOLD(vp) ((void)vn_hold(vp)) @@ -238,11 +235,6 @@ static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp) /* * Dealing with bad inodes */ -static inline void vn_mark_bad(bhv_vnode_t *vp) -{ - make_bad_inode(vn_to_inode(vp)); -} - static inline int VN_BAD(bhv_vnode_t *vp) { return is_bad_inode(vn_to_inode(vp)); @@ -296,26 +288,36 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) /* * Tracking vnode activity. */ -#if defined(XFS_VNODE_TRACE) - -#define VNODE_TRACE_SIZE 16 /* number of trace entries */ -#define VNODE_KTRACE_ENTRY 1 -#define VNODE_KTRACE_EXIT 2 -#define VNODE_KTRACE_HOLD 3 -#define VNODE_KTRACE_REF 4 -#define VNODE_KTRACE_RELE 5 - -extern void vn_trace_entry(struct xfs_inode *, const char *, inst_t *); -extern void vn_trace_exit(struct xfs_inode *, const char *, inst_t *); -extern void vn_trace_hold(struct xfs_inode *, char *, int, inst_t *); -extern void vn_trace_ref(struct xfs_inode *, char *, int, inst_t *); -extern void vn_trace_rele(struct xfs_inode *, char *, int, inst_t *); +#if defined(XFS_INODE_TRACE) + +#define INODE_TRACE_SIZE 16 /* number of trace entries */ +#define INODE_KTRACE_ENTRY 1 +#define INODE_KTRACE_EXIT 2 +#define INODE_KTRACE_HOLD 3 +#define INODE_KTRACE_REF 4 +#define INODE_KTRACE_RELE 5 + +extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *); +extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *); +extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *); +extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); +extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); +#define xfs_itrace_entry(ip) \ + _xfs_itrace_entry(ip, __FUNCTION__, (inst_t *)__return_address) +#define xfs_itrace_exit(ip) \ + _xfs_itrace_exit(ip, __FUNCTION__, (inst_t *)__return_address) +#define xfs_itrace_exit_tag(ip, tag) \ + _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) +#define xfs_itrace_ref(ip) \ + _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address) + #else -#define vn_trace_entry(a,b,c) -#define vn_trace_exit(a,b,c) -#define vn_trace_hold(a,b,c,d) -#define vn_trace_ref(a,b,c,d) -#define vn_trace_rele(a,b,c,d) +#define xfs_itrace_entry(a) +#define xfs_itrace_exit(a) +#define xfs_itrace_exit_tag(a, b) +#define xfs_itrace_hold(a, b, c, d) +#define xfs_itrace_ref(a) +#define xfs_itrace_rele(a, b, c, d) #endif #endif /* __XFS_VNODE_H__ */ diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index cfdd35ee9f7..665babcca6a 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1209,7 +1209,6 @@ xfs_qm_dqflush( xfs_buf_t *bp; xfs_disk_dquot_t *ddqp; int error; - SPLDECL(s); ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); @@ -1270,9 +1269,9 @@ xfs_qm_dqflush( mp = dqp->q_mount; /* lsn is 64 bits */ - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn; - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); /* * Attach an iodone routine so that we can remove this dquot from the @@ -1318,7 +1317,6 @@ xfs_qm_dqflush_done( xfs_dq_logitem_t *qip) { xfs_dquot_t *dqp; - SPLDECL(s); dqp = qip->qli_dquot; @@ -1333,15 +1331,15 @@ xfs_qm_dqflush_done( if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && qip->qli_item.li_lsn == qip->qli_flush_lsn) { - AIL_LOCK(dqp->q_mount, s); + spin_lock(&dqp->q_mount->m_ail_lock); /* * xfs_trans_delete_ail() drops the AIL lock. */ if (qip->qli_item.li_lsn == qip->qli_flush_lsn) xfs_trans_delete_ail(dqp->q_mount, - (xfs_log_item_t*)qip, s); + (xfs_log_item_t*)qip); else - AIL_UNLOCK(dqp->q_mount, s); + spin_unlock(&dqp->q_mount->m_ail_lock); } /* diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 78d3ab95c5f..5c371a92e3e 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -123,11 +123,6 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) vsema(&((dqp)->q_flock)); \ (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); } -#define XFS_DQ_PINLOCK(dqp) mutex_spinlock( \ - &(XFS_DQ_TO_QINF(dqp)->qi_pinlock)) -#define XFS_DQ_PINUNLOCK(dqp, s) mutex_spinunlock( \ - &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s) - #define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock))) #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index ddb61fe22a5..1800e8d1f64 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -94,14 +94,13 @@ STATIC void xfs_qm_dquot_logitem_pin( xfs_dq_logitem_t *logitem) { - unsigned long s; xfs_dquot_t *dqp; dqp = logitem->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - s = XFS_DQ_PINLOCK(dqp); + spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); dqp->q_pincount++; - XFS_DQ_PINUNLOCK(dqp, s); + spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); } /* @@ -115,17 +114,16 @@ xfs_qm_dquot_logitem_unpin( xfs_dq_logitem_t *logitem, int stale) { - unsigned long s; xfs_dquot_t *dqp; dqp = logitem->qli_dquot; ASSERT(dqp->q_pincount > 0); - s = XFS_DQ_PINLOCK(dqp); + spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); dqp->q_pincount--; if (dqp->q_pincount == 0) { sv_broadcast(&dqp->q_pinwait); } - XFS_DQ_PINUNLOCK(dqp, s); + spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); } /* ARGSUSED */ @@ -189,8 +187,6 @@ void xfs_qm_dqunpin_wait( xfs_dquot_t *dqp) { - SPLDECL(s); - ASSERT(XFS_DQ_IS_LOCKED(dqp)); if (dqp->q_pincount == 0) { return; @@ -200,9 +196,9 @@ xfs_qm_dqunpin_wait( * Give the log a push so we don't wait here too long. */ xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); - s = XFS_DQ_PINLOCK(dqp); + spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); if (dqp->q_pincount == 0) { - XFS_DQ_PINUNLOCK(dqp, s); + spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); return; } sv_wait(&(dqp->q_pinwait), PINOD, @@ -216,8 +212,8 @@ xfs_qm_dqunpin_wait( * If so, we want to push it out to help us take this item off the AIL as soon * as possible. * - * We must not be holding the AIL_LOCK at this point. Calling incore() to - * search the buffer cache can be a time consuming thing, and AIL_LOCK is a + * We must not be holding the AIL lock at this point. Calling incore() to + * search the buffer cache can be a time consuming thing, and AIL lock is a * spinlock. */ STATIC void @@ -322,7 +318,7 @@ xfs_qm_dquot_logitem_trylock( * want to do that now since we might sleep in the device * strategy routine. We also don't want to grab the buffer lock * here because we'd like not to call into the buffer cache - * while holding the AIL_LOCK. + * while holding the AIL lock. * Make sure to only return PUSHBUF if we set pushbuf_flag * ourselves. If someone else is doing it then we don't * want to go to the push routine and duplicate their efforts. @@ -562,15 +558,14 @@ xfs_qm_qoffend_logitem_committed( xfs_lsn_t lsn) { xfs_qoff_logitem_t *qfs; - SPLDECL(s); qfs = qfe->qql_start_lip; - AIL_LOCK(qfs->qql_item.li_mountp,s); + spin_lock(&qfs->qql_item.li_mountp->m_ail_lock); /* * Delete the qoff-start logitem from the AIL. * xfs_trans_delete_ail() drops the AIL lock. */ - xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs, s); + xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); kmem_free(qfs, sizeof(xfs_qoff_logitem_t)); kmem_free(qfe, sizeof(xfs_qoff_logitem_t)); return (xfs_lsn_t)-1; diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index d488645f833..35582fe9d64 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -310,7 +310,6 @@ xfs_qm_mount_quotas( xfs_mount_t *mp, int mfsi_flags) { - unsigned long s; int error = 0; uint sbf; @@ -367,13 +366,13 @@ xfs_qm_mount_quotas( write_changes: /* - * We actually don't have to acquire the SB_LOCK at all. + * We actually don't have to acquire the m_sb_lock at all. * This can only be called from mount, and that's single threaded. XXX */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); sbf = mp->m_sb.sb_qflags; mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { @@ -1139,7 +1138,7 @@ xfs_qm_init_quotainfo( return error; } - spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin"); + spin_lock_init(&qinf->qi_pinlock); xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); qinf->qi_dqreclaims = 0; @@ -1370,7 +1369,6 @@ xfs_qm_qino_alloc( { xfs_trans_t *tp; int error; - unsigned long s; int committed; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); @@ -1402,7 +1400,7 @@ xfs_qm_qino_alloc( * sbfields arg may contain fields other than *QUOTINO; * VERSIONNUM for example. */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); if (flags & XFS_QMOPT_SBVERSION) { #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) unsigned oldv = mp->m_sb.sb_versionnum; @@ -1429,7 +1427,7 @@ xfs_qm_qino_alloc( mp->m_sb.sb_uquotino = (*ip)->i_ino; else mp->m_sb.sb_gquotino = (*ip)->i_ino; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_mod_sb(tp, sbfields); if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index 23ccaa5fcea..baf537c1c17 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -52,8 +52,8 @@ extern kmem_zone_t *qm_dqtrxzone; /* * Dquot hashtable constants/threshold values. */ -#define XFS_QM_HASHSIZE_LOW (NBPP / sizeof(xfs_dqhash_t)) -#define XFS_QM_HASHSIZE_HIGH ((NBPP * 4) / sizeof(xfs_dqhash_t)) +#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) +#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t)) /* * This defines the unit of allocation of dquots. @@ -106,7 +106,7 @@ typedef struct xfs_qm { typedef struct xfs_quotainfo { xfs_inode_t *qi_uquotaip; /* user quota inode */ xfs_inode_t *qi_gquotaip; /* group quota inode */ - lock_t qi_pinlock; /* dquot pinning mutex */ + spinlock_t qi_pinlock; /* dquot pinning lock */ xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ int qi_dqreclaims; /* a change here indicates a removal in the dqlist */ diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index ad5579d4eac..2cc5886cfe8 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -200,7 +200,6 @@ xfs_qm_scall_quotaoff( boolean_t force) { uint dqtype; - unsigned long s; int error; uint inactivate_flags; xfs_qoff_logitem_t *qoffstart; @@ -237,9 +236,9 @@ xfs_qm_scall_quotaoff( if ((flags & XFS_ALL_QUOTA_ACCT) == 0) { mp->m_qflags &= ~(flags); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); mp->m_sb.sb_qflags = mp->m_qflags; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); /* XXX what to do if error ? Revert back to old vals incore ? */ @@ -415,7 +414,6 @@ xfs_qm_scall_quotaon( uint flags) { int error; - unsigned long s; uint qf; uint accflags; __int64_t sbflags; @@ -468,10 +466,10 @@ xfs_qm_scall_quotaon( * Change sb_qflags on disk but not incore mp->qflags * if this is the root filesystem. */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); qf = mp->m_sb.sb_qflags; mp->m_sb.sb_qflags = qf | flags; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); /* * There's nothing to change if it's the same. @@ -815,7 +813,6 @@ xfs_qm_log_quotaoff( { xfs_trans_t *tp; int error; - unsigned long s; xfs_qoff_logitem_t *qoffi=NULL; uint oldsbqflag=0; @@ -832,10 +829,10 @@ xfs_qm_log_quotaoff( qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); xfs_trans_log_quotaoff_item(tp, qoffi); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); oldsbqflag = mp->m_sb.sb_qflags; mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_mod_sb(tp, XFS_SB_QFLAGS); @@ -854,9 +851,9 @@ error0: * No one else is modifying sb_qflags, so this is OK. * We still hold the quotaofflock. */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); mp->m_sb.sb_qflags = oldsbqflag; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } *qoffstartp = qoffi; return (error); diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index f45a49ffd3a..c27abef7b84 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -17,7 +17,6 @@ */ #include <xfs.h> #include "debug.h" -#include "spin.h" static char message[1024]; /* keep it off the stack */ static DEFINE_SPINLOCK(xfs_err_lock); @@ -81,3 +80,9 @@ assfail(char *expr, char *file, int line) printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line); BUG(); } + +void +xfs_hex_dump(void *p, int length) +{ + print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); +} diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c index 5cf2e86caa7..129067cfcb8 100644 --- a/fs/xfs/support/ktrace.c +++ b/fs/xfs/support/ktrace.c @@ -21,7 +21,7 @@ static kmem_zone_t *ktrace_hdr_zone; static kmem_zone_t *ktrace_ent_zone; static int ktrace_zentries; -void +void __init ktrace_init(int zentries) { ktrace_zentries = zentries; @@ -36,7 +36,7 @@ ktrace_init(int zentries) ASSERT(ktrace_ent_zone); } -void +void __exit ktrace_uninit(void) { kmem_zone_destroy(ktrace_hdr_zone); @@ -90,8 +90,6 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep) return NULL; } - spinlock_init(&(ktp->kt_lock), "kt_lock"); - ktp->kt_entries = ktep; ktp->kt_nentries = nentries; ktp->kt_index = 0; @@ -114,8 +112,6 @@ ktrace_free(ktrace_t *ktp) if (ktp == (ktrace_t *)NULL) return; - spinlock_destroy(&ktp->kt_lock); - /* * Special treatment for the Vnode trace buffer. */ diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h index 0d73216287c..56e72b40a85 100644 --- a/fs/xfs/support/ktrace.h +++ b/fs/xfs/support/ktrace.h @@ -18,8 +18,6 @@ #ifndef __XFS_SUPPORT_KTRACE_H__ #define __XFS_SUPPORT_KTRACE_H__ -#include <spin.h> - /* * Trace buffer entry structure. */ @@ -31,7 +29,6 @@ typedef struct ktrace_entry { * Trace buffer header structure. */ typedef struct ktrace { - lock_t kt_lock; /* mutex to guard counters */ int kt_nentries; /* number of entries in trace buf */ int kt_index; /* current index in entries */ int kt_rollover; diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c index e157015c70f..493a6ecf859 100644 --- a/fs/xfs/support/uuid.c +++ b/fs/xfs/support/uuid.c @@ -133,7 +133,7 @@ uuid_table_remove(uuid_t *uuid) mutex_unlock(&uuid_monitor); } -void +void __init uuid_init(void) { mutex_init(&uuid_monitor); diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index b5a7d92c684..540e4c98982 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -37,7 +37,7 @@ #define XFS_LOG_TRACE 1 #define XFS_RW_TRACE 1 #define XFS_BUF_TRACE 1 -#define XFS_VNODE_TRACE 1 +#define XFS_INODE_TRACE 1 #define XFS_FILESTREAMS_TRACE 1 #endif diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 5bfb66f33ca..7272fe39a92 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -392,32 +392,6 @@ xfs_acl_allow_set( } /* - * The access control process to determine the access permission: - * if uid == file owner id, use the file owner bits. - * if gid == file owner group id, use the file group bits. - * scan ACL for a matching user or group, and use matched entry - * permission. Use total permissions of all matching group entries, - * until all acl entries are exhausted. The final permission produced - * by matching acl entry or entries needs to be & with group permission. - * if not owner, owning group, or matching entry in ACL, use file - * other bits. - */ -STATIC int -xfs_acl_capability_check( - mode_t mode, - cred_t *cr) -{ - if ((mode & ACL_READ) && !capable_cred(cr, CAP_DAC_READ_SEARCH)) - return EACCES; - if ((mode & ACL_WRITE) && !capable_cred(cr, CAP_DAC_OVERRIDE)) - return EACCES; - if ((mode & ACL_EXECUTE) && !capable_cred(cr, CAP_DAC_OVERRIDE)) - return EACCES; - - return 0; -} - -/* * Note: cr is only used here for the capability check if the ACL test fails. * It is not used to find out the credentials uid or groups etc, as was * done in IRIX. It is assumed that the uid and groups for the current @@ -438,7 +412,6 @@ xfs_acl_access( matched.ae_tag = 0; /* Invalid type */ matched.ae_perm = 0; - md >>= 6; /* Normalize the bits for comparison */ for (i = 0; i < fap->acl_cnt; i++) { /* @@ -520,7 +493,8 @@ xfs_acl_access( break; } - return xfs_acl_capability_check(md, cr); + /* EACCES tells generic_permission to check for capability overrides */ + return EACCES; } /* diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 34b7d339129..332a772461c 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -75,7 +75,6 @@ extern int xfs_acl_vremove(bhv_vnode_t *, int); #define _ACL_GET_DEFAULT(pv,pd) (xfs_acl_vtoacl(pv,NULL,pd) == 0) #define _ACL_ACCESS_EXISTS xfs_acl_vhasacl_access #define _ACL_DEFAULT_EXISTS xfs_acl_vhasacl_default -#define _ACL_XFS_IACCESS(i,m,c) (XFS_IFORK_Q(i) ? xfs_acl_iaccess(i,m,c) : -1) #define _ACL_ALLOC(a) ((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP)) #define _ACL_FREE(a) ((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0) @@ -95,7 +94,6 @@ extern int xfs_acl_vremove(bhv_vnode_t *, int); #define _ACL_GET_DEFAULT(pv,pd) (0) #define _ACL_ACCESS_EXISTS (NULL) #define _ACL_DEFAULT_EXISTS (NULL) -#define _ACL_XFS_IACCESS(i,m,c) (-1) #endif #endif /* __XFS_ACL_H__ */ diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 9381b0360c4..61b292a9fb4 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -193,7 +193,7 @@ typedef struct xfs_perag xfs_agino_t pagi_count; /* number of allocated inodes */ int pagb_count; /* pagb slots in use */ #ifdef __KERNEL__ - lock_t pagb_lock; /* lock for pagb_list */ + spinlock_t pagb_lock; /* lock for pagb_list */ #endif xfs_perag_busy_t *pagb_list; /* unstable blocks */ atomic_t pagf_fstrms; /* # of filestreams active in this AG */ diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 012a649a19c..ea6aa60ace0 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2206,7 +2206,7 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); pag->pagf_levels[XFS_BTNUM_CNTi] = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); - spinlock_init(&pag->pagb_lock, "xfspagb"); + spin_lock_init(&pag->pagb_lock); pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS * sizeof(xfs_perag_busy_t), KM_SLEEP); pag->pagf_init = 1; @@ -2500,10 +2500,9 @@ xfs_alloc_mark_busy(xfs_trans_t *tp, xfs_mount_t *mp; xfs_perag_busy_t *bsy; int n; - SPLDECL(s); mp = tp->t_mountp; - s = mutex_spinlock(&mp->m_perag[agno].pagb_lock); + spin_lock(&mp->m_perag[agno].pagb_lock); /* search pagb_list for an open slot */ for (bsy = mp->m_perag[agno].pagb_list, n = 0; @@ -2533,7 +2532,7 @@ xfs_alloc_mark_busy(xfs_trans_t *tp, xfs_trans_set_sync(tp); } - mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); + spin_unlock(&mp->m_perag[agno].pagb_lock); } void @@ -2543,11 +2542,10 @@ xfs_alloc_clear_busy(xfs_trans_t *tp, { xfs_mount_t *mp; xfs_perag_busy_t *list; - SPLDECL(s); mp = tp->t_mountp; - s = mutex_spinlock(&mp->m_perag[agno].pagb_lock); + spin_lock(&mp->m_perag[agno].pagb_lock); list = mp->m_perag[agno].pagb_list; ASSERT(idx < XFS_PAGB_NUM_SLOTS); @@ -2559,7 +2557,7 @@ xfs_alloc_clear_busy(xfs_trans_t *tp, TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp); } - mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); + spin_unlock(&mp->m_perag[agno].pagb_lock); } @@ -2578,11 +2576,10 @@ xfs_alloc_search_busy(xfs_trans_t *tp, xfs_agblock_t uend, bend; xfs_lsn_t lsn; int cnt; - SPLDECL(s); mp = tp->t_mountp; - s = mutex_spinlock(&mp->m_perag[agno].pagb_lock); + spin_lock(&mp->m_perag[agno].pagb_lock); cnt = mp->m_perag[agno].pagb_count; uend = bno + len - 1; @@ -2615,12 +2612,12 @@ xfs_alloc_search_busy(xfs_trans_t *tp, if (cnt) { TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp); lsn = bsy->busy_tp->t_commit_lsn; - mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); + spin_unlock(&mp->m_perag[agno].pagb_lock); xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); } else { TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp); n = -1; - mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s); + spin_unlock(&mp->m_perag[agno].pagb_lock); } return n; diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 93fa64dd1be..e58f321fdae 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -929,7 +929,7 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) * This leaf block cannot have a "remote" value, we only call this routine * if bmap_one_block() says there is only one block (ie: no remote blks). */ -int +STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args) { xfs_inode_t *dp; diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 81f45dae1c5..eb3815ebb7a 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -226,17 +226,15 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) STATIC void xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp) { - unsigned long s; - if ((mp->m_flags & XFS_MOUNT_ATTR2) && !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) { - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) { XFS_SB_VERSION_ADDATTR2(&mp->m_sb); - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); } else - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } } diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index fab0b6d5a41..48228848f5a 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c @@ -25,109 +25,6 @@ * XFS bit manipulation routines, used in non-realtime code. */ -#ifndef HAVE_ARCH_HIGHBIT -/* - * Index of high bit number in byte, -1 for none set, 0..7 otherwise. - */ -static const char xfs_highbit[256] = { - -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */ - 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */ - 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */ - 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 1f */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 20 .. 27 */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 28 .. 2f */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 30 .. 37 */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 38 .. 3f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 40 .. 47 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 48 .. 4f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 50 .. 57 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 58 .. 5f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 60 .. 67 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 68 .. 6f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 70 .. 77 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 78 .. 7f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 80 .. 87 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 88 .. 8f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 90 .. 97 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 98 .. 9f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* a0 .. a7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* a8 .. af */ - 7, 7, 7, 7, 7, 7, 7, 7, /* b0 .. b7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* b8 .. bf */ - 7, 7, 7, 7, 7, 7, 7, 7, /* c0 .. c7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* c8 .. cf */ - 7, 7, 7, 7, 7, 7, 7, 7, /* d0 .. d7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* d8 .. df */ - 7, 7, 7, 7, 7, 7, 7, 7, /* e0 .. e7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* e8 .. ef */ - 7, 7, 7, 7, 7, 7, 7, 7, /* f0 .. f7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* f8 .. ff */ -}; -#endif - -/* - * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set. - */ -inline int -xfs_highbit32( - __uint32_t v) -{ -#ifdef HAVE_ARCH_HIGHBIT - return highbit32(v); -#else - int i; - - if (v & 0xffff0000) - if (v & 0xff000000) - i = 24; - else - i = 16; - else if (v & 0x0000ffff) - if (v & 0x0000ff00) - i = 8; - else - i = 0; - else - return -1; - return i + xfs_highbit[(v >> i) & 0xff]; -#endif -} - -/* - * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set. - */ -int -xfs_lowbit64( - __uint64_t v) -{ - __uint32_t w = (__uint32_t)v; - int n = 0; - - if (w) { /* lower bits */ - n = ffs(w); - } else { /* upper bits */ - w = (__uint32_t)(v >> 32); - if (w && (n = ffs(w))) - n += 32; - } - return n - 1; -} - -/* - * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set. - */ -int -xfs_highbit64( - __uint64_t v) -{ - __uint32_t h = (__uint32_t)(v >> 32); - - if (h) - return xfs_highbit32(h) + 32; - return xfs_highbit32((__uint32_t)v); -} - - /* * Return whether bitmap is empty. * Size is number of words in the bitmap, which is padded to word boundary diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index 082641a9782..325a007dec9 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -47,13 +47,30 @@ static inline __uint64_t xfs_mask64lo(int n) } /* Get high bit set out of 32-bit argument, -1 if none set */ -extern int xfs_highbit32(__uint32_t v); - -/* Get low bit set out of 64-bit argument, -1 if none set */ -extern int xfs_lowbit64(__uint64_t v); +static inline int xfs_highbit32(__uint32_t v) +{ + return fls(v) - 1; +} /* Get high bit set out of 64-bit argument, -1 if none set */ -extern int xfs_highbit64(__uint64_t); +static inline int xfs_highbit64(__uint64_t v) +{ + return fls64(v) - 1; +} + +/* Get low bit set out of 32-bit argument, -1 if none set */ +static inline int xfs_lowbit32(__uint32_t v) +{ + __uint32_t t = v; + return (t) ? find_first_bit((unsigned long *)&t, 32) : -1; +} + +/* Get low bit set out of 64-bit argument, -1 if none set */ +static inline int xfs_lowbit64(__uint64_t v) +{ + __uint64_t t = v; + return (t) ? find_first_bit((unsigned long *)&t, 64) : -1; +} /* Return whether bitmap is empty (1 == empty) */ extern int xfs_bitmap_empty(uint *map, uint size); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 2e9b34b7344..1c0a5a585a8 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -2830,11 +2830,11 @@ xfs_bmap_btalloc( args.prod = align; if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); - } else if (mp->m_sb.sb_blocksize >= NBPP) { + } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { args.prod = 1; args.mod = 0; } else { - args.prod = NBPP >> mp->m_sb.sb_blocklog; + args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod)))) args.mod = (xfs_extlen_t)(args.prod - args.mod); } @@ -2969,7 +2969,7 @@ STATIC int xfs_bmap_alloc( xfs_bmalloca_t *ap) /* bmap alloc argument struct */ { - if ((ap->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && ap->userdata) + if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata) return xfs_bmap_rtalloc(ap); return xfs_bmap_btalloc(ap); } @@ -3096,8 +3096,7 @@ xfs_bmap_del_extent( /* * Realtime allocation. Free it and record di_nblocks update. */ - if (whichfork == XFS_DATA_FORK && - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { + if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { xfs_fsblock_t bno; xfs_filblks_t len; @@ -3956,7 +3955,6 @@ xfs_bmap_add_attrfork( xfs_bmap_free_t flist; /* freed extent records */ xfs_mount_t *mp; /* mount structure */ xfs_trans_t *tp; /* transaction pointer */ - unsigned long s; /* spinlock spl value */ int blks; /* space reservation */ int version = 1; /* superblock attr version */ int committed; /* xaction was committed */ @@ -4053,7 +4051,7 @@ xfs_bmap_add_attrfork( (!XFS_SB_VERSION_HASATTR2(&mp->m_sb) && version == 2)) { __int64_t sbfields = 0; - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); if (!XFS_SB_VERSION_HASATTR(&mp->m_sb)) { XFS_SB_VERSION_ADDATTR(&mp->m_sb); sbfields |= XFS_SB_VERSIONNUM; @@ -4063,10 +4061,10 @@ xfs_bmap_add_attrfork( sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); } if (sbfields) { - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_mod_sb(tp, sbfields); } else - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } if ((error = xfs_bmap_finish(&tp, &flist, &committed))) goto error2; @@ -6394,7 +6392,7 @@ xfs_bmap_count_blocks( * Recursively walks each level of a btree * to count total fsblocks is use. */ -int /* error */ +STATIC int /* error */ xfs_bmap_count_tree( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ @@ -6470,7 +6468,7 @@ xfs_bmap_count_tree( /* * Count leaf blocks given a range of extent records. */ -int +STATIC int xfs_bmap_count_leaves( xfs_ifork_t *ifp, xfs_extnum_t idx, @@ -6490,7 +6488,7 @@ xfs_bmap_count_leaves( * Count leaf blocks given a range of extent records originally * in btree format. */ -int +STATIC int xfs_bmap_disk_count_leaves( xfs_extnum_t idx, xfs_bmbt_block_t *block, diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 68267d75ff1..87224b7d798 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -25,6 +25,8 @@ struct xfs_inode; struct xfs_mount; struct xfs_trans; +extern kmem_zone_t *xfs_bmap_free_item_zone; + /* * DELTA: describe a change to the in-core extent list. * diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 32b49ec00fb..c4181d85605 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2062,8 +2062,7 @@ xfs_bmbt_insert( pcur->bc_private.b.allocated; pcur->bc_private.b.allocated = 0; ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) || - (cur->bc_private.b.ip->i_d.di_flags & - XFS_DIFLAG_REALTIME)); + XFS_IS_REALTIME_INODE(cur->bc_private.b.ip)); cur->bc_private.b.firstblock = pcur->bc_private.b.firstblock; ASSERT(cur->bc_private.b.flist == diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 6e40a0a198f..7440b78f9ce 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -24,6 +24,8 @@ struct xfs_inode; struct xfs_mount; struct xfs_trans; +extern kmem_zone_t *xfs_btree_cur_zone; + /* * This nonsense is to make -wlint happy. */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index c8f2c2886fe..63debd147eb 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -378,7 +378,6 @@ xfs_buf_item_unpin( xfs_mount_t *mp; xfs_buf_t *bp; int freed; - SPLDECL(s); bp = bip->bli_buf; ASSERT(bp != NULL); @@ -409,8 +408,8 @@ xfs_buf_item_unpin( XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); } else { - AIL_LOCK(mp,s); - xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s); + spin_lock(&mp->m_ail_lock); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); xfs_buf_item_relse(bp); ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); } @@ -1113,7 +1112,6 @@ xfs_buf_iodone( xfs_buf_log_item_t *bip) { struct xfs_mount *mp; - SPLDECL(s); ASSERT(bip->bli_buf == bp); @@ -1128,11 +1126,11 @@ xfs_buf_iodone( * * Either way, AIL is useless if we're forcing a shutdown. */ - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); /* * xfs_trans_delete_ail() drops the AIL lock. */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); #ifdef XFS_TRANS_DEBUG kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp)); diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index d7e13614306..5a41c348bb1 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -18,6 +18,8 @@ #ifndef __XFS_BUF_ITEM_H__ #define __XFS_BUF_ITEM_H__ +extern kmem_zone_t *xfs_buf_item_zone; + /* * This is the structure used to lay out a buf log item in the * log. The data map describes which 128 byte chunks of the buffer diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 26d09e2e1a7..1b446849fb3 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -2218,7 +2218,7 @@ xfs_da_state_free(xfs_da_state_t *state) #ifdef XFS_DABUF_DEBUG xfs_dabuf_t *xfs_dabuf_global_list; -lock_t xfs_dabuf_global_lock; +spinlock_t xfs_dabuf_global_lock; #endif /* @@ -2264,10 +2264,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) } #ifdef XFS_DABUF_DEBUG { - SPLDECL(s); xfs_dabuf_t *p; - s = mutex_spinlock(&xfs_dabuf_global_lock); + spin_lock(&xfs_dabuf_global_lock); for (p = xfs_dabuf_global_list; p; p = p->next) { ASSERT(p->blkno != dabuf->blkno || p->target != dabuf->target); @@ -2277,7 +2276,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) xfs_dabuf_global_list->prev = dabuf; dabuf->next = xfs_dabuf_global_list; xfs_dabuf_global_list = dabuf; - mutex_spinunlock(&xfs_dabuf_global_lock, s); + spin_unlock(&xfs_dabuf_global_lock); } #endif return dabuf; @@ -2319,16 +2318,14 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf) kmem_free(dabuf->data, BBTOB(dabuf->bbcount)); #ifdef XFS_DABUF_DEBUG { - SPLDECL(s); - - s = mutex_spinlock(&xfs_dabuf_global_lock); + spin_lock(&xfs_dabuf_global_lock); if (dabuf->prev) dabuf->prev->next = dabuf->next; else xfs_dabuf_global_list = dabuf->next; if (dabuf->next) dabuf->next->prev = dabuf->prev; - mutex_spinunlock(&xfs_dabuf_global_lock, s); + spin_unlock(&xfs_dabuf_global_lock); } memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf)); #endif diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 44dabf02f2a..7facf86f74f 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -260,6 +260,7 @@ void xfs_da_binval(struct xfs_trans *tp, xfs_dabuf_t *dabuf); xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf); extern struct kmem_zone *xfs_da_state_zone; +extern struct kmem_zone *xfs_dabuf_zone; #endif /* __KERNEL__ */ #endif /* __XFS_DA_BTREE_H__ */ diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 584f1ae85cd..3f53fad356a 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -52,76 +52,72 @@ xfs_swapext( xfs_swapext_t __user *sxu) { xfs_swapext_t *sxp; - xfs_inode_t *ip=NULL, *tip=NULL; - xfs_mount_t *mp; - struct file *fp = NULL, *tfp = NULL; - bhv_vnode_t *vp, *tvp; + xfs_inode_t *ip, *tip; + struct file *file, *target_file; int error = 0; sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL); if (!sxp) { error = XFS_ERROR(ENOMEM); - goto error0; + goto out; } if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t))) { error = XFS_ERROR(EFAULT); - goto error0; + goto out_free_sxp; } /* Pull information for the target fd */ - if (((fp = fget((int)sxp->sx_fdtarget)) == NULL) || - ((vp = vn_from_inode(fp->f_path.dentry->d_inode)) == NULL)) { + file = fget((int)sxp->sx_fdtarget); + if (!file) { error = XFS_ERROR(EINVAL); - goto error0; + goto out_free_sxp; } - ip = xfs_vtoi(vp); - if (ip == NULL) { + if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); - goto error0; + goto out_put_file; } - if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) || - ((tvp = vn_from_inode(tfp->f_path.dentry->d_inode)) == NULL)) { + target_file = fget((int)sxp->sx_fdtmp); + if (!target_file) { error = XFS_ERROR(EINVAL); - goto error0; + goto out_put_file; } - tip = xfs_vtoi(tvp); - if (tip == NULL) { + if (!(target_file->f_mode & FMODE_WRITE) || + (target_file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); - goto error0; + goto out_put_target_file; } + ip = XFS_I(file->f_path.dentry->d_inode); + tip = XFS_I(target_file->f_path.dentry->d_inode); + if (ip->i_mount != tip->i_mount) { - error = XFS_ERROR(EINVAL); - goto error0; + error = XFS_ERROR(EINVAL); + goto out_put_target_file; } if (ip->i_ino == tip->i_ino) { - error = XFS_ERROR(EINVAL); - goto error0; + error = XFS_ERROR(EINVAL); + goto out_put_target_file; } - mp = ip->i_mount; - - if (XFS_FORCED_SHUTDOWN(mp)) { - error = XFS_ERROR(EIO); - goto error0; + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + error = XFS_ERROR(EIO); + goto out_put_target_file; } - error = XFS_SWAP_EXTENTS(mp, &ip->i_iocore, &tip->i_iocore, sxp); - - error0: - if (fp != NULL) - fput(fp); - if (tfp != NULL) - fput(tfp); - - if (sxp != NULL) - kmem_free(sxp, sizeof(xfs_swapext_t)); + error = xfs_swap_extents(ip, tip, sxp); + out_put_target_file: + fput(target_file); + out_put_file: + fput(file); + out_free_sxp: + kmem_free(sxp, sizeof(xfs_swapext_t)); + out: return error; } @@ -169,15 +165,6 @@ xfs_swap_extents( xfs_lock_inodes(ips, 2, 0, lock_flags); locked = 1; - /* Check permissions */ - error = xfs_iaccess(ip, S_IWUSR, NULL); - if (error) - goto error0; - - error = xfs_iaccess(tip, S_IWUSR, NULL); - if (error) - goto error0; - /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { error = XFS_ERROR(EINVAL); @@ -185,8 +172,7 @@ xfs_swap_extents( } /* Verify both files are either real-time or non-realtime */ - if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != - (tip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { + if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { error = XFS_ERROR(EINVAL); goto error0; } @@ -199,7 +185,7 @@ xfs_swap_extents( } if (VN_CACHED(tvp) != 0) { - xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1); + xfs_inval_cached_trace(tip, 0, -1, 0, -1); error = xfs_flushinval_pages(tip, 0, -1, FI_REMAPF_LOCKED); if (error) diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index dedd713574e..c9065eaf2a4 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -171,69 +171,35 @@ typedef enum xfs_dinode_fmt /* * Inode data & attribute fork sizes, per inode. */ -#define XFS_CFORK_Q(dcp) ((dcp)->di_forkoff != 0) -#define XFS_CFORK_Q_DISK(dcp) ((dcp)->di_forkoff != 0) - -#define XFS_CFORK_BOFF(dcp) ((int)((dcp)->di_forkoff << 3)) -#define XFS_CFORK_BOFF_DISK(dcp) ((int)((dcp)->di_forkoff << 3)) - -#define XFS_CFORK_DSIZE_DISK(dcp,mp) \ - (XFS_CFORK_Q_DISK(dcp) ? XFS_CFORK_BOFF_DISK(dcp) : XFS_LITINO(mp)) -#define XFS_CFORK_DSIZE(dcp,mp) \ - (XFS_CFORK_Q(dcp) ? XFS_CFORK_BOFF(dcp) : XFS_LITINO(mp)) - -#define XFS_CFORK_ASIZE_DISK(dcp,mp) \ - (XFS_CFORK_Q_DISK(dcp) ? XFS_LITINO(mp) - XFS_CFORK_BOFF_DISK(dcp) : 0) -#define XFS_CFORK_ASIZE(dcp,mp) \ - (XFS_CFORK_Q(dcp) ? XFS_LITINO(mp) - XFS_CFORK_BOFF(dcp) : 0) - -#define XFS_CFORK_SIZE_DISK(dcp,mp,w) \ - ((w) == XFS_DATA_FORK ? \ - XFS_CFORK_DSIZE_DISK(dcp, mp) : \ - XFS_CFORK_ASIZE_DISK(dcp, mp)) -#define XFS_CFORK_SIZE(dcp,mp,w) \ - ((w) == XFS_DATA_FORK ? \ - XFS_CFORK_DSIZE(dcp, mp) : XFS_CFORK_ASIZE(dcp, mp)) +#define XFS_DFORK_Q(dip) ((dip)->di_core.di_forkoff != 0) +#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_core.di_forkoff << 3)) #define XFS_DFORK_DSIZE(dip,mp) \ - XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp) -#define XFS_DFORK_DSIZE_HOST(dip,mp) \ - XFS_CFORK_DSIZE(&(dip)->di_core, mp) + (XFS_DFORK_Q(dip) ? \ + XFS_DFORK_BOFF(dip) : \ + XFS_LITINO(mp)) #define XFS_DFORK_ASIZE(dip,mp) \ - XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp) -#define XFS_DFORK_ASIZE_HOST(dip,mp) \ - XFS_CFORK_ASIZE(&(dip)->di_core, mp) -#define XFS_DFORK_SIZE(dip,mp,w) \ - XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w) -#define XFS_DFORK_SIZE_HOST(dip,mp,w) \ - XFS_CFORK_SIZE(&(dip)->di_core, mp, w) + (XFS_DFORK_Q(dip) ? \ + XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : \ + 0) +#define XFS_DFORK_SIZE(dip,mp,w) \ + ((w) == XFS_DATA_FORK ? \ + XFS_DFORK_DSIZE(dip, mp) : \ + XFS_DFORK_ASIZE(dip, mp)) -#define XFS_DFORK_Q(dip) XFS_CFORK_Q_DISK(&(dip)->di_core) -#define XFS_DFORK_BOFF(dip) XFS_CFORK_BOFF_DISK(&(dip)->di_core) -#define XFS_DFORK_DPTR(dip) ((dip)->di_u.di_c) -#define XFS_DFORK_APTR(dip) \ +#define XFS_DFORK_DPTR(dip) ((dip)->di_u.di_c) +#define XFS_DFORK_APTR(dip) \ ((dip)->di_u.di_c + XFS_DFORK_BOFF(dip)) -#define XFS_DFORK_PTR(dip,w) \ +#define XFS_DFORK_PTR(dip,w) \ ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip)) -#define XFS_CFORK_FORMAT(dcp,w) \ - ((w) == XFS_DATA_FORK ? (dcp)->di_format : (dcp)->di_aformat) -#define XFS_CFORK_FMT_SET(dcp,w,n) \ +#define XFS_DFORK_FORMAT(dip,w) \ ((w) == XFS_DATA_FORK ? \ - ((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n))) -#define XFS_DFORK_FORMAT(dip,w) XFS_CFORK_FORMAT(&(dip)->di_core, w) - -#define XFS_CFORK_NEXTENTS_DISK(dcp,w) \ + (dip)->di_core.di_format : \ + (dip)->di_core.di_aformat) +#define XFS_DFORK_NEXTENTS(dip,w) \ ((w) == XFS_DATA_FORK ? \ - be32_to_cpu((dcp)->di_nextents) : \ - be16_to_cpu((dcp)->di_anextents)) -#define XFS_CFORK_NEXTENTS(dcp,w) \ - ((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents) -#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w) -#define XFS_DFORK_NEXTENTS_HOST(dip,w) XFS_CFORK_NEXTENTS(&(dip)->di_core, w) - -#define XFS_CFORK_NEXT_SET(dcp,w,n) \ - ((w) == XFS_DATA_FORK ? \ - ((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n))) + be32_to_cpu((dip)->di_core.di_nextents) : \ + be16_to_cpu((dip)->di_core.di_anextents)) #define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) @@ -273,6 +239,12 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) #define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT) +#ifdef CONFIG_XFS_RT +#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) +#else +#define XFS_IS_REALTIME_INODE(ip) (0) +#endif + #define XFS_DIFLAG_ANY \ (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index b0f1ee8fcb9..be7c4251fa6 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -42,6 +42,7 @@ #include "xfs_dir2_node.h" #include "xfs_dir2_trace.h" #include "xfs_error.h" +#include "xfs_vnodeops.h" void @@ -301,7 +302,7 @@ xfs_readdir( int rval; /* return value */ int v; /* type-checking value */ - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return XFS_ERROR(EIO); diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index a4634d94e56..05e5365d3c3 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -230,37 +230,6 @@ xfs_error_report( } } -STATIC void -xfs_hex_dump(void *p, int length) -{ - __uint8_t *uip = (__uint8_t*)p; - int i; - char sbuf[128], *s; - - s = sbuf; - *s = '\0'; - for (i=0; i<length; i++, uip++) { - if ((i % 16) == 0) { - if (*s != '\0') - cmn_err(CE_ALERT, "%s\n", sbuf); - s = sbuf; - sprintf(s, "0x%x: ", i); - while( *s != '\0') - s++; - } - sprintf(s, "%02x ", *uip); - - /* - * the kernel sprintf is a void; user sprintf returns - * the sprintf'ed string's length. Find the new end- - * of-string - */ - while( *s != '\0') - s++; - } - cmn_err(CE_ALERT, "%s\n", sbuf); -} - void xfs_corruption_error( char *tag, diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 10e9d9619ae..6490d2a9f8e 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -174,6 +174,8 @@ extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp, /* PRINTFLIKE3 */ extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...); +extern void xfs_hex_dump(void *p, int length); + #define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \ xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args) diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index f938a51be81..132bd07b9bb 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -110,19 +110,18 @@ STATIC void xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) { xfs_mount_t *mp; - SPLDECL(s); mp = efip->efi_item.li_mountp; - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * xfs_trans_delete_ail() drops the AIL lock. */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } } @@ -138,10 +137,9 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) { xfs_mount_t *mp; xfs_log_item_desc_t *lidp; - SPLDECL(s); mp = efip->efi_item.li_mountp; - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * free the xaction descriptor pointing to this item @@ -152,11 +150,11 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) * pull the item off the AIL. * xfs_trans_delete_ail() drops the AIL lock. */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } } @@ -350,13 +348,12 @@ xfs_efi_release(xfs_efi_log_item_t *efip, { xfs_mount_t *mp; int extents_left; - SPLDECL(s); mp = efip->efi_item.li_mountp; ASSERT(efip->efi_next_extent > 0); ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); ASSERT(efip->efi_next_extent >= nextents); efip->efi_next_extent -= nextents; extents_left = efip->efi_next_extent; @@ -364,10 +361,10 @@ xfs_efi_release(xfs_efi_log_item_t *efip, /* * xfs_trans_delete_ail() drops the AIL lock. */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 36d8f6aa11a..eb03eab5ca5 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -348,7 +348,7 @@ _xfs_filestream_update_ag( } /* xfs_fstrm_free_func(): callback for freeing cached stream items. */ -void +STATIC void xfs_fstrm_free_func( unsigned long ino, void *data) diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index aab96627651..3bed6433d05 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -419,9 +419,13 @@ typedef struct xfs_handle { /* * ioctl commands that are used by Linux filesystems */ -#define XFS_IOC_GETXFLAGS _IOR('f', 1, long) -#define XFS_IOC_SETXFLAGS _IOW('f', 2, long) -#define XFS_IOC_GETVERSION _IOR('v', 1, long) +#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS +#define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS +#define XFS_IOC_GETVERSION FS_IOC_GETVERSION +/* 32-bit compat counterparts */ +#define XFS_IOC32_GETXFLAGS FS_IOC32_GETFLAGS +#define XFS_IOC32_SETXFLAGS FS_IOC32_SETFLAGS +#define XFS_IOC32_GETVERSION FS_IOC32_GETVERSION /* * ioctl commands that replace IRIX fcntl()'s diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index c92d5b82102..b8de7f3cc17 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -462,15 +462,13 @@ xfs_fs_counts( xfs_mount_t *mp, xfs_fsop_counts_t *cnt) { - unsigned long s; - xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); cnt->freertx = mp->m_sb.sb_frextents; cnt->freeino = mp->m_sb.sb_ifree; cnt->allocino = mp->m_sb.sb_icount; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); return 0; } @@ -497,7 +495,6 @@ xfs_reserve_blocks( { __int64_t lcounter, delta, fdblks_delta; __uint64_t request; - unsigned long s; /* If inval is null, report current values and return */ if (inval == (__uint64_t *)NULL) { @@ -515,7 +512,7 @@ xfs_reserve_blocks( * problem. we needto work out if we are freeing or allocation * blocks first, then we can do the modification as necessary. * - * We do this under the XFS_SB_LOCK so that if we are near + * We do this under the m_sb_lock so that if we are near * ENOSPC, we will hold out any changes while we work out * what to do. This means that the amount of free space can * change while we do this, so we need to retry if we end up @@ -526,7 +523,7 @@ xfs_reserve_blocks( * enabled, disabled or even compiled in.... */ retry: - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED); /* @@ -569,7 +566,7 @@ out: outval->resblks = mp->m_resblks; outval->resblks_avail = mp->m_resblks_avail; } - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); if (fdblks_delta) { /* diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index bf8e9aff272..8efc4a5b8b9 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -81,8 +81,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; #define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) #define XFS_INOBT_IS_FREE(rp,i) \ (((rp)->ir_free & XFS_INOBT_MASK(i)) != 0) -#define XFS_INOBT_IS_FREE_DISK(rp,i) \ - ((be64_to_cpu((rp)->ir_free) & XFS_INOBT_MASK(i)) != 0) #define XFS_INOBT_SET_FREE(rp,i) ((rp)->ir_free |= XFS_INOBT_MASK(i)) #define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i)) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index fb69ef180b2..f01b07687fa 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -65,7 +65,7 @@ */ STATIC int xfs_iget_core( - bhv_vnode_t *vp, + struct inode *inode, xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, @@ -74,9 +74,9 @@ xfs_iget_core( xfs_inode_t **ipp, xfs_daddr_t bno) { + struct inode *old_inode; xfs_inode_t *ip; xfs_inode_t *iq; - bhv_vnode_t *inode_vp; int error; xfs_icluster_t *icl, *new_icl = NULL; unsigned long first_index, mask; @@ -111,8 +111,8 @@ again: goto again; } - inode_vp = XFS_ITOV_NULL(ip); - if (inode_vp == NULL) { + old_inode = ip->i_vnode; + if (old_inode == NULL) { /* * If IRECLAIM is set this inode is * on its way out of the system, @@ -140,28 +140,9 @@ again: return ENOENT; } - /* - * There may be transactions sitting in the - * incore log buffers or being flushed to disk - * at this time. We can't clear the - * XFS_IRECLAIMABLE flag until these - * transactions have hit the disk, otherwise we - * will void the guarantee the flag provides - * xfs_iunpin() - */ - if (xfs_ipincount(ip)) { - read_unlock(&pag->pag_ici_lock); - xfs_log_force(mp, 0, - XFS_LOG_FORCE|XFS_LOG_SYNC); - XFS_STATS_INC(xs_ig_frecycle); - goto again; - } - - vn_trace_exit(ip, "xfs_iget.alloc", - (inst_t *)__return_address); + xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); XFS_STATS_INC(xs_ig_found); - xfs_iflags_clear(ip, XFS_IRECLAIMABLE); read_unlock(&pag->pag_ici_lock); @@ -171,13 +152,11 @@ again: goto finish_inode; - } else if (vp != inode_vp) { - struct inode *inode = vn_to_inode(inode_vp); - + } else if (inode != old_inode) { /* The inode is being torn down, pause and * try again. */ - if (inode->i_state & (I_FREEING | I_CLEAR)) { + if (old_inode->i_state & (I_FREEING | I_CLEAR)) { read_unlock(&pag->pag_ici_lock); delay(1); XFS_STATS_INC(xs_ig_frecycle); @@ -190,7 +169,7 @@ again: */ cmn_err(CE_PANIC, "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", - inode_vp, vp); + old_inode, inode); } /* @@ -200,20 +179,16 @@ again: XFS_STATS_INC(xs_ig_found); finish_inode: - if (ip->i_d.di_mode == 0) { - if (!(flags & XFS_IGET_CREATE)) { - xfs_put_perag(mp, pag); - return ENOENT; - } - xfs_iocore_inode_reinit(ip); + if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { + xfs_put_perag(mp, pag); + return ENOENT; } if (lock_flags != 0) xfs_ilock(ip, lock_flags); xfs_iflags_clear(ip, XFS_ISTALE); - vn_trace_exit(ip, "xfs_iget.found", - (inst_t *)__return_address); + xfs_itrace_exit_tag(ip, "xfs_iget.found"); goto return_ip; } @@ -234,10 +209,16 @@ finish_inode: return error; } - vn_trace_exit(ip, "xfs_iget.alloc", (inst_t *)__return_address); + xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); + + + mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, + "xfsino", ip->i_ino); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); + init_waitqueue_head(&ip->i_ipin_wait); + atomic_set(&ip->i_pincount, 0); + initnsema(&ip->i_flock, 1, "xfsfino"); - xfs_inode_lock_init(ip, vp); - xfs_iocore_inode_init(ip); if (lock_flags) xfs_ilock(ip, lock_flags); @@ -333,9 +314,6 @@ finish_inode: ASSERT(ip->i_df.if_ext_max == XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); - ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == - ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); - xfs_iflags_set(ip, XFS_IMODIFIED); *ipp = ip; @@ -343,7 +321,7 @@ finish_inode: * If we have a real type for an on-disk inode, we can set ops(&unlock) * now. If it's a new inode being created, xfs_ialloc will handle it. */ - xfs_initialize_vnode(mp, vp, ip); + xfs_initialize_vnode(mp, inode, ip); return 0; } @@ -363,69 +341,58 @@ xfs_iget( xfs_daddr_t bno) { struct inode *inode; - bhv_vnode_t *vp = NULL; + xfs_inode_t *ip; int error; XFS_STATS_INC(xs_ig_attempts); retry: inode = iget_locked(mp->m_super, ino); - if (inode) { - xfs_inode_t *ip; - - vp = vn_from_inode(inode); - if (inode->i_state & I_NEW) { - vn_initialize(inode); - error = xfs_iget_core(vp, mp, tp, ino, flags, - lock_flags, ipp, bno); - if (error) { - vn_mark_bad(vp); - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - iput(inode); - } - } else { - /* - * If the inode is not fully constructed due to - * filehandle mismatches wait for the inode to go - * away and try again. - * - * iget_locked will call __wait_on_freeing_inode - * to wait for the inode to go away. - */ - if (is_bad_inode(inode) || - ((ip = xfs_vtoi(vp)) == NULL)) { - iput(inode); - delay(1); - goto retry; - } - - if (lock_flags != 0) - xfs_ilock(ip, lock_flags); - XFS_STATS_INC(xs_ig_found); - *ipp = ip; - error = 0; + if (!inode) + /* If we got no inode we are out of memory */ + return ENOMEM; + + if (inode->i_state & I_NEW) { + XFS_STATS_INC(vn_active); + XFS_STATS_INC(vn_alloc); + + error = xfs_iget_core(inode, mp, tp, ino, flags, + lock_flags, ipp, bno); + if (error) { + make_bad_inode(inode); + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + iput(inode); } - } else - error = ENOMEM; /* If we got no inode we are out of memory */ + return error; + } - return error; -} + /* + * If the inode is not fully constructed due to + * filehandle mismatches wait for the inode to go + * away and try again. + * + * iget_locked will call __wait_on_freeing_inode + * to wait for the inode to go away. + */ + if (is_bad_inode(inode)) { + iput(inode); + delay(1); + goto retry; + } -/* - * Do the setup for the various locks within the incore inode. - */ -void -xfs_inode_lock_init( - xfs_inode_t *ip, - bhv_vnode_t *vp) -{ - mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, - "xfsino", ip->i_ino); - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - init_waitqueue_head(&ip->i_ipin_wait); - atomic_set(&ip->i_pincount, 0); - initnsema(&ip->i_flock, 1, "xfsfino"); + ip = XFS_I(inode); + if (!ip) { + iput(inode); + delay(1); + goto retry; + } + + if (lock_flags != 0) + xfs_ilock(ip, lock_flags); + XFS_STATS_INC(xs_ig_found); + *ipp = ip; + return 0; } /* @@ -465,11 +432,9 @@ void xfs_iput(xfs_inode_t *ip, uint lock_flags) { - bhv_vnode_t *vp = XFS_ITOV(ip); - - vn_trace_entry(ip, "xfs_iput", (inst_t *)__return_address); + xfs_itrace_entry(ip); xfs_iunlock(ip, lock_flags); - VN_RELE(vp); + IRELE(ip); } /* @@ -479,20 +444,19 @@ void xfs_iput_new(xfs_inode_t *ip, uint lock_flags) { - bhv_vnode_t *vp = XFS_ITOV(ip); - struct inode *inode = vn_to_inode(vp); + struct inode *inode = ip->i_vnode; - vn_trace_entry(ip, "xfs_iput_new", (inst_t *)__return_address); + xfs_itrace_entry(ip); if ((ip->i_d.di_mode == 0)) { ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - vn_mark_bad(vp); + make_bad_inode(inode); } if (inode->i_state & I_NEW) unlock_new_inode(inode); if (lock_flags) xfs_iunlock(ip, lock_flags); - VN_RELE(vp); + IRELE(ip); } @@ -505,8 +469,6 @@ xfs_iput_new(xfs_inode_t *ip, void xfs_ireclaim(xfs_inode_t *ip) { - bhv_vnode_t *vp; - /* * Remove from old hash list and mount list. */ @@ -535,9 +497,8 @@ xfs_ireclaim(xfs_inode_t *ip) /* * Pull our behavior descriptor from the vnode chain. */ - vp = XFS_ITOV_NULL(ip); - if (vp) { - vn_to_inode(vp)->i_private = NULL; + if (ip->i_vnode) { + ip->i_vnode->i_private = NULL; ip->i_vnode = NULL; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 34494808281..a550546a708 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -15,6 +15,8 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <linux/log2.h> + #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" @@ -826,15 +828,17 @@ xfs_ip2xflags( xfs_icdinode_t *dic = &ip->i_d; return _xfs_dic2xflags(dic->di_flags) | - (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); + (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0); } uint xfs_dic2xflags( - xfs_dinode_core_t *dic) + xfs_dinode_t *dip) { + xfs_dinode_core_t *dic = &dip->di_core; + return _xfs_dic2xflags(be16_to_cpu(dic->di_flags)) | - (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0); + (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); } /* @@ -884,8 +888,8 @@ xfs_iread( * Initialize inode's trace buffers. * Do this before xfs_iformat in case it adds entries. */ -#ifdef XFS_VNODE_TRACE - ip->i_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); +#ifdef XFS_INODE_TRACE + ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP); #endif #ifdef XFS_BMAP_TRACE ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); @@ -1220,10 +1224,8 @@ xfs_ialloc( ip->i_d.di_extsize = pip->i_d.di_extsize; } } else if ((mode & S_IFMT) == S_IFREG) { - if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) { + if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_REALTIME; - ip->i_iocore.io_flags |= XFS_IOCORE_RT; - } if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { di_flags |= XFS_DIFLAG_EXTSIZE; ip->i_d.di_extsize = pip->i_d.di_extsize; @@ -1298,7 +1300,10 @@ xfs_isize_check( if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) return; - if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE)) + if (XFS_IS_REALTIME_INODE(ip)) + return; + + if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) return; nimaps = 2; @@ -1711,7 +1716,7 @@ xfs_itruncate_finish( * runs. */ XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_BUNMAPI(mp, ntp, &ip->i_iocore, + error = xfs_bunmapi(ntp, ip, first_unmap_block, unmap_len, XFS_BMAPI_AFLAG(fork) | (sync ? 0 : XFS_BMAPI_ASYNC), @@ -1844,8 +1849,6 @@ xfs_igrow_start( xfs_fsize_t new_size, cred_t *credp) { - int error; - ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); ASSERT(new_size > ip->i_size); @@ -1855,9 +1858,7 @@ xfs_igrow_start( * xfs_write_file() beyond the end of the file * and any blocks between the old and new file sizes. */ - error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, - ip->i_size); - return error; + return xfs_zero_eof(ip, new_size, ip->i_size); } /* @@ -1959,24 +1960,6 @@ xfs_iunlink( ASSERT(agi->agi_unlinked[bucket_index]); ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); - if (error) - return error; - - /* - * Clear the on-disk di_nlink. This is to prevent xfs_bulkstat - * from picking up this inode when it is reclaimed (its incore state - * initialzed but not flushed to disk yet). The in-core di_nlink is - * already cleared in xfs_droplink() and a corresponding transaction - * logged. The hack here just synchronizes the in-core to on-disk - * di_nlink value in advance before the actual inode sync to disk. - * This is OK because the inode is already unlinked and would never - * change its di_nlink again for this inode generation. - * This is a temporary hack that would require a proper fix - * in the future. - */ - dip->di_core.di_nlink = 0; - if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { /* * There is already another inode in the bucket we need @@ -1984,6 +1967,10 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); + if (error) + return error; + ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); /* both on-disk, don't endian flip twice */ dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; @@ -2209,7 +2196,6 @@ xfs_ifree_cluster( xfs_inode_log_item_t *iip; xfs_log_item_t *lip; xfs_perag_t *pag = xfs_get_perag(mp, inum); - SPLDECL(s); if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { blks_per_cluster = 1; @@ -2311,9 +2297,9 @@ xfs_ifree_cluster( iip = (xfs_inode_log_item_t *)lip; ASSERT(iip->ili_logged == 1); lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); iip->ili_flush_lsn = iip->ili_item.li_lsn; - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); xfs_iflags_set(iip->ili_inode, XFS_ISTALE); pre_flushed++; } @@ -2334,9 +2320,9 @@ xfs_ifree_cluster( iip->ili_last_fields = iip->ili_format.ilf_fields; iip->ili_format.ilf_fields = 0; iip->ili_logged = 1; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); iip->ili_flush_lsn = iip->ili_item.li_lsn; - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) @@ -2374,6 +2360,8 @@ xfs_ifree( int error; int delete; xfs_ino_t first_ino; + xfs_dinode_t *dip; + xfs_buf_t *ibp; ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); ASSERT(ip->i_transp == tp); @@ -2409,8 +2397,27 @@ xfs_ifree( * by reincarnations of this inode. */ ip->i_d.di_gen++; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0); + if (error) + return error; + + /* + * Clear the on-disk di_mode. This is to prevent xfs_bulkstat + * from picking up this inode when it is reclaimed (its incore state + * initialzed but not flushed to disk yet). The in-core di_mode is + * already cleared and a corresponding transaction logged. + * The hack here just synchronizes the in-core to on-disk + * di_mode value in advance before the actual inode sync to disk. + * This is OK because the inode is already unlinked and would never + * change its di_mode again for this inode generation. + * This is a temporary hack that would require a proper fix + * in the future. + */ + dip->di_core.di_mode = 0; + if (delete) { xfs_ifree_cluster(ip, tp, first_ino); } @@ -2735,7 +2742,6 @@ void xfs_idestroy( xfs_inode_t *ip) { - switch (ip->i_d.di_mode & S_IFMT) { case S_IFREG: case S_IFDIR: @@ -2749,7 +2755,7 @@ xfs_idestroy( mrfree(&ip->i_iolock); freesema(&ip->i_flock); -#ifdef XFS_VNODE_TRACE +#ifdef XFS_INODE_TRACE ktrace_free(ip->i_trace); #endif #ifdef XFS_BMAP_TRACE @@ -2775,16 +2781,15 @@ xfs_idestroy( */ xfs_mount_t *mp = ip->i_mount; xfs_log_item_t *lip = &ip->i_itemp->ili_item; - int s; ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || XFS_FORCED_SHUTDOWN(ip->i_mount)); if (lip->li_flags & XFS_LI_IN_AIL) { - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); if (lip->li_flags & XFS_LI_IN_AIL) - xfs_trans_delete_ail(mp, lip, s); + xfs_trans_delete_ail(mp, lip); else - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } xfs_inode_item_destroy(ip); } @@ -2816,40 +2821,8 @@ xfs_iunpin( { ASSERT(atomic_read(&ip->i_pincount) > 0); - if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) { - - /* - * If the inode is currently being reclaimed, the link between - * the bhv_vnode and the xfs_inode will be broken after the - * XFS_IRECLAIM* flag is set. Hence, if these flags are not - * set, then we can move forward and mark the linux inode dirty - * knowing that it is still valid as it won't freed until after - * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The - * i_flags_lock is used to synchronise the setting of the - * XFS_IRECLAIM* flags and the breaking of the link, and so we - * can execute atomically w.r.t to reclaim by holding this lock - * here. - * - * However, we still need to issue the unpin wakeup call as the - * inode reclaim may be blocked waiting for the inode to become - * unpinned. - */ - - if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) { - bhv_vnode_t *vp = XFS_ITOV_NULL(ip); - struct inode *inode = NULL; - - BUG_ON(vp == NULL); - inode = vn_to_inode(vp); - BUG_ON(inode->i_state & I_CLEAR); - - /* make sync come back and flush this inode */ - if (!(inode->i_state & (I_NEW|I_FREEING))) - mark_inode_dirty_sync(inode); - } - spin_unlock(&ip->i_flags_lock); + if (atomic_dec_and_test(&ip->i_pincount)) wake_up(&ip->i_ipin_wait); - } } /* @@ -3338,7 +3311,6 @@ xfs_iflush_int( #ifdef XFS_TRANS_DEBUG int first; #endif - SPLDECL(s); ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); ASSERT(issemalocked(&(ip->i_flock))); @@ -3533,9 +3505,9 @@ xfs_iflush_int( iip->ili_logged = 1; ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); iip->ili_flush_lsn = iip->ili_item.li_lsn; - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); /* * Attach the function xfs_iflush_done to the inode's @@ -3611,95 +3583,6 @@ xfs_iflush_all( XFS_MOUNT_IUNLOCK(mp); } -/* - * xfs_iaccess: check accessibility of inode for mode. - */ -int -xfs_iaccess( - xfs_inode_t *ip, - mode_t mode, - cred_t *cr) -{ - int error; - mode_t orgmode = mode; - struct inode *inode = vn_to_inode(XFS_ITOV(ip)); - - if (mode & S_IWUSR) { - umode_t imode = inode->i_mode; - - if (IS_RDONLY(inode) && - (S_ISREG(imode) || S_ISDIR(imode) || S_ISLNK(imode))) - return XFS_ERROR(EROFS); - - if (IS_IMMUTABLE(inode)) - return XFS_ERROR(EACCES); - } - - /* - * If there's an Access Control List it's used instead of - * the mode bits. - */ - if ((error = _ACL_XFS_IACCESS(ip, mode, cr)) != -1) - return error ? XFS_ERROR(error) : 0; - - if (current_fsuid(cr) != ip->i_d.di_uid) { - mode >>= 3; - if (!in_group_p((gid_t)ip->i_d.di_gid)) - mode >>= 3; - } - - /* - * If the DACs are ok we don't need any capability check. - */ - if ((ip->i_d.di_mode & mode) == mode) - return 0; - /* - * Read/write DACs are always overridable. - * Executable DACs are overridable if at least one exec bit is set. - */ - if (!(orgmode & S_IXUSR) || - (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) - if (capable_cred(cr, CAP_DAC_OVERRIDE)) - return 0; - - if ((orgmode == S_IRUSR) || - (S_ISDIR(inode->i_mode) && (!(orgmode & S_IWUSR)))) { - if (capable_cred(cr, CAP_DAC_READ_SEARCH)) - return 0; -#ifdef NOISE - cmn_err(CE_NOTE, "Ick: mode=%o, orgmode=%o", mode, orgmode); -#endif /* NOISE */ - return XFS_ERROR(EACCES); - } - return XFS_ERROR(EACCES); -} - -/* - * xfs_iroundup: round up argument to next power of two - */ -uint -xfs_iroundup( - uint v) -{ - int i; - uint m; - - if ((v & (v - 1)) == 0) - return v; - ASSERT((v & 0x80000000) == 0); - if ((v & (v + 1)) == 0) - return v + 1; - for (i = 0, m = 1; i < 31; i++, m <<= 1) { - if (v & m) - continue; - v |= m; - if ((v & (v + 1)) == 0) - return v + 1; - } - ASSERT(0); - return( 0 ); -} - #ifdef XFS_ILOCK_TRACE ktrace_t *xfs_ilock_trace_buf; @@ -4206,7 +4089,7 @@ xfs_iext_realloc_direct( return; } if (!is_power_of_2(new_size)){ - rnew_size = xfs_iroundup(new_size); + rnew_size = roundup_pow_of_two(new_size); } if (rnew_size != ifp->if_real_bytes) { ifp->if_u1.if_extents = @@ -4229,7 +4112,7 @@ xfs_iext_realloc_direct( else { new_size += ifp->if_bytes; if (!is_power_of_2(new_size)) { - rnew_size = xfs_iroundup(new_size); + rnew_size = roundup_pow_of_two(new_size); } xfs_iext_inline_to_direct(ifp, rnew_size); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index e5aff929cc6..bfcd72cbaee 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -132,45 +132,6 @@ typedef struct dm_attrs_s { __uint16_t da_pad; /* DMIG extra padding */ } dm_attrs_t; -typedef struct xfs_iocore { - void *io_obj; /* pointer to container - * inode or dcxvn structure */ - struct xfs_mount *io_mount; /* fs mount struct ptr */ -#ifdef DEBUG - mrlock_t *io_lock; /* inode IO lock */ - mrlock_t *io_iolock; /* inode IO lock */ -#endif - - /* I/O state */ - xfs_fsize_t io_new_size; /* sz when write completes */ - - /* Miscellaneous state. */ - unsigned int io_flags; /* IO related flags */ - - /* DMAPI state */ - dm_attrs_t io_dmattrs; - -} xfs_iocore_t; - -#define io_dmevmask io_dmattrs.da_dmevmask -#define io_dmstate io_dmattrs.da_dmstate - -#define XFS_IO_INODE(io) ((xfs_inode_t *) ((io)->io_obj)) -#define XFS_IO_DCXVN(io) ((dcxvn_t *) ((io)->io_obj)) - -/* - * Flags in the flags field - */ - -#define XFS_IOCORE_RT 0x1 - -/* - * xfs_iocore prototypes - */ - -extern void xfs_iocore_inode_init(struct xfs_inode *); -extern void xfs_iocore_inode_reinit(struct xfs_inode *); - /* * This is the xfs inode cluster structure. This structure is used by * xfs_iflush to find inodes that share a cluster and can be flushed to disk at @@ -181,7 +142,7 @@ typedef struct xfs_icluster { xfs_daddr_t icl_blkno; /* starting block number of * the cluster */ struct xfs_buf *icl_buf; /* the inode buffer */ - lock_t icl_lock; /* inode list lock */ + spinlock_t icl_lock; /* inode list lock */ } xfs_icluster_t; /* @@ -283,9 +244,6 @@ typedef struct xfs_inode { struct xfs_inode **i_refcache; /* ptr to entry in ref cache */ struct xfs_inode *i_release; /* inode to unref */ #endif - /* I/O state */ - xfs_iocore_t i_iocore; /* I/O core */ - /* Miscellaneous state. */ unsigned short i_flags; /* see defined flags below */ unsigned char i_update_core; /* timestamps/size is dirty */ @@ -298,9 +256,10 @@ typedef struct xfs_inode { struct hlist_node i_cnode; /* cluster link node */ xfs_fsize_t i_size; /* in-memory size */ + xfs_fsize_t i_new_size; /* size when write completes */ atomic_t i_iocount; /* outstanding I/O count */ /* Trace buffers per inode. */ -#ifdef XFS_VNODE_TRACE +#ifdef XFS_INODE_TRACE struct ktrace *i_trace; /* general inode trace */ #endif #ifdef XFS_BMAP_TRACE @@ -382,17 +341,42 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) /* * Fork handling. */ -#define XFS_IFORK_PTR(ip,w) \ - ((w) == XFS_DATA_FORK ? &(ip)->i_df : (ip)->i_afp) -#define XFS_IFORK_Q(ip) XFS_CFORK_Q(&(ip)->i_d) -#define XFS_IFORK_DSIZE(ip) XFS_CFORK_DSIZE(&ip->i_d, ip->i_mount) -#define XFS_IFORK_ASIZE(ip) XFS_CFORK_ASIZE(&ip->i_d, ip->i_mount) -#define XFS_IFORK_SIZE(ip,w) XFS_CFORK_SIZE(&ip->i_d, ip->i_mount, w) -#define XFS_IFORK_FORMAT(ip,w) XFS_CFORK_FORMAT(&ip->i_d, w) -#define XFS_IFORK_FMT_SET(ip,w,n) XFS_CFORK_FMT_SET(&ip->i_d, w, n) -#define XFS_IFORK_NEXTENTS(ip,w) XFS_CFORK_NEXTENTS(&ip->i_d, w) -#define XFS_IFORK_NEXT_SET(ip,w,n) XFS_CFORK_NEXT_SET(&ip->i_d, w, n) +#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0) +#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3)) + +#define XFS_IFORK_PTR(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + &(ip)->i_df : \ + (ip)->i_afp) +#define XFS_IFORK_DSIZE(ip) \ + (XFS_IFORK_Q(ip) ? \ + XFS_IFORK_BOFF(ip) : \ + XFS_LITINO((ip)->i_mount)) +#define XFS_IFORK_ASIZE(ip) \ + (XFS_IFORK_Q(ip) ? \ + XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \ + 0) +#define XFS_IFORK_SIZE(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + XFS_IFORK_DSIZE(ip) : \ + XFS_IFORK_ASIZE(ip)) +#define XFS_IFORK_FORMAT(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + (ip)->i_d.di_format : \ + (ip)->i_d.di_aformat) +#define XFS_IFORK_FMT_SET(ip,w,n) \ + ((w) == XFS_DATA_FORK ? \ + ((ip)->i_d.di_format = (n)) : \ + ((ip)->i_d.di_aformat = (n))) +#define XFS_IFORK_NEXTENTS(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + (ip)->i_d.di_nextents : \ + (ip)->i_d.di_anextents) +#define XFS_IFORK_NEXT_SET(ip,w,n) \ + ((w) == XFS_DATA_FORK ? \ + ((ip)->i_d.di_nextents = (n)) : \ + ((ip)->i_d.di_anextents = (n))) #ifdef __KERNEL__ @@ -509,7 +493,6 @@ void xfs_ihash_init(struct xfs_mount *); void xfs_ihash_free(struct xfs_mount *); xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, struct xfs_trans *); -void xfs_inode_lock_init(xfs_inode_t *, bhv_vnode_t *); int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, uint, uint, xfs_inode_t **, xfs_daddr_t); void xfs_iput(xfs_inode_t *, uint); @@ -545,7 +528,7 @@ void xfs_dinode_to_disk(struct xfs_dinode_core *, struct xfs_icdinode *); uint xfs_ip2xflags(struct xfs_inode *); -uint xfs_dic2xflags(struct xfs_dinode_core *); +uint xfs_dic2xflags(struct xfs_dinode *); int xfs_ifree(struct xfs_trans *, xfs_inode_t *, struct xfs_bmap_free *); int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); @@ -567,13 +550,12 @@ void xfs_iunpin(xfs_inode_t *); int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int); int xfs_iflush(xfs_inode_t *, uint); void xfs_iflush_all(struct xfs_mount *); -int xfs_iaccess(xfs_inode_t *, mode_t, cred_t *); -uint xfs_iroundup(uint); void xfs_ichgtime(xfs_inode_t *, int); xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); void xfs_lock_inodes(xfs_inode_t **, int, int, uint); void xfs_synchronize_atime(xfs_inode_t *); +void xfs_mark_inode_dirty_sync(xfs_inode_t *); xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 565d470a6b4..034ca720229 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -274,6 +274,11 @@ xfs_inode_item_format( */ xfs_synchronize_atime(ip); + /* + * make sure the linux inode is dirty + */ + xfs_mark_inode_dirty_sync(ip); + vecp->i_addr = (xfs_caddr_t)&ip->i_d; vecp->i_len = sizeof(xfs_dinode_core_t); XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); @@ -615,7 +620,7 @@ xfs_inode_item_trylock( return XFS_ITEM_PUSHBUF; } else { /* - * We hold the AIL_LOCK, so we must specify the + * We hold the AIL lock, so we must specify the * NONOTIFY flag so that we won't double trip. */ xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); @@ -749,7 +754,7 @@ xfs_inode_item_committed( * marked delayed write. If that's the case, we'll initiate a bawrite on that * buffer to expedite the process. * - * We aren't holding the AIL_LOCK (or the flush lock) when this gets called, + * We aren't holding the AIL lock (or the flush lock) when this gets called, * so it is inherently race-y. */ STATIC void @@ -792,7 +797,7 @@ xfs_inode_item_pushbuf( if (XFS_BUF_ISDELAYWRITE(bp)) { /* * We were racing with iflush because we don't hold - * the AIL_LOCK or the flush lock. However, at this point, + * the AIL lock or the flush lock. However, at this point, * we have the buffer, and we know that it's dirty. * So, it's possible that iflush raced with us, and * this item is already taken off the AIL. @@ -968,7 +973,6 @@ xfs_iflush_done( xfs_inode_log_item_t *iip) { xfs_inode_t *ip; - SPLDECL(s); ip = iip->ili_inode; @@ -983,15 +987,15 @@ xfs_iflush_done( */ if (iip->ili_logged && (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { - AIL_LOCK(ip->i_mount, s); + spin_lock(&ip->i_mount->m_ail_lock); if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { /* * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(ip->i_mount, - (xfs_log_item_t*)iip, s); + (xfs_log_item_t*)iip); } else { - AIL_UNLOCK(ip->i_mount, s); + spin_unlock(&ip->i_mount->m_ail_lock); } } @@ -1025,21 +1029,19 @@ xfs_iflush_abort( { xfs_inode_log_item_t *iip; xfs_mount_t *mp; - SPLDECL(s); iip = ip->i_itemp; mp = ip->i_mount; if (iip) { if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { - AIL_LOCK(mp, s); + spin_lock(&mp->m_ail_lock); if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { /* * xfs_trans_delete_ail() drops the AIL lock. */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip, - s); + xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip); } else - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } iip->ili_logged = 0; /* diff --git a/fs/xfs/xfs_iocore.c b/fs/xfs/xfs_iocore.c deleted file mode 100644 index b27b5d5be84..00000000000 --- a/fs/xfs/xfs_iocore.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dfrag.h" -#include "xfs_dmapi.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_itable.h" -#include "xfs_btree.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" -#include "xfs_bmap.h" -#include "xfs_error.h" -#include "xfs_rw.h" -#include "xfs_quota.h" -#include "xfs_trans_space.h" -#include "xfs_iomap.h" - - -STATIC xfs_fsize_t -xfs_size_fn( - xfs_inode_t *ip) -{ - return XFS_ISIZE(ip); -} - -STATIC int -xfs_ioinit( - struct xfs_mount *mp, - struct xfs_mount_args *mntargs, - int flags) -{ - return xfs_mountfs(mp, flags); -} - -xfs_ioops_t xfs_iocore_xfs = { - .xfs_ioinit = (xfs_ioinit_t) xfs_ioinit, - .xfs_bmapi_func = (xfs_bmapi_t) xfs_bmapi, - .xfs_bunmapi_func = (xfs_bunmapi_t) xfs_bunmapi, - .xfs_bmap_eof_func = (xfs_bmap_eof_t) xfs_bmap_eof, - .xfs_iomap_write_direct = - (xfs_iomap_write_direct_t) xfs_iomap_write_direct, - .xfs_iomap_write_delay = - (xfs_iomap_write_delay_t) xfs_iomap_write_delay, - .xfs_iomap_write_allocate = - (xfs_iomap_write_allocate_t) xfs_iomap_write_allocate, - .xfs_iomap_write_unwritten = - (xfs_iomap_write_unwritten_t) xfs_iomap_write_unwritten, - .xfs_ilock = (xfs_lock_t) xfs_ilock, - .xfs_lck_map_shared = (xfs_lck_map_shared_t) xfs_ilock_map_shared, - .xfs_ilock_demote = (xfs_lock_demote_t) xfs_ilock_demote, - .xfs_ilock_nowait = (xfs_lock_nowait_t) xfs_ilock_nowait, - .xfs_unlock = (xfs_unlk_t) xfs_iunlock, - .xfs_size_func = (xfs_size_t) xfs_size_fn, - .xfs_iodone = (xfs_iodone_t) fs_noerr, - .xfs_swap_extents_func = (xfs_swap_extents_t) xfs_swap_extents, -}; - -void -xfs_iocore_inode_reinit( - xfs_inode_t *ip) -{ - xfs_iocore_t *io = &ip->i_iocore; - - io->io_flags = 0; - if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) - io->io_flags |= XFS_IOCORE_RT; - io->io_dmevmask = ip->i_d.di_dmevmask; - io->io_dmstate = ip->i_d.di_dmstate; -} - -void -xfs_iocore_inode_init( - xfs_inode_t *ip) -{ - xfs_iocore_t *io = &ip->i_iocore; - xfs_mount_t *mp = ip->i_mount; - - io->io_mount = mp; -#ifdef DEBUG - io->io_lock = &ip->i_lock; - io->io_iolock = &ip->i_iolock; -#endif - - io->io_obj = (void *)ip; - - xfs_iocore_inode_reinit(ip); -} diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 72786e356d5..fde37f87d52 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -53,12 +53,10 @@ void xfs_iomap_enter_trace( int tag, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, ssize_t count) { - xfs_inode_t *ip = XFS_IO_INODE(io); - if (!ip->i_rwtrace) return; @@ -70,8 +68,8 @@ xfs_iomap_enter_trace( (void *)((unsigned long)((offset >> 32) & 0xffffffff)), (void *)((unsigned long)(offset & 0xffffffff)), (void *)((unsigned long)count), - (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)), - (void *)((unsigned long)(io->io_new_size & 0xffffffff)), + (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(ip->i_new_size & 0xffffffff)), (void *)((unsigned long)current_pid()), (void *)NULL, (void *)NULL, @@ -84,15 +82,13 @@ xfs_iomap_enter_trace( void xfs_iomap_map_trace( int tag, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, ssize_t count, xfs_iomap_t *iomapp, xfs_bmbt_irec_t *imapp, int flags) { - xfs_inode_t *ip = XFS_IO_INODE(io); - if (!ip->i_rwtrace) return; @@ -126,7 +122,7 @@ xfs_iomap_map_trace( STATIC int xfs_imap_to_bmap( - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, xfs_bmbt_irec_t *imap, xfs_iomap_t *iomapp, @@ -134,11 +130,10 @@ xfs_imap_to_bmap( int iomaps, /* Number of iomap entries */ int flags) { - xfs_mount_t *mp; + xfs_mount_t *mp = ip->i_mount; int pbm; xfs_fsblock_t start_block; - mp = io->io_mount; for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) { iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); @@ -146,7 +141,7 @@ xfs_imap_to_bmap( iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount); iomapp->iomap_flags = flags; - if (io->io_flags & XFS_IOCORE_RT) { + if (XFS_IS_REALTIME_INODE(ip)) { iomapp->iomap_flags |= IOMAP_REALTIME; iomapp->iomap_target = mp->m_rtdev_targp; } else { @@ -160,7 +155,7 @@ xfs_imap_to_bmap( iomapp->iomap_bn = IOMAP_DADDR_NULL; iomapp->iomap_flags |= IOMAP_DELAY; } else { - iomapp->iomap_bn = XFS_FSB_TO_DB_IO(io, start_block); + iomapp->iomap_bn = XFS_FSB_TO_DB(ip, start_block); if (ISUNWRITTEN(imap)) iomapp->iomap_flags |= IOMAP_UNWRITTEN; } @@ -172,14 +167,14 @@ xfs_imap_to_bmap( int xfs_iomap( - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, ssize_t count, int flags, xfs_iomap_t *iomapp, int *niomaps) { - xfs_mount_t *mp = io->io_mount; + xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb, end_fsb; int error = 0; int lockmode = 0; @@ -188,45 +183,37 @@ xfs_iomap( int bmapi_flags = 0; int iomap_flags = 0; + ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); + if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - switch (flags & - (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE | - BMAPI_UNWRITTEN | BMAPI_DEVICE)) { + switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { case BMAPI_READ: - xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count); - lockmode = XFS_LCK_MAP_SHARED(mp, io); + xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, ip, offset, count); + lockmode = xfs_ilock_map_shared(ip); bmapi_flags = XFS_BMAPI_ENTIRE; break; case BMAPI_WRITE: - xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count); + xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count); lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; if (flags & BMAPI_IGNSTATE) bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; - XFS_ILOCK(mp, io, lockmode); + xfs_ilock(ip, lockmode); break; case BMAPI_ALLOCATE: - xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, io, offset, count); + xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count); lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD; bmapi_flags = XFS_BMAPI_ENTIRE; + /* Attempt non-blocking lock */ if (flags & BMAPI_TRYLOCK) { - if (!XFS_ILOCK_NOWAIT(mp, io, lockmode)) + if (!xfs_ilock_nowait(ip, lockmode)) return XFS_ERROR(EAGAIN); } else { - XFS_ILOCK(mp, io, lockmode); + xfs_ilock(ip, lockmode); } break; - case BMAPI_UNWRITTEN: - goto phase2; - case BMAPI_DEVICE: - lockmode = XFS_LCK_MAP_SHARED(mp, io); - iomapp->iomap_target = io->io_flags & XFS_IOCORE_RT ? - mp->m_rtdev_targp : mp->m_ddev_targp; - error = 0; - *niomaps = 1; - goto out; default: BUG(); } @@ -237,7 +224,7 @@ xfs_iomap( end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); - error = XFS_BMAPI(mp, NULL, io, offset_fsb, + error = xfs_bmapi(NULL, ip, offset_fsb, (xfs_filblks_t)(end_fsb - offset_fsb), bmapi_flags, NULL, 0, &imap, &nimaps, NULL, NULL); @@ -245,54 +232,48 @@ xfs_iomap( if (error) goto out; -phase2: - switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE|BMAPI_UNWRITTEN)) { + switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { case BMAPI_WRITE: /* If we found an extent, return it */ if (nimaps && (imap.br_startblock != HOLESTARTBLOCK) && (imap.br_startblock != DELAYSTARTBLOCK)) { - xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io, + xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, offset, count, iomapp, &imap, flags); break; } if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { - error = XFS_IOMAP_WRITE_DIRECT(mp, io, offset, - count, flags, &imap, &nimaps, nimaps); + error = xfs_iomap_write_direct(ip, offset, count, flags, + &imap, &nimaps, nimaps); } else { - error = XFS_IOMAP_WRITE_DELAY(mp, io, offset, count, - flags, &imap, &nimaps); + error = xfs_iomap_write_delay(ip, offset, count, flags, + &imap, &nimaps); } if (!error) { - xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, io, + xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, ip, offset, count, iomapp, &imap, flags); } iomap_flags = IOMAP_NEW; break; case BMAPI_ALLOCATE: /* If we found an extent, return it */ - XFS_IUNLOCK(mp, io, lockmode); + xfs_iunlock(ip, lockmode); lockmode = 0; if (nimaps && !ISNULLSTARTBLOCK(imap.br_startblock)) { - xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io, + xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, offset, count, iomapp, &imap, flags); break; } - error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, offset, count, + error = xfs_iomap_write_allocate(ip, offset, count, &imap, &nimaps); break; - case BMAPI_UNWRITTEN: - lockmode = 0; - error = XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count); - nimaps = 0; - break; } if (nimaps) { - *niomaps = xfs_imap_to_bmap(io, offset, &imap, + *niomaps = xfs_imap_to_bmap(ip, offset, &imap, iomapp, nimaps, *niomaps, iomap_flags); } else if (niomaps) { *niomaps = 0; @@ -300,14 +281,15 @@ phase2: out: if (lockmode) - XFS_IUNLOCK(mp, io, lockmode); + xfs_iunlock(ip, lockmode); return XFS_ERROR(error); } + STATIC int xfs_iomap_eof_align_last_fsb( xfs_mount_t *mp, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_fsize_t isize, xfs_extlen_t extsize, xfs_fileoff_t *last_fsb) @@ -316,7 +298,7 @@ xfs_iomap_eof_align_last_fsb( xfs_extlen_t align; int eof, error; - if (io->io_flags & XFS_IOCORE_RT) + if (XFS_IS_REALTIME_INODE(ip)) ; /* * If mounted with the "-o swalloc" option, roundup the allocation @@ -347,7 +329,7 @@ xfs_iomap_eof_align_last_fsb( } if (new_last_fsb) { - error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof); + error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); if (error) return error; if (eof) @@ -416,7 +398,6 @@ xfs_iomap_write_direct( int found) { xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb; xfs_fileoff_t last_fsb; xfs_filblks_t count_fsb, resaligned; @@ -446,13 +427,13 @@ xfs_iomap_write_direct( extsz = xfs_get_extsz_hint(ip); isize = ip->i_size; - if (io->io_new_size > isize) - isize = io->io_new_size; + if (ip->i_new_size > isize) + isize = ip->i_new_size; offset_fsb = XFS_B_TO_FSBT(mp, offset); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); if ((offset + count) > isize) { - error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz, + error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz, &last_fsb); if (error) goto error_out; @@ -519,7 +500,7 @@ xfs_iomap_write_direct( */ XFS_BMAP_INIT(&free_list, &firstfsb); nimaps = 1; - error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, bmapi_flag, + error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, &firstfsb, 0, &imap, &nimaps, &free_list, NULL); if (error) goto error0; @@ -542,7 +523,8 @@ xfs_iomap_write_direct( goto error_out; } - if (unlikely(!imap.br_startblock && !(io->io_flags & XFS_IOCORE_RT))) { + if (unlikely(!imap.br_startblock && + !(XFS_IS_REALTIME_INODE(ip)))) { error = xfs_cmn_err_fsblock_zero(ip, &imap); goto error_out; } @@ -577,7 +559,7 @@ error_out: STATIC int xfs_iomap_eof_want_preallocate( xfs_mount_t *mp, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_fsize_t isize, xfs_off_t offset, size_t count, @@ -604,7 +586,7 @@ xfs_iomap_eof_want_preallocate( while (count_fsb > 0) { imaps = nimaps; firstblock = NULLFSBLOCK; - error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, 0, + error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0, &firstblock, 0, imap, &imaps, NULL, NULL); if (error) return error; @@ -630,7 +612,6 @@ xfs_iomap_write_delay( int *nmaps) { xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb; xfs_fileoff_t last_fsb; xfs_off_t aligned_offset; @@ -658,10 +639,10 @@ xfs_iomap_write_delay( retry: isize = ip->i_size; - if (io->io_new_size > isize) - isize = io->io_new_size; + if (ip->i_new_size > isize) + isize = ip->i_new_size; - error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count, + error = xfs_iomap_eof_want_preallocate(mp, ip, isize, offset, count, ioflag, imap, XFS_WRITE_IMAPS, &prealloc); if (error) return error; @@ -675,7 +656,7 @@ retry: } if (prealloc || extsz) { - error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz, + error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz, &last_fsb); if (error) return error; @@ -683,7 +664,7 @@ retry: nimaps = XFS_WRITE_IMAPS; firstblock = NULLFSBLOCK; - error = XFS_BMAPI(mp, NULL, io, offset_fsb, + error = xfs_bmapi(NULL, ip, offset_fsb, (xfs_filblks_t)(last_fsb - offset_fsb), XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, @@ -697,7 +678,7 @@ retry: */ if (nimaps == 0) { xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, - io, offset, count); + ip, offset, count); if (xfs_flush_space(ip, &fsynced, &ioflag)) return XFS_ERROR(ENOSPC); @@ -705,7 +686,8 @@ retry: goto retry; } - if (unlikely(!imap[0].br_startblock && !(io->io_flags & XFS_IOCORE_RT))) + if (unlikely(!imap[0].br_startblock && + !(XFS_IS_REALTIME_INODE(ip)))) return xfs_cmn_err_fsblock_zero(ip, &imap[0]); *ret_imap = imap[0]; @@ -720,6 +702,9 @@ retry: * the originating callers request. * * Called without a lock on the inode. + * + * We no longer bother to look at the incoming map - all we have to + * guarantee is that whatever we allocate fills the required range. */ int xfs_iomap_write_allocate( @@ -730,15 +715,14 @@ xfs_iomap_write_allocate( int *retmap) { xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb, last_block; xfs_fileoff_t end_fsb, map_start_fsb; xfs_fsblock_t first_block; xfs_bmap_free_t free_list; xfs_filblks_t count_fsb; - xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS]; + xfs_bmbt_irec_t imap; xfs_trans_t *tp; - int i, nimaps, committed; + int nimaps, committed; int error = 0; int nres; @@ -785,13 +769,38 @@ xfs_iomap_write_allocate( XFS_BMAP_INIT(&free_list, &first_block); - nimaps = XFS_STRAT_WRITE_IMAPS; /* - * Ensure we don't go beyond eof - it is possible - * the extents changed since we did the read call, - * we dropped the ilock in the interim. + * it is possible that the extents have changed since + * we did the read call as we dropped the ilock for a + * while. We have to be careful about truncates or hole + * punchs here - we are not allowed to allocate + * non-delalloc blocks here. + * + * The only protection against truncation is the pages + * for the range we are being asked to convert are + * locked and hence a truncate will block on them + * first. + * + * As a result, if we go beyond the range we really + * need and hit an delalloc extent boundary followed by + * a hole while we have excess blocks in the map, we + * will fill the hole incorrectly and overrun the + * transaction reservation. + * + * Using a single map prevents this as we are forced to + * check each map we look for overlap with the desired + * range and abort as soon as we find it. Also, given + * that we only return a single map, having one beyond + * what we can return is probably a bit silly. + * + * We also need to check that we don't go beyond EOF; + * this is a truncate optimisation as a truncate sets + * the new file size before block on the pages we + * currently have locked under writeback. Because they + * are about to be tossed, we don't need to write them + * back.... */ - + nimaps = 1; end_fsb = XFS_B_TO_FSB(mp, ip->i_size); xfs_bmap_last_offset(NULL, ip, &last_block, XFS_DATA_FORK); @@ -805,9 +814,9 @@ xfs_iomap_write_allocate( } /* Go get the actual blocks */ - error = XFS_BMAPI(mp, tp, io, map_start_fsb, count_fsb, + error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, XFS_BMAPI_WRITE, &first_block, 1, - imap, &nimaps, &free_list, NULL); + &imap, &nimaps, &free_list, NULL); if (error) goto trans_cancel; @@ -826,27 +835,24 @@ xfs_iomap_write_allocate( * See if we were able to allocate an extent that * covers at least part of the callers request */ - for (i = 0; i < nimaps; i++) { - if (unlikely(!imap[i].br_startblock && - !(io->io_flags & XFS_IOCORE_RT))) - return xfs_cmn_err_fsblock_zero(ip, &imap[i]); - if ((offset_fsb >= imap[i].br_startoff) && - (offset_fsb < (imap[i].br_startoff + - imap[i].br_blockcount))) { - *map = imap[i]; - *retmap = 1; - XFS_STATS_INC(xs_xstrat_quick); - return 0; - } - count_fsb -= imap[i].br_blockcount; + if (unlikely(!imap.br_startblock && + XFS_IS_REALTIME_INODE(ip))) + return xfs_cmn_err_fsblock_zero(ip, &imap); + if ((offset_fsb >= imap.br_startoff) && + (offset_fsb < (imap.br_startoff + + imap.br_blockcount))) { + *map = imap; + *retmap = 1; + XFS_STATS_INC(xs_xstrat_quick); + return 0; } - /* So far we have not mapped the requested part of the + /* + * So far we have not mapped the requested part of the * file, just surrounding data, try again. */ - nimaps--; - map_start_fsb = imap[nimaps].br_startoff + - imap[nimaps].br_blockcount; + count_fsb -= imap.br_blockcount; + map_start_fsb = imap.br_startoff + imap.br_blockcount; } trans_cancel: @@ -864,7 +870,6 @@ xfs_iomap_write_unwritten( size_t count) { xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb; xfs_filblks_t count_fsb; xfs_filblks_t numblks_fsb; @@ -877,8 +882,7 @@ xfs_iomap_write_unwritten( int committed; int error; - xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, - &ip->i_iocore, offset, count); + xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, ip, offset, count); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); @@ -912,7 +916,7 @@ xfs_iomap_write_unwritten( */ XFS_BMAP_INIT(&free_list, &firstfsb); nimaps = 1; - error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, + error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, 1, &imap, &nimaps, &free_list, NULL); if (error) @@ -928,7 +932,7 @@ xfs_iomap_write_unwritten( return XFS_ERROR(error); if (unlikely(!imap.br_startblock && - !(io->io_flags & XFS_IOCORE_RT))) + !(XFS_IS_REALTIME_INODE(ip)))) return xfs_cmn_err_fsblock_zero(ip, &imap); if ((numblks_fsb = imap.br_blockcount) == 0) { diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index f5c09887fe9..ee1a0c134cc 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -36,14 +36,12 @@ typedef enum { BMAPI_READ = (1 << 0), /* read extents */ BMAPI_WRITE = (1 << 1), /* create extents */ BMAPI_ALLOCATE = (1 << 2), /* delayed allocate to real extents */ - BMAPI_UNWRITTEN = (1 << 3), /* unwritten extents to real extents */ /* modifiers */ BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */ BMAPI_TRYLOCK = (1 << 8), /* non-blocking request */ - BMAPI_DEVICE = (1 << 9), /* we only want to know the device */ } bmapi_flags_t; @@ -73,11 +71,10 @@ typedef struct xfs_iomap { iomap_flags_t iomap_flags; } xfs_iomap_t; -struct xfs_iocore; struct xfs_inode; struct xfs_bmbt_irec; -extern int xfs_iomap(struct xfs_iocore *, xfs_off_t, ssize_t, int, +extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, struct xfs_iomap *, int *); extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, int, struct xfs_bmbt_irec *, int *, int); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 9fc4c288652..658aab6b1bb 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -170,7 +170,7 @@ xfs_bulkstat_one_dinode( buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); - buf->bs_xflags = xfs_dic2xflags(dic); + buf->bs_xflags = xfs_dic2xflags(dip); buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; buf->bs_extents = be32_to_cpu(dic->di_nextents); buf->bs_gen = be32_to_cpu(dic->di_gen); @@ -291,7 +291,7 @@ xfs_bulkstat_use_dinode( dip = (xfs_dinode_t *) xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog); /* - * Check the buffer containing the on-disk inode for di_nlink == 0. + * Check the buffer containing the on-disk inode for di_mode == 0. * This is to prevent xfs_bulkstat from picking up just reclaimed * inodes that have their in-core state initialized but not flushed * to disk yet. This is a temporary hack that would require a proper @@ -299,7 +299,7 @@ xfs_bulkstat_use_dinode( */ if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC || !XFS_DINODE_GOOD_VERSION(dip->di_core.di_version) || - !dip->di_core.di_nlink) + !dip->di_core.di_mode) return 0; if (flags & BULKSTAT_FG_QUICK) { *dipp = dip; @@ -307,7 +307,7 @@ xfs_bulkstat_use_dinode( } /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ aformat = dip->di_core.di_aformat; - if ((XFS_CFORK_Q(&dip->di_core) == 0) || + if ((XFS_DFORK_Q(dip) == 0) || (aformat == XFS_DINODE_FMT_LOCAL) || (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) { *dipp = dip; @@ -399,7 +399,7 @@ xfs_bulkstat( (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); nimask = ~(nicluster - 1); nbcluster = nicluster >> mp->m_sb.sb_inopblog; - irbuf = kmem_zalloc_greedy(&irbsize, NBPC, NBPC * 4, + irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4, KM_SLEEP | KM_MAYFAIL | KM_LARGE); nirbuf = irbsize / sizeof(*irbuf); @@ -830,7 +830,7 @@ xfs_inumbers( agino = XFS_INO_TO_AGINO(mp, ino); left = *count; *count = 0; - bcount = MIN(left, (int)(NBPP / sizeof(*buffer))); + bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); error = bufidx = 0; cur = NULL; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 77c12715a7d..b3ac3805d3c 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -399,10 +399,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ { xlog_t *log = mp->m_log; xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; - int abortflg, spl; + int abortflg; cb->cb_next = NULL; - spl = LOG_LOCK(log); + spin_lock(&log->l_icloglock); abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); if (!abortflg) { ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || @@ -411,7 +411,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ *(iclog->ic_callback_tail) = cb; iclog->ic_callback_tail = &(cb->cb_next); } - LOG_UNLOCK(log, spl); + spin_unlock(&log->l_icloglock); return abortflg; } /* xfs_log_notify */ @@ -498,11 +498,14 @@ xfs_log_reserve(xfs_mount_t *mp, * Return error or zero. */ int -xfs_log_mount(xfs_mount_t *mp, - xfs_buftarg_t *log_target, - xfs_daddr_t blk_offset, - int num_bblks) +xfs_log_mount( + xfs_mount_t *mp, + xfs_buftarg_t *log_target, + xfs_daddr_t blk_offset, + int num_bblks) { + int error; + if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); else { @@ -515,11 +518,21 @@ xfs_log_mount(xfs_mount_t *mp, mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); /* + * Initialize the AIL now we have a log. + */ + spin_lock_init(&mp->m_ail_lock); + error = xfs_trans_ail_init(mp); + if (error) { + cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); + goto error; + } + + /* * skip log recovery on a norecovery mount. pretend it all * just worked. */ if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { - int error, readonly = (mp->m_flags & XFS_MOUNT_RDONLY); + int readonly = (mp->m_flags & XFS_MOUNT_RDONLY); if (readonly) mp->m_flags &= ~XFS_MOUNT_RDONLY; @@ -530,8 +543,7 @@ xfs_log_mount(xfs_mount_t *mp, mp->m_flags |= XFS_MOUNT_RDONLY; if (error) { cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); - xlog_dealloc_log(mp->m_log); - return error; + goto error; } } @@ -540,6 +552,9 @@ xfs_log_mount(xfs_mount_t *mp, /* End mounting message in xfs_log_mount_finish */ return 0; +error: + xfs_log_unmount_dealloc(mp); + return error; } /* xfs_log_mount */ /* @@ -606,7 +621,6 @@ xfs_log_unmount_write(xfs_mount_t *mp) xfs_log_ticket_t tic = NULL; xfs_lsn_t lsn; int error; - SPLDECL(s); /* the data section must be 32 bit size aligned */ struct { @@ -659,24 +673,24 @@ xfs_log_unmount_write(xfs_mount_t *mp) } - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; iclog->ic_refcnt++; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); xlog_state_want_sync(log, iclog); (void) xlog_state_release_iclog(log, iclog); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (!(iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY)) { if (!XLOG_FORCED_SHUTDOWN(log)) { sv_wait(&iclog->ic_forcesema, PMEM, &log->l_icloglock, s); } else { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } } else { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } if (tic) { xlog_trace_loggrant(log, tic, "unmount rec"); @@ -697,15 +711,15 @@ xfs_log_unmount_write(xfs_mount_t *mp) * a file system that went into forced_shutdown as * the result of an unmount.. */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; iclog->ic_refcnt++; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); xlog_state_want_sync(log, iclog); (void) xlog_state_release_iclog(log, iclog); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if ( ! ( iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY @@ -714,7 +728,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) sv_wait(&iclog->ic_forcesema, PMEM, &log->l_icloglock, s); } else { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } } @@ -723,10 +737,14 @@ xfs_log_unmount_write(xfs_mount_t *mp) /* * Deallocate log structures for unmount/relocation. + * + * We need to stop the aild from running before we destroy + * and deallocate the log as the aild references the log. */ void xfs_log_unmount_dealloc(xfs_mount_t *mp) { + xfs_trans_ail_destroy(mp); xlog_dealloc_log(mp->m_log); } @@ -762,20 +780,18 @@ xfs_log_move_tail(xfs_mount_t *mp, xlog_ticket_t *tic; xlog_t *log = mp->m_log; int need_bytes, free_bytes, cycle, bytes; - SPLDECL(s); if (XLOG_FORCED_SHUTDOWN(log)) return; - ASSERT(!XFS_FORCED_SHUTDOWN(mp)); if (tail_lsn == 0) { /* needed since sync_lsn is 64 bits */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); tail_lsn = log->l_last_sync_lsn; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); /* Also an invalid lsn. 1 implies that we aren't passing in a valid * tail_lsn. @@ -824,7 +840,7 @@ xfs_log_move_tail(xfs_mount_t *mp, tic = tic->t_next; } while (tic != log->l_reserve_headq); } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); } /* xfs_log_move_tail */ /* @@ -836,14 +852,13 @@ xfs_log_move_tail(xfs_mount_t *mp, int xfs_log_need_covered(xfs_mount_t *mp) { - SPLDECL(s); int needed = 0, gen; xlog_t *log = mp->m_log; if (!xfs_fs_writable(mp)) return 0; - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || (log->l_covered_state == XLOG_STATE_COVER_NEED2)) && !xfs_trans_first_ail(mp, &gen) @@ -856,7 +871,7 @@ xfs_log_need_covered(xfs_mount_t *mp) } needed = 1; } - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return needed; } @@ -881,17 +896,16 @@ xfs_lsn_t xlog_assign_tail_lsn(xfs_mount_t *mp) { xfs_lsn_t tail_lsn; - SPLDECL(s); xlog_t *log = mp->m_log; tail_lsn = xfs_trans_tail_ail(mp); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); if (tail_lsn != 0) { log->l_tail_lsn = tail_lsn; } else { tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn; } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return tail_lsn; } /* xlog_assign_tail_lsn */ @@ -911,7 +925,7 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) * the tail. The details of this case are described below, but the end * result is that we return the size of the log as the amount of space left. */ -int +STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes) { int free_bytes; @@ -1165,7 +1179,7 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_flags |= XLOG_ACTIVE_RECOVERY; log->l_prev_block = -1; - ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, 1, 0); + log->l_tail_lsn = xlog_assign_lsn(1, 0); /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ log->l_last_sync_lsn = log->l_tail_lsn; log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ @@ -1193,8 +1207,8 @@ xlog_alloc_log(xfs_mount_t *mp, ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); log->l_xbuf = bp; - spinlock_init(&log->l_icloglock, "iclog"); - spinlock_init(&log->l_grant_lock, "grhead_iclog"); + spin_lock_init(&log->l_icloglock); + spin_lock_init(&log->l_grant_lock); initnsema(&log->l_flushsema, 0, "ic-flush"); xlog_state_ticket_alloc(log); /* wait until after icloglock inited */ @@ -1231,12 +1245,12 @@ xlog_alloc_log(xfs_mount_t *mp, head = &iclog->ic_header; memset(head, 0, sizeof(xlog_rec_header_t)); - INT_SET(head->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM); - INT_SET(head->h_version, ARCH_CONVERT, + head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); + head->h_version = cpu_to_be32( XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1); - INT_SET(head->h_size, ARCH_CONVERT, log->l_iclog_size); + head->h_size = cpu_to_be32(log->l_iclog_size); /* new fields */ - INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT); + head->h_fmt = cpu_to_be32(XLOG_FMT); memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); @@ -1293,7 +1307,7 @@ xlog_commit_record(xfs_mount_t *mp, * pushes on an lsn which is further along in the log once we reach the high * water mark. In this manner, we would be creating a low water mark. */ -void +STATIC void xlog_grant_push_ail(xfs_mount_t *mp, int need_bytes) { @@ -1305,11 +1319,10 @@ xlog_grant_push_ail(xfs_mount_t *mp, int threshold_block; /* block in lsn we'd like to be at */ int threshold_cycle; /* lsn cycle we'd like to be at */ int free_threshold; - SPLDECL(s); ASSERT(BTOBB(need_bytes) < log->l_logBBsize); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, log->l_grant_reserve_bytes); @@ -1331,8 +1344,7 @@ xlog_grant_push_ail(xfs_mount_t *mp, threshold_block -= log->l_logBBsize; threshold_cycle += 1; } - ASSIGN_ANY_LSN_HOST(threshold_lsn, threshold_cycle, - threshold_block); + threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block); /* Don't pass in an lsn greater than the lsn of the last * log record known to be on disk. @@ -1340,7 +1352,7 @@ xlog_grant_push_ail(xfs_mount_t *mp, if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) threshold_lsn = log->l_last_sync_lsn; } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); /* * Get the transaction layer to kick the dirty buffers out to @@ -1378,19 +1390,18 @@ xlog_grant_push_ail(xfs_mount_t *mp, * is added immediately before calling bwrite(). */ -int +STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog) { xfs_caddr_t dptr; /* pointer to byte sized element */ xfs_buf_t *bp; - int i, ops; + int i; uint count; /* byte count of bwrite */ uint count_init; /* initial count before roundup */ int roundoff; /* roundoff to BB or stripe */ int split = 0; /* split write into two regions */ int error; - SPLDECL(s); int v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb); XFS_STATS_INC(xs_log_writes); @@ -1415,30 +1426,26 @@ xlog_sync(xlog_t *log, roundoff < BBTOB(1))); /* move grant heads by roundoff in sync */ - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_grant_add_space(log, roundoff); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); /* put cycle number in every block */ xlog_pack_data(log, iclog, roundoff); /* real byte length */ if (v2) { - INT_SET(iclog->ic_header.h_len, - ARCH_CONVERT, - iclog->ic_offset + roundoff); + iclog->ic_header.h_len = + cpu_to_be32(iclog->ic_offset + roundoff); } else { - INT_SET(iclog->ic_header.h_len, ARCH_CONVERT, iclog->ic_offset); + iclog->ic_header.h_len = + cpu_to_be32(iclog->ic_offset); } - /* put ops count in correct order */ - ops = iclog->ic_header.h_num_logops; - INT_SET(iclog->ic_header.h_num_logops, ARCH_CONVERT, ops); - bp = iclog->ic_bp; ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); - XFS_BUF_SET_ADDR(bp, BLOCK_LSN(INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT))); + XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); @@ -1501,10 +1508,10 @@ xlog_sync(xlog_t *log, * a new cycle. Watch out for the header magic number * case, though. */ - for (i=0; i<split; i += BBSIZE) { - INT_MOD(*(uint *)dptr, ARCH_CONVERT, +1); - if (INT_GET(*(uint *)dptr, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) - INT_MOD(*(uint *)dptr, ARCH_CONVERT, +1); + for (i = 0; i < split; i += BBSIZE) { + be32_add((__be32 *)dptr, 1); + if (be32_to_cpu(*(__be32 *)dptr) == XLOG_HEADER_MAGIC_NUM) + be32_add((__be32 *)dptr, 1); dptr += BBSIZE; } @@ -1527,14 +1534,13 @@ xlog_sync(xlog_t *log, /* * Deallocate a log structure */ -void +STATIC void xlog_dealloc_log(xlog_t *log) { xlog_in_core_t *iclog, *next_iclog; xlog_ticket_t *tic, *next_tic; int i; - iclog = log->l_iclog; for (i=0; i<log->l_iclog_bufs; i++) { sv_destroy(&iclog->ic_forcesema); @@ -1565,7 +1571,7 @@ xlog_dealloc_log(xlog_t *log) tic = log->l_unmount_free; while (tic) { next_tic = tic->t_next; - kmem_free(tic, NBPP); + kmem_free(tic, PAGE_SIZE); tic = next_tic; } } @@ -1592,14 +1598,12 @@ xlog_state_finish_copy(xlog_t *log, int record_cnt, int copy_bytes) { - SPLDECL(s); + spin_lock(&log->l_icloglock); - s = LOG_LOCK(log); - - iclog->ic_header.h_num_logops += record_cnt; + be32_add(&iclog->ic_header.h_num_logops, record_cnt); iclog->ic_offset += copy_bytes; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } /* xlog_state_finish_copy */ @@ -1752,7 +1756,7 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) * we don't update ic_offset until the end when we know exactly how many * bytes have been written out. */ -int +STATIC int xlog_write(xfs_mount_t * mp, xfs_log_iovec_t reg[], int nentries, @@ -1823,7 +1827,7 @@ xlog_write(xfs_mount_t * mp, /* start_lsn is the first lsn written to. That's all we need. */ if (! *start_lsn) - *start_lsn = INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT); + *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn); /* This loop writes out as many regions as can fit in the amount * of space which was allocated by xlog_state_get_iclog_space(). @@ -1839,7 +1843,7 @@ xlog_write(xfs_mount_t * mp, */ if (ticket->t_flags & XLOG_TIC_INITED) { logop_head = (xlog_op_header_t *)ptr; - INT_SET(logop_head->oh_tid, ARCH_CONVERT, ticket->t_tid); + logop_head->oh_tid = cpu_to_be32(ticket->t_tid); logop_head->oh_clientid = ticket->t_clientid; logop_head->oh_len = 0; logop_head->oh_flags = XLOG_START_TRANS; @@ -1853,7 +1857,7 @@ xlog_write(xfs_mount_t * mp, /* Copy log operation header directly into data section */ logop_head = (xlog_op_header_t *)ptr; - INT_SET(logop_head->oh_tid, ARCH_CONVERT, ticket->t_tid); + logop_head->oh_tid = cpu_to_be32(ticket->t_tid); logop_head->oh_clientid = ticket->t_clientid; logop_head->oh_res2 = 0; @@ -1888,13 +1892,14 @@ xlog_write(xfs_mount_t * mp, copy_off = partial_copy_len; if (need_copy <= iclog->ic_size - log_offset) { /*complete write */ - INT_SET(logop_head->oh_len, ARCH_CONVERT, copy_len = need_copy); + copy_len = need_copy; + logop_head->oh_len = cpu_to_be32(copy_len); if (partial_copy) logop_head->oh_flags|= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS); partial_copy_len = partial_copy = 0; } else { /* partial write */ copy_len = iclog->ic_size - log_offset; - INT_SET(logop_head->oh_len, ARCH_CONVERT, copy_len); + logop_head->oh_len = cpu_to_be32(copy_len); logop_head->oh_flags |= XLOG_CONTINUE_TRANS; if (partial_copy) logop_head->oh_flags |= XLOG_WAS_CONT_TRANS; @@ -1992,7 +1997,8 @@ xlog_state_clean_log(xlog_t *log) * We don't need to cover the dummy. */ if (!changed && - (INT_GET(iclog->ic_header.h_num_logops, ARCH_CONVERT) == XLOG_COVER_OPS)) { + (be32_to_cpu(iclog->ic_header.h_num_logops) == + XLOG_COVER_OPS)) { changed = 1; } else { /* @@ -2060,7 +2066,7 @@ xlog_get_lowest_lsn( lowest_lsn = 0; do { if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) { - lsn = INT_GET(lsn_log->ic_header.h_lsn, ARCH_CONVERT); + lsn = be64_to_cpu(lsn_log->ic_header.h_lsn); if ((lsn && !lowest_lsn) || (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) { lowest_lsn = lsn; @@ -2089,9 +2095,8 @@ xlog_state_do_callback( int funcdidcallbacks; /* flag: function did callbacks */ int repeats; /* for issuing console warnings if * looping too many times */ - SPLDECL(s); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); first_iclog = iclog = log->l_iclog; ioerrors = 0; funcdidcallbacks = 0; @@ -2136,7 +2141,7 @@ xlog_state_do_callback( * to DO_CALLBACK, we will not process it when * we retry since a previous iclog is in the * CALLBACK and the state cannot change since - * we are holding the LOG_LOCK. + * we are holding the l_icloglock. */ if (!(iclog->ic_state & (XLOG_STATE_DONE_SYNC | @@ -2162,11 +2167,9 @@ xlog_state_do_callback( */ lowest_lsn = xlog_get_lowest_lsn(log); - if (lowest_lsn && ( - XFS_LSN_CMP( - lowest_lsn, - INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) - )<0)) { + if (lowest_lsn && + XFS_LSN_CMP(lowest_lsn, + be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { iclog = iclog->ic_next; continue; /* Leave this iclog for * another thread */ @@ -2174,19 +2177,18 @@ xlog_state_do_callback( iclog->ic_state = XLOG_STATE_CALLBACK; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* l_last_sync_lsn field protected by - * GRANT_LOCK. Don't worry about iclog's lsn. + * l_grant_lock. Don't worry about iclog's lsn. * No one else can be here except us. */ - s = GRANT_LOCK(log); - ASSERT(XFS_LSN_CMP( - log->l_last_sync_lsn, - INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) - )<=0); - log->l_last_sync_lsn = INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT); - GRANT_UNLOCK(log, s); + spin_lock(&log->l_grant_lock); + ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn, + be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); + log->l_last_sync_lsn = + be64_to_cpu(iclog->ic_header.h_lsn); + spin_unlock(&log->l_grant_lock); /* * Keep processing entries in the callback list @@ -2195,7 +2197,7 @@ xlog_state_do_callback( * empty and change the state to DIRTY so that * we don't miss any more callbacks being added. */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); } else { ioerrors++; } @@ -2204,14 +2206,14 @@ xlog_state_do_callback( while (cb) { iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_callback = NULL; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* perform callbacks in the order given */ for (; cb; cb = cb_next) { cb_next = cb->cb_next; cb->cb_func(cb->cb_arg, aborted); } - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); cb = iclog->ic_callback; } @@ -2258,7 +2260,7 @@ xlog_state_do_callback( * * SYNCING - i/o completion will go through logs * DONE_SYNC - interrupt thread should be waiting for - * LOG_LOCK + * l_icloglock * IOERROR - give up hope all ye who enter here */ if (iclog->ic_state == XLOG_STATE_WANT_SYNC || @@ -2276,7 +2278,7 @@ xlog_state_do_callback( flushcnt = log->l_flushcnt; log->l_flushcnt = 0; } - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); while (flushcnt--) vsema(&log->l_flushsema); } /* xlog_state_do_callback */ @@ -2296,15 +2298,14 @@ xlog_state_do_callback( * global state machine log lock. Assume that the calls to cvsema won't * take a long time. At least we know it won't sleep. */ -void +STATIC void xlog_state_done_syncing( xlog_in_core_t *iclog, int aborted) { xlog_t *log = iclog->ic_log; - SPLDECL(s); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || iclog->ic_state == XLOG_STATE_IOERROR); @@ -2320,7 +2321,7 @@ xlog_state_done_syncing( */ if (iclog->ic_state != XLOG_STATE_IOERROR) { if (--iclog->ic_bwritecnt == 1) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return; } iclog->ic_state = XLOG_STATE_DONE_SYNC; @@ -2332,7 +2333,7 @@ xlog_state_done_syncing( * I/O, the others get to wait for the result. */ sv_broadcast(&iclog->ic_writesema); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ } /* xlog_state_done_syncing */ @@ -2357,7 +2358,7 @@ xlog_state_done_syncing( * needs to be incremented, depending on the amount of data which * is copied. */ -int +STATIC int xlog_state_get_iclog_space(xlog_t *log, int len, xlog_in_core_t **iclogp, @@ -2365,23 +2366,22 @@ xlog_state_get_iclog_space(xlog_t *log, int *continued_write, int *logoffsetp) { - SPLDECL(s); int log_offset; xlog_rec_header_t *head; xlog_in_core_t *iclog; int error; restart: - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (XLOG_FORCED_SHUTDOWN(log)) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } iclog = log->l_iclog; if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) { log->l_flushcnt++; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH); XFS_STATS_INC(xs_log_noiclogs); /* Ensure that log writes happen */ @@ -2404,8 +2404,9 @@ restart: xlog_tic_add_region(ticket, log->l_iclog_hsize, XLOG_REG_TYPE_LRHEADER); - INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle); - ASSIGN_LSN(head->h_lsn, log); + head->h_cycle = cpu_to_be32(log->l_curr_cycle); + head->h_lsn = cpu_to_be64( + xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block)); ASSERT(log->l_curr_block >= 0); } @@ -2423,12 +2424,12 @@ restart: /* If I'm the only one writing to this iclog, sync it to disk */ if (iclog->ic_refcnt == 1) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); if ((error = xlog_state_release_iclog(log, iclog))) return error; } else { iclog->ic_refcnt--; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } goto restart; } @@ -2449,7 +2450,7 @@ restart: *iclogp = iclog; ASSERT(iclog->ic_offset <= iclog->ic_size); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); *logoffsetp = log_offset; return 0; @@ -2467,7 +2468,6 @@ xlog_grant_log_space(xlog_t *log, { int free_bytes; int need_bytes; - SPLDECL(s); #ifdef DEBUG xfs_lsn_t tail_lsn; #endif @@ -2479,7 +2479,7 @@ xlog_grant_log_space(xlog_t *log, #endif /* Is there space or do we need to sleep? */ - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_trace_loggrant(log, tic, "xlog_grant_log_space: enter"); /* something is already sleeping; insert new transaction at end */ @@ -2502,7 +2502,7 @@ xlog_grant_log_space(xlog_t *log, */ xlog_trace_loggrant(log, tic, "xlog_grant_log_space: wake 1"); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); } if (tic->t_flags & XFS_LOG_PERM_RESERV) need_bytes = tic->t_unit_res*tic->t_ocnt; @@ -2524,14 +2524,14 @@ redo: sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); if (XLOG_FORCED_SHUTDOWN(log)) { - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto error_return; } xlog_trace_loggrant(log, tic, "xlog_grant_log_space: wake 2"); xlog_grant_push_ail(log->l_mp, need_bytes); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_reserve_headq, tic); @@ -2553,7 +2553,7 @@ redo: #endif xlog_trace_loggrant(log, tic, "xlog_grant_log_space: exit"); xlog_verify_grant_head(log, 1); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return 0; error_return: @@ -2567,7 +2567,7 @@ redo: */ tic->t_curr_res = 0; tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return XFS_ERROR(EIO); } /* xlog_grant_log_space */ @@ -2581,7 +2581,6 @@ STATIC int xlog_regrant_write_log_space(xlog_t *log, xlog_ticket_t *tic) { - SPLDECL(s); int free_bytes, need_bytes; xlog_ticket_t *ntic; #ifdef DEBUG @@ -2599,7 +2598,7 @@ xlog_regrant_write_log_space(xlog_t *log, panic("regrant Recovery problem"); #endif - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: enter"); if (XLOG_FORCED_SHUTDOWN(log)) @@ -2638,14 +2637,14 @@ xlog_regrant_write_log_space(xlog_t *log, /* If we're shutting down, this tic is already * off the queue */ if (XLOG_FORCED_SHUTDOWN(log)) { - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto error_return; } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 1"); xlog_grant_push_ail(log->l_mp, tic->t_unit_res); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); } } @@ -2665,14 +2664,14 @@ redo: /* If we're shutting down, this tic is already off the queue */ if (XLOG_FORCED_SHUTDOWN(log)) { - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto error_return; } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 2"); xlog_grant_push_ail(log->l_mp, need_bytes); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_write_headq, tic); @@ -2689,7 +2688,7 @@ redo: xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit"); xlog_verify_grant_head(log, 1); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return 0; @@ -2704,7 +2703,7 @@ redo: */ tic->t_curr_res = 0; tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return XFS_ERROR(EIO); } /* xlog_regrant_write_log_space */ @@ -2720,14 +2719,12 @@ STATIC void xlog_regrant_reserve_log_space(xlog_t *log, xlog_ticket_t *ticket) { - SPLDECL(s); - xlog_trace_loggrant(log, ticket, "xlog_regrant_reserve_log_space: enter"); if (ticket->t_cnt > 0) ticket->t_cnt--; - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_grant_sub_space(log, ticket->t_curr_res); ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); @@ -2737,7 +2734,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, /* just return if we still have some of the pre-reserved space */ if (ticket->t_cnt > 0) { - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return; } @@ -2745,7 +2742,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, xlog_trace_loggrant(log, ticket, "xlog_regrant_reserve_log_space: exit"); xlog_verify_grant_head(log, 0); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); } /* xlog_regrant_reserve_log_space */ @@ -2769,12 +2766,10 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, xlog_ticket_t *ticket) { - SPLDECL(s); - if (ticket->t_cnt > 0) ticket->t_cnt--; - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter"); xlog_grant_sub_space(log, ticket->t_curr_res); @@ -2791,7 +2786,7 @@ xlog_ungrant_log_space(xlog_t *log, xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit"); xlog_verify_grant_head(log, 1); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); xfs_log_move_tail(log->l_mp, 1); } /* xlog_ungrant_log_space */ @@ -2799,15 +2794,13 @@ xlog_ungrant_log_space(xlog_t *log, /* * Atomically put back used ticket. */ -void +STATIC void xlog_state_put_ticket(xlog_t *log, xlog_ticket_t *tic) { - unsigned long s; - - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); xlog_ticket_put(log, tic); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } /* xlog_state_put_ticket */ /* @@ -2819,19 +2812,18 @@ xlog_state_put_ticket(xlog_t *log, * * */ -int +STATIC int xlog_state_release_iclog(xlog_t *log, xlog_in_core_t *iclog) { - SPLDECL(s); int sync = 0; /* do we sync? */ xlog_assign_tail_lsn(log->l_mp); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } @@ -2843,12 +2835,12 @@ xlog_state_release_iclog(xlog_t *log, iclog->ic_state == XLOG_STATE_WANT_SYNC) { sync++; iclog->ic_state = XLOG_STATE_SYNCING; - INT_SET(iclog->ic_header.h_tail_lsn, ARCH_CONVERT, log->l_tail_lsn); + iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); /* cycle incremented when incrementing curr_block */ } - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* * We let the log lock go, so it's possible that we hit a log I/O @@ -2881,7 +2873,7 @@ xlog_state_switch_iclogs(xlog_t *log, if (!eventual_size) eventual_size = iclog->ic_offset; iclog->ic_state = XLOG_STATE_WANT_SYNC; - INT_SET(iclog->ic_header.h_prev_block, ARCH_CONVERT, log->l_prev_block); + iclog->ic_header.h_prev_block = cpu_to_be32(log->l_prev_block); log->l_prev_block = log->l_curr_block; log->l_prev_cycle = log->l_curr_cycle; @@ -2939,13 +2931,12 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) { xlog_in_core_t *iclog; xfs_lsn_t lsn; - SPLDECL(s); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } @@ -2978,15 +2969,15 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) * the previous sync. */ iclog->ic_refcnt++; - lsn = INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT); + lsn = be64_to_cpu(iclog->ic_header.h_lsn); xlog_state_switch_iclogs(log, iclog, 0); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) return XFS_ERROR(EIO); *log_flushed = 1; - s = LOG_LOCK(log); - if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) == lsn && + spin_lock(&log->l_icloglock); + if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && iclog->ic_state != XLOG_STATE_DIRTY) goto maybe_sleep; else @@ -3011,12 +3002,12 @@ maybe_sleep: if (flags & XFS_LOG_SYNC) { /* * We must check if we're shutting down here, before - * we wait, while we're holding the LOG_LOCK. + * we wait, while we're holding the l_icloglock. * Then we check again after waking up, in case our * sleep was disturbed by a bad news. */ if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); @@ -3033,7 +3024,7 @@ maybe_sleep: } else { no_sleep: - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } return 0; } /* xlog_state_sync_all */ @@ -3051,7 +3042,7 @@ no_sleep: * If filesystem activity goes to zero, the iclog will get flushed only by * bdflush(). */ -int +STATIC int xlog_state_sync(xlog_t *log, xfs_lsn_t lsn, uint flags, @@ -3059,26 +3050,24 @@ xlog_state_sync(xlog_t *log, { xlog_in_core_t *iclog; int already_slept = 0; - SPLDECL(s); - try_again: - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } do { - if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) != lsn) { - iclog = iclog->ic_next; - continue; + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { + iclog = iclog->ic_next; + continue; } if (iclog->ic_state == XLOG_STATE_DIRTY) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return 0; } @@ -3113,11 +3102,11 @@ try_again: } else { iclog->ic_refcnt++; xlog_state_switch_iclogs(log, iclog, 0); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) return XFS_ERROR(EIO); *log_flushed = 1; - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); } } @@ -3129,7 +3118,7 @@ try_again: * gotten a log write error. */ if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); @@ -3143,13 +3132,13 @@ try_again: return XFS_ERROR(EIO); *log_flushed = 1; } else { /* just return */ - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } return 0; } while (iclog != log->l_iclog); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return 0; } /* xlog_state_sync */ @@ -3158,12 +3147,10 @@ try_again: * Called when we want to mark the current iclog as being ready to sync to * disk. */ -void +STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) { - SPLDECL(s); - - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (iclog->ic_state == XLOG_STATE_ACTIVE) { xlog_state_switch_iclogs(log, iclog, 0); @@ -3172,7 +3159,7 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR)); } - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } /* xlog_state_want_sync */ @@ -3193,16 +3180,15 @@ xlog_state_ticket_alloc(xlog_t *log) xlog_ticket_t *t_list; xlog_ticket_t *next; xfs_caddr_t buf; - uint i = (NBPP / sizeof(xlog_ticket_t)) - 2; - SPLDECL(s); + uint i = (PAGE_SIZE / sizeof(xlog_ticket_t)) - 2; /* * The kmem_zalloc may sleep, so we shouldn't be holding the * global lock. XXXmiken: may want to use zone allocator. */ - buf = (xfs_caddr_t) kmem_zalloc(NBPP, KM_SLEEP); + buf = (xfs_caddr_t) kmem_zalloc(PAGE_SIZE, KM_SLEEP); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); /* Attach 1st ticket to Q, so we can keep track of allocated memory */ t_list = (xlog_ticket_t *)buf; @@ -3231,7 +3217,7 @@ xlog_state_ticket_alloc(xlog_t *log) } t_list->t_next = NULL; log->l_tail = t_list; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } /* xlog_state_ticket_alloc */ @@ -3273,7 +3259,7 @@ xlog_ticket_put(xlog_t *log, /* * Grab ticket off freelist or allocation some more */ -xlog_ticket_t * +STATIC xlog_ticket_t * xlog_ticket_get(xlog_t *log, int unit_bytes, int cnt, @@ -3282,15 +3268,14 @@ xlog_ticket_get(xlog_t *log, { xlog_ticket_t *tic; uint num_headers; - SPLDECL(s); alloc: if (log->l_freelist == NULL) xlog_state_ticket_alloc(log); /* potentially sleep */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (log->l_freelist == NULL) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); goto alloc; } tic = log->l_freelist; @@ -3298,7 +3283,7 @@ xlog_ticket_get(xlog_t *log, if (log->l_freelist == NULL) log->l_tail = NULL; log->l_ticket_cnt--; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* * Permanent reservations have up to 'cnt'-1 active log operations @@ -3473,10 +3458,9 @@ xlog_verify_iclog(xlog_t *log, __uint8_t clientid; int len, i, j, k, op_len; int idx; - SPLDECL(s); /* check validity of iclog pointers */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); icptr = log->l_iclog; for (i=0; i < log->l_iclog_bufs; i++) { if (icptr == NULL) @@ -3485,21 +3469,21 @@ xlog_verify_iclog(xlog_t *log, } if (icptr != log->l_iclog) xlog_panic("xlog_verify_iclog: corrupt iclog ring"); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* check log magic numbers */ - ptr = (xfs_caddr_t) &(iclog->ic_header); - if (INT_GET(*(uint *)ptr, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) + if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) xlog_panic("xlog_verify_iclog: invalid magic num"); - for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&(iclog->ic_header))+count; + ptr = (xfs_caddr_t) &iclog->ic_header; + for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; ptr += BBSIZE) { - if (INT_GET(*(uint *)ptr, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) + if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) xlog_panic("xlog_verify_iclog: unexpected magic num"); } /* check fields */ - len = INT_GET(iclog->ic_header.h_num_logops, ARCH_CONVERT); + len = be32_to_cpu(iclog->ic_header.h_num_logops); ptr = iclog->ic_datap; base_ptr = ptr; ophead = (xlog_op_header_t *)ptr; @@ -3517,9 +3501,11 @@ xlog_verify_iclog(xlog_t *log, if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - clientid = GET_CLIENT_ID(xhdr[j].hic_xheader.xh_cycle_data[k], ARCH_CONVERT); + clientid = xlog_get_client_id( + xhdr[j].hic_xheader.xh_cycle_data[k]); } else { - clientid = GET_CLIENT_ID(iclog->ic_header.h_cycle_data[idx], ARCH_CONVERT); + clientid = xlog_get_client_id( + iclog->ic_header.h_cycle_data[idx]); } } if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) @@ -3531,16 +3517,16 @@ xlog_verify_iclog(xlog_t *log, field_offset = (__psint_t) ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); if (syncing == B_FALSE || (field_offset & 0x1ff)) { - op_len = INT_GET(ophead->oh_len, ARCH_CONVERT); + op_len = be32_to_cpu(ophead->oh_len); } else { idx = BTOBBT((__psint_t)&ophead->oh_len - (__psint_t)iclog->ic_datap); if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - op_len = INT_GET(xhdr[j].hic_xheader.xh_cycle_data[k], ARCH_CONVERT); + op_len = be32_to_cpu(xhdr[j].hic_xheader.xh_cycle_data[k]); } else { - op_len = INT_GET(iclog->ic_header.h_cycle_data[idx], ARCH_CONVERT); + op_len = be32_to_cpu(iclog->ic_header.h_cycle_data[idx]); } } ptr += sizeof(xlog_op_header_t) + op_len; @@ -3549,7 +3535,7 @@ xlog_verify_iclog(xlog_t *log, #endif /* - * Mark all iclogs IOERROR. LOG_LOCK is held by the caller. + * Mark all iclogs IOERROR. l_icloglock is held by the caller. */ STATIC int xlog_state_ioerror( @@ -3597,8 +3583,6 @@ xfs_log_force_umount( xlog_t *log; int retval; int dummy; - SPLDECL(s); - SPLDECL(s2); log = mp->m_log; @@ -3627,8 +3611,8 @@ xfs_log_force_umount( * before we mark the filesystem SHUTDOWN and wake * everybody up to tell the bad news. */ - s = GRANT_LOCK(log); - s2 = LOG_LOCK(log); + spin_lock(&log->l_grant_lock); + spin_lock(&log->l_icloglock); mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; XFS_BUF_DONE(mp->m_sb_bp); /* @@ -3644,7 +3628,7 @@ xfs_log_force_umount( */ if (logerror) retval = xlog_state_ioerror(log); - LOG_UNLOCK(log, s2); + spin_unlock(&log->l_icloglock); /* * We don't want anybody waiting for log reservations @@ -3667,7 +3651,7 @@ xfs_log_force_umount( tic = tic->t_next; } while (tic != log->l_write_headq); } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { ASSERT(!logerror); @@ -3676,9 +3660,9 @@ xfs_log_force_umount( * log down completely. */ xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); - s2 = LOG_LOCK(log); + spin_lock(&log->l_icloglock); retval = xlog_state_ioerror(log); - LOG_UNLOCK(log, s2); + spin_unlock(&log->l_icloglock); } /* * Wake up everybody waiting on xfs_log_force. @@ -3691,13 +3675,13 @@ xfs_log_force_umount( { xlog_in_core_t *iclog; - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; do { ASSERT(iclog->ic_callback == 0); iclog = iclog->ic_next; } while (iclog != log->l_iclog); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } #endif /* return non-zero if log IOERROR transition had already happened */ diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index ebbe93f4f97..4cdac048df5 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -22,8 +22,9 @@ #define CYCLE_LSN(lsn) ((uint)((lsn)>>32)) #define BLOCK_LSN(lsn) ((uint)(lsn)) + /* this is used in a spot where we might otherwise double-endian-flip */ -#define CYCLE_LSN_DISK(lsn) (((uint *)&(lsn))[0]) +#define CYCLE_LSN_DISK(lsn) (((__be32 *)&(lsn))[0]) #ifdef __KERNEL__ /* diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 752f964b369..e008233ee24 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -55,32 +55,21 @@ struct xfs_mount; BTOBB(XLOG_MAX_ICLOGS << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \ XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) -/* - * set lsns - */ -#define ASSIGN_ANY_LSN_HOST(lsn,cycle,block) \ - { \ - (lsn) = ((xfs_lsn_t)(cycle)<<32)|(block); \ - } -#define ASSIGN_ANY_LSN_DISK(lsn,cycle,block) \ - { \ - INT_SET(((uint *)&(lsn))[0], ARCH_CONVERT, (cycle)); \ - INT_SET(((uint *)&(lsn))[1], ARCH_CONVERT, (block)); \ - } -#define ASSIGN_LSN(lsn,log) \ - ASSIGN_ANY_LSN_DISK(lsn,(log)->l_curr_cycle,(log)->l_curr_block); - -#define XLOG_SET(f,b) (((f) & (b)) == (b)) - -#define GET_CYCLE(ptr, arch) \ - (INT_GET(*(uint *)(ptr), arch) == XLOG_HEADER_MAGIC_NUM ? \ - INT_GET(*((uint *)(ptr)+1), arch) : \ - INT_GET(*(uint *)(ptr), arch) \ - ) +static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) +{ + return ((xfs_lsn_t)cycle << 32) | block; +} -#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) +static inline uint xlog_get_cycle(char *ptr) +{ + if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) + return be32_to_cpu(*((__be32 *)ptr + 1)); + else + return be32_to_cpu(*(__be32 *)ptr); +} +#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) #ifdef __KERNEL__ @@ -96,19 +85,10 @@ struct xfs_mount; * * this has endian issues, of course. */ - -#ifndef XFS_NATIVE_HOST -#define GET_CLIENT_ID(i,arch) \ - ((i) & 0xff) -#else -#define GET_CLIENT_ID(i,arch) \ - ((i) >> 24) -#endif - -#define GRANT_LOCK(log) mutex_spinlock(&(log)->l_grant_lock) -#define GRANT_UNLOCK(log, s) mutex_spinunlock(&(log)->l_grant_lock, s) -#define LOG_LOCK(log) mutex_spinlock(&(log)->l_icloglock) -#define LOG_UNLOCK(log, s) mutex_spinunlock(&(log)->l_icloglock, s) +static inline uint xlog_get_client_id(__be32 i) +{ + return be32_to_cpu(i) >> 24; +} #define xlog_panic(args...) cmn_err(CE_PANIC, ## args) #define xlog_exit(args...) cmn_err(CE_PANIC, ## args) @@ -285,11 +265,11 @@ typedef struct xlog_ticket { typedef struct xlog_op_header { - xlog_tid_t oh_tid; /* transaction id of operation : 4 b */ - int oh_len; /* bytes in data region : 4 b */ - __uint8_t oh_clientid; /* who sent me this : 1 b */ - __uint8_t oh_flags; /* : 1 b */ - ushort oh_res2; /* 32 bit align : 2 b */ + __be32 oh_tid; /* transaction id of operation : 4 b */ + __be32 oh_len; /* bytes in data region : 4 b */ + __u8 oh_clientid; /* who sent me this : 1 b */ + __u8 oh_flags; /* : 1 b */ + __u16 oh_res2; /* 32 bit align : 2 b */ } xlog_op_header_t; @@ -307,25 +287,25 @@ typedef struct xlog_op_header { #endif typedef struct xlog_rec_header { - uint h_magicno; /* log record (LR) identifier : 4 */ - uint h_cycle; /* write cycle of log : 4 */ - int h_version; /* LR version : 4 */ - int h_len; /* len in bytes; should be 64-bit aligned: 4 */ - xfs_lsn_t h_lsn; /* lsn of this LR : 8 */ - xfs_lsn_t h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */ - uint h_chksum; /* may not be used; non-zero if used : 4 */ - int h_prev_block; /* block number to previous LR : 4 */ - int h_num_logops; /* number of log operations in this LR : 4 */ - uint h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; + __be32 h_magicno; /* log record (LR) identifier : 4 */ + __be32 h_cycle; /* write cycle of log : 4 */ + __be32 h_version; /* LR version : 4 */ + __be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */ + __be64 h_lsn; /* lsn of this LR : 8 */ + __be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */ + __be32 h_chksum; /* may not be used; non-zero if used : 4 */ + __be32 h_prev_block; /* block number to previous LR : 4 */ + __be32 h_num_logops; /* number of log operations in this LR : 4 */ + __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* new fields */ - int h_fmt; /* format of log record : 4 */ - uuid_t h_fs_uuid; /* uuid of FS : 16 */ - int h_size; /* iclog size : 4 */ + __be32 h_fmt; /* format of log record : 4 */ + uuid_t h_fs_uuid; /* uuid of FS : 16 */ + __be32 h_size; /* iclog size : 4 */ } xlog_rec_header_t; typedef struct xlog_rec_ext_header { - uint xh_cycle; /* write cycle of log : 4 */ - uint xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */ + __be32 xh_cycle; /* write cycle of log : 4 */ + __be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */ } xlog_rec_ext_header_t; #ifdef __KERNEL__ @@ -415,7 +395,7 @@ typedef struct log { xlog_ticket_t *l_unmount_free;/* kmem_free these addresses */ xlog_ticket_t *l_tail; /* free list of tickets */ xlog_in_core_t *l_iclog; /* head log queue */ - lock_t l_icloglock; /* grab to change iclog state */ + spinlock_t l_icloglock; /* grab to change iclog state */ xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed * buffers */ xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ @@ -439,7 +419,7 @@ typedef struct log { char *l_iclog_bak[XLOG_MAX_ICLOGS]; /* The following block of fields are changed while holding grant_lock */ - lock_t l_grant_lock; + spinlock_t l_grant_lock; xlog_ticket_t *l_reserve_headq; xlog_ticket_t *l_write_headq; int l_grant_reserve_cycle; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 851eca8a715..b82d5d4d246 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -198,7 +198,7 @@ xlog_header_check_dump( cmn_err(CE_DEBUG, " log : uuid = "); for (b = 0; b < 16; b++) cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); - cmn_err(CE_DEBUG, ", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT)); + cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); } #else #define xlog_header_check_dump(mp, head) @@ -212,14 +212,14 @@ xlog_header_check_recover( xfs_mount_t *mp, xlog_rec_header_t *head) { - ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); + ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); /* * IRIX doesn't write the h_fmt field and leaves it zeroed * (XLOG_FMT_UNKNOWN). This stops us from trying to recover * a dirty log created in IRIX. */ - if (unlikely(INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT)) { + if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { xlog_warn( "XFS: dirty log written in incompatible format - can't recover"); xlog_header_check_dump(mp, head); @@ -245,7 +245,7 @@ xlog_header_check_mount( xfs_mount_t *mp, xlog_rec_header_t *head) { - ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); + ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); if (uuid_is_nil(&head->h_fs_uuid)) { /* @@ -293,7 +293,7 @@ xlog_recover_iodone( * Note that the algorithm can not be perfect because the disk will not * necessarily be perfect. */ -int +STATIC int xlog_find_cycle_start( xlog_t *log, xfs_buf_t *bp, @@ -311,7 +311,7 @@ xlog_find_cycle_start( if ((error = xlog_bread(log, mid_blk, 1, bp))) return error; offset = xlog_align(log, mid_blk, 1, bp); - mid_cycle = GET_CYCLE(offset, ARCH_CONVERT); + mid_cycle = xlog_get_cycle(offset); if (mid_cycle == cycle) { *last_blk = mid_blk; /* last_half_cycle == mid_cycle */ @@ -371,7 +371,7 @@ xlog_find_verify_cycle( buf = xlog_align(log, i, bcount, bp); for (j = 0; j < bcount; j++) { - cycle = GET_CYCLE(buf, ARCH_CONVERT); + cycle = xlog_get_cycle(buf); if (cycle == stop_on_cycle_no) { *new_blk = i+j; goto out; @@ -447,8 +447,7 @@ xlog_find_verify_log_record( head = (xlog_rec_header_t *)offset; - if (XLOG_HEADER_MAGIC_NUM == - INT_GET(head->h_magicno, ARCH_CONVERT)) + if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno)) break; if (!smallmem) @@ -480,7 +479,7 @@ xlog_find_verify_log_record( * record do we update last_blk. */ if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { - uint h_size = INT_GET(head->h_size, ARCH_CONVERT); + uint h_size = be32_to_cpu(head->h_size); xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; if (h_size % XLOG_HEADER_CYCLE_SIZE) @@ -489,8 +488,8 @@ xlog_find_verify_log_record( xhdrs = 1; } - if (*last_blk - i + extra_bblks - != BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs) + if (*last_blk - i + extra_bblks != + BTOBB(be32_to_cpu(head->h_len)) + xhdrs) *last_blk = i; out: @@ -550,13 +549,13 @@ xlog_find_head( if ((error = xlog_bread(log, 0, 1, bp))) goto bp_err; offset = xlog_align(log, 0, 1, bp); - first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); + first_half_cycle = xlog_get_cycle(offset); last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ if ((error = xlog_bread(log, last_blk, 1, bp))) goto bp_err; offset = xlog_align(log, last_blk, 1, bp); - last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); + last_half_cycle = xlog_get_cycle(offset); ASSERT(last_half_cycle != 0); /* @@ -808,7 +807,7 @@ xlog_find_tail( if ((error = xlog_bread(log, 0, 1, bp))) goto bread_err; offset = xlog_align(log, 0, 1, bp); - if (GET_CYCLE(offset, ARCH_CONVERT) == 0) { + if (xlog_get_cycle(offset) == 0) { *tail_blk = 0; /* leave all other log inited values alone */ goto exit; @@ -823,8 +822,7 @@ xlog_find_tail( if ((error = xlog_bread(log, i, 1, bp))) goto bread_err; offset = xlog_align(log, i, 1, bp); - if (XLOG_HEADER_MAGIC_NUM == - INT_GET(*(uint *)offset, ARCH_CONVERT)) { + if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { found = 1; break; } @@ -841,7 +839,7 @@ xlog_find_tail( goto bread_err; offset = xlog_align(log, i, 1, bp); if (XLOG_HEADER_MAGIC_NUM == - INT_GET(*(uint*)offset, ARCH_CONVERT)) { + be32_to_cpu(*(__be32 *)offset)) { found = 2; break; } @@ -855,7 +853,7 @@ xlog_find_tail( /* find blk_no of tail of log */ rhead = (xlog_rec_header_t *)offset; - *tail_blk = BLOCK_LSN(INT_GET(rhead->h_tail_lsn, ARCH_CONVERT)); + *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); /* * Reset log values according to the state of the log when we @@ -869,11 +867,11 @@ xlog_find_tail( */ log->l_prev_block = i; log->l_curr_block = (int)*head_blk; - log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT); + log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); if (found == 2) log->l_curr_cycle++; - log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT); - log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT); + log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); + log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); log->l_grant_reserve_cycle = log->l_curr_cycle; log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); log->l_grant_write_cycle = log->l_curr_cycle; @@ -891,8 +889,8 @@ xlog_find_tail( * unmount record rather than the block after it. */ if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { - int h_size = INT_GET(rhead->h_size, ARCH_CONVERT); - int h_version = INT_GET(rhead->h_version, ARCH_CONVERT); + int h_size = be32_to_cpu(rhead->h_size); + int h_version = be32_to_cpu(rhead->h_version); if ((h_version & XLOG_VERSION_2) && (h_size > XLOG_HEADER_CYCLE_SIZE)) { @@ -906,10 +904,10 @@ xlog_find_tail( hblks = 1; } after_umount_blk = (i + hblks + (int) - BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize; + BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; tail_lsn = log->l_tail_lsn; if (*head_blk == after_umount_blk && - INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) { + be32_to_cpu(rhead->h_num_logops) == 1) { umount_data_blk = (i + hblks) % log->l_logBBsize; if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { goto bread_err; @@ -922,10 +920,12 @@ xlog_find_tail( * log records will point recovery to after the * current unmount record. */ - ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, log->l_curr_cycle, - after_umount_blk); - ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle, - after_umount_blk); + log->l_tail_lsn = + xlog_assign_lsn(log->l_curr_cycle, + after_umount_blk); + log->l_last_sync_lsn = + xlog_assign_lsn(log->l_curr_cycle, + after_umount_blk); *tail_blk = after_umount_blk; /* @@ -986,7 +986,7 @@ exit: * -1 => use *blk_no as the first block of the log * >0 => error has occurred */ -int +STATIC int xlog_find_zeroed( xlog_t *log, xfs_daddr_t *blk_no) @@ -1007,7 +1007,7 @@ xlog_find_zeroed( if ((error = xlog_bread(log, 0, 1, bp))) goto bp_err; offset = xlog_align(log, 0, 1, bp); - first_cycle = GET_CYCLE(offset, ARCH_CONVERT); + first_cycle = xlog_get_cycle(offset); if (first_cycle == 0) { /* completely zeroed log */ *blk_no = 0; xlog_put_bp(bp); @@ -1018,7 +1018,7 @@ xlog_find_zeroed( if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) goto bp_err; offset = xlog_align(log, log_bbnum-1, 1, bp); - last_cycle = GET_CYCLE(offset, ARCH_CONVERT); + last_cycle = xlog_get_cycle(offset); if (last_cycle != 0) { /* log completely written to */ xlog_put_bp(bp); return 0; @@ -1098,13 +1098,13 @@ xlog_add_record( xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; memset(buf, 0, BBSIZE); - INT_SET(recp->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM); - INT_SET(recp->h_cycle, ARCH_CONVERT, cycle); - INT_SET(recp->h_version, ARCH_CONVERT, + recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); + recp->h_cycle = cpu_to_be32(cycle); + recp->h_version = cpu_to_be32( XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1); - ASSIGN_ANY_LSN_DISK(recp->h_lsn, cycle, block); - ASSIGN_ANY_LSN_DISK(recp->h_tail_lsn, tail_cycle, tail_block); - INT_SET(recp->h_fmt, ARCH_CONVERT, XLOG_FMT); + recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); + recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); + recp->h_fmt = cpu_to_be32(XLOG_FMT); memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); } @@ -2211,7 +2211,7 @@ xlog_recover_do_buffer_trans( * overlap with future reads of those inodes. */ if (XFS_DINODE_MAGIC == - INT_GET(*((__uint16_t *)(xfs_buf_offset(bp, 0))), ARCH_CONVERT) && + be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize, (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { XFS_BUF_STALE(bp); @@ -2581,8 +2581,7 @@ xlog_recover_do_dquot_trans( /* * This type of quotas was turned off, so ignore this record. */ - type = INT_GET(recddq->d_flags, ARCH_CONVERT) & - (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); + type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); ASSERT(type); if (log->l_quotaoffs_flag & type) return (0); @@ -2660,7 +2659,6 @@ xlog_recover_do_efi_trans( xfs_mount_t *mp; xfs_efi_log_item_t *efip; xfs_efi_log_format_t *efi_formatp; - SPLDECL(s); if (pass == XLOG_RECOVER_PASS1) { return 0; @@ -2678,11 +2676,11 @@ xlog_recover_do_efi_trans( efip->efi_next_extent = efi_formatp->efi_nextents; efip->efi_flags |= XFS_EFI_COMMITTED; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); /* * xfs_trans_update_ail() drops the AIL lock. */ - xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn, s); + xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn); return 0; } @@ -2707,7 +2705,6 @@ xlog_recover_do_efd_trans( xfs_log_item_t *lip; int gen; __uint64_t efi_id; - SPLDECL(s); if (pass == XLOG_RECOVER_PASS1) { return; @@ -2725,7 +2722,7 @@ xlog_recover_do_efd_trans( * in the AIL. */ mp = log->l_mp; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); lip = xfs_trans_first_ail(mp, &gen); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { @@ -2735,22 +2732,14 @@ xlog_recover_do_efd_trans( * xfs_trans_delete_ail() drops the * AIL lock. */ - xfs_trans_delete_ail(mp, lip, s); - break; + xfs_trans_delete_ail(mp, lip); + xfs_efi_item_free(efip); + return; } } lip = xfs_trans_next_ail(mp, lip, &gen, NULL); } - - /* - * If we found it, then free it up. If it wasn't there, it - * must have been overwritten in the log. Oh well. - */ - if (lip != NULL) { - xfs_efi_item_free(efip); - } else { - AIL_UNLOCK(mp, s); - } + spin_unlock(&mp->m_ail_lock); } /* @@ -2897,8 +2886,8 @@ xlog_recover_process_data( unsigned long hash; uint flags; - lp = dp + INT_GET(rhead->h_len, ARCH_CONVERT); - num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT); + lp = dp + be32_to_cpu(rhead->h_len); + num_logops = be32_to_cpu(rhead->h_num_logops); /* check the log format matches our own - else we can't recover */ if (xlog_header_check_recover(log->l_mp, rhead)) @@ -2915,15 +2904,20 @@ xlog_recover_process_data( ASSERT(0); return (XFS_ERROR(EIO)); } - tid = INT_GET(ohead->oh_tid, ARCH_CONVERT); + tid = be32_to_cpu(ohead->oh_tid); hash = XLOG_RHASH(tid); trans = xlog_recover_find_tid(rhash[hash], tid); if (trans == NULL) { /* not found; add new tid */ if (ohead->oh_flags & XLOG_START_TRANS) xlog_recover_new_tid(&rhash[hash], tid, - INT_GET(rhead->h_lsn, ARCH_CONVERT)); + be64_to_cpu(rhead->h_lsn)); } else { - ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp); + if (dp + be32_to_cpu(ohead->oh_len) > lp) { + xlog_warn( + "XFS: xlog_recover_process_data: bad length"); + WARN_ON(1); + return (XFS_ERROR(EIO)); + } flags = ohead->oh_flags & ~XLOG_END_TRANS; if (flags & XLOG_WAS_CONT_TRANS) flags &= ~XLOG_CONTINUE_TRANS; @@ -2937,8 +2931,7 @@ xlog_recover_process_data( break; case XLOG_WAS_CONT_TRANS: error = xlog_recover_add_to_cont_trans(trans, - dp, INT_GET(ohead->oh_len, - ARCH_CONVERT)); + dp, be32_to_cpu(ohead->oh_len)); break; case XLOG_START_TRANS: xlog_warn( @@ -2949,8 +2942,7 @@ xlog_recover_process_data( case 0: case XLOG_CONTINUE_TRANS: error = xlog_recover_add_to_trans(trans, - dp, INT_GET(ohead->oh_len, - ARCH_CONVERT)); + dp, be32_to_cpu(ohead->oh_len)); break; default: xlog_warn( @@ -2962,7 +2954,7 @@ xlog_recover_process_data( if (error) return error; } - dp += INT_GET(ohead->oh_len, ARCH_CONVERT); + dp += be32_to_cpu(ohead->oh_len); num_logops--; } return 0; @@ -3075,10 +3067,9 @@ xlog_recover_process_efis( xfs_efi_log_item_t *efip; int gen; xfs_mount_t *mp; - SPLDECL(s); mp = log->l_mp; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); lip = xfs_trans_first_ail(mp, &gen); while (lip != NULL) { @@ -3099,12 +3090,12 @@ xlog_recover_process_efis( continue; } - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); xlog_recover_process_efi(mp, efip); - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); lip = xfs_trans_next_ail(mp, lip, &gen, NULL); } - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } /* @@ -3315,16 +3306,16 @@ xlog_pack_data_checksum( int size) { int i; - uint *up; + __be32 *up; uint chksum = 0; - up = (uint *)iclog->ic_datap; + up = (__be32 *)iclog->ic_datap; /* divide length by 4 to get # words */ for (i = 0; i < (size >> 2); i++) { - chksum ^= INT_GET(*up, ARCH_CONVERT); + chksum ^= be32_to_cpu(*up); up++; } - INT_SET(iclog->ic_header.h_chksum, ARCH_CONVERT, chksum); + iclog->ic_header.h_chksum = cpu_to_be32(chksum); } #else #define xlog_pack_data_checksum(log, iclog, size) @@ -3341,7 +3332,7 @@ xlog_pack_data( { int i, j, k; int size = iclog->ic_offset + roundoff; - uint cycle_lsn; + __be32 cycle_lsn; xfs_caddr_t dp; xlog_in_core_2_t *xhdr; @@ -3352,8 +3343,8 @@ xlog_pack_data( dp = iclog->ic_datap; for (i = 0; i < BTOBB(size) && i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { - iclog->ic_header.h_cycle_data[i] = *(uint *)dp; - *(uint *)dp = cycle_lsn; + iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp; + *(__be32 *)dp = cycle_lsn; dp += BBSIZE; } @@ -3362,8 +3353,8 @@ xlog_pack_data( for ( ; i < BTOBB(size); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - xhdr[j].hic_xheader.xh_cycle_data[k] = *(uint *)dp; - *(uint *)dp = cycle_lsn; + xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp; + *(__be32 *)dp = cycle_lsn; dp += BBSIZE; } @@ -3380,21 +3371,21 @@ xlog_unpack_data_checksum( xfs_caddr_t dp, xlog_t *log) { - uint *up = (uint *)dp; + __be32 *up = (__be32 *)dp; uint chksum = 0; int i; /* divide length by 4 to get # words */ - for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) { - chksum ^= INT_GET(*up, ARCH_CONVERT); + for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) { + chksum ^= be32_to_cpu(*up); up++; } - if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) { + if (chksum != be32_to_cpu(rhead->h_chksum)) { if (rhead->h_chksum || ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) { cmn_err(CE_DEBUG, "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n", - INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum); + be32_to_cpu(rhead->h_chksum), chksum); cmn_err(CE_DEBUG, "XFS: Disregard message if filesystem was created with non-DEBUG kernel"); if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { @@ -3418,18 +3409,18 @@ xlog_unpack_data( int i, j, k; xlog_in_core_2_t *xhdr; - for (i = 0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) && + for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { - *(uint *)dp = *(uint *)&rhead->h_cycle_data[i]; + *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i]; dp += BBSIZE; } if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { xhdr = (xlog_in_core_2_t *)rhead; - for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) { + for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - *(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; + *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; dp += BBSIZE; } } @@ -3445,24 +3436,21 @@ xlog_valid_rec_header( { int hlen; - if (unlikely( - (INT_GET(rhead->h_magicno, ARCH_CONVERT) != - XLOG_HEADER_MAGIC_NUM))) { + if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { XFS_ERROR_REPORT("xlog_valid_rec_header(1)", XFS_ERRLEVEL_LOW, log->l_mp); return XFS_ERROR(EFSCORRUPTED); } if (unlikely( (!rhead->h_version || - (INT_GET(rhead->h_version, ARCH_CONVERT) & - (~XLOG_VERSION_OKBITS)) != 0))) { + (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { xlog_warn("XFS: %s: unrecognised log version (%d).", - __FUNCTION__, INT_GET(rhead->h_version, ARCH_CONVERT)); + __FUNCTION__, be32_to_cpu(rhead->h_version)); return XFS_ERROR(EIO); } /* LR body must have data or it wouldn't have been written */ - hlen = INT_GET(rhead->h_len, ARCH_CONVERT); + hlen = be32_to_cpu(rhead->h_len); if (unlikely( hlen <= 0 || hlen > INT_MAX )) { XFS_ERROR_REPORT("xlog_valid_rec_header(2)", XFS_ERRLEVEL_LOW, log->l_mp); @@ -3522,9 +3510,8 @@ xlog_do_recovery_pass( error = xlog_valid_rec_header(log, rhead, tail_blk); if (error) goto bread_err1; - h_size = INT_GET(rhead->h_size, ARCH_CONVERT); - if ((INT_GET(rhead->h_version, ARCH_CONVERT) - & XLOG_VERSION_2) && + h_size = be32_to_cpu(rhead->h_size); + if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && (h_size > XLOG_HEADER_CYCLE_SIZE)) { hblks = h_size / XLOG_HEADER_CYCLE_SIZE; if (h_size % XLOG_HEADER_CYCLE_SIZE) @@ -3561,7 +3548,7 @@ xlog_do_recovery_pass( goto bread_err2; /* blocks in data section */ - bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); + bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); error = xlog_bread(log, blk_no + hblks, bblks, dbp); if (error) goto bread_err2; @@ -3636,7 +3623,7 @@ xlog_do_recovery_pass( if (error) goto bread_err2; - bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); + bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); blk_no += hblks; /* Read in data for log record */ @@ -3707,7 +3694,7 @@ xlog_do_recovery_pass( error = xlog_valid_rec_header(log, rhead, blk_no); if (error) goto bread_err2; - bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); + bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) goto bread_err2; offset = xlog_align(log, blk_no+hblks, bblks, dbp); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index ebdb76da527..6409b376299 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -136,15 +136,9 @@ xfs_mount_init(void) mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; } - AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail"); - spinlock_init(&mp->m_sb_lock, "xfs_sb"); + spin_lock_init(&mp->m_sb_lock); mutex_init(&mp->m_ilock); mutex_init(&mp->m_growlock); - /* - * Initialize the AIL. - */ - xfs_trans_ail_init(mp); - atomic_set(&mp->m_active_trans, 0); return mp; @@ -171,7 +165,7 @@ xfs_mount_free( sizeof(xfs_perag_t) * mp->m_sb.sb_agcount); } - AIL_LOCK_DESTROY(&mp->m_ail_lock); + spinlock_destroy(&mp->m_ail_lock); spinlock_destroy(&mp->m_sb_lock); mutex_destroy(&mp->m_ilock); mutex_destroy(&mp->m_growlock); @@ -616,7 +610,7 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) int i; mp->m_agfrotor = mp->m_agirotor = 0; - spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock"); + spin_lock_init(&mp->m_agirotor_lock); mp->m_maxagi = mp->m_sb.sb_agcount; mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; @@ -696,7 +690,6 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) uint64_t bfreelst = 0; uint64_t btree = 0; int error; - int s; for (index = 0; index < agcount; index++) { /* @@ -721,11 +714,11 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) /* * Overwrite incore superblock counters with just-read data */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); sbp->sb_ifree = ifree; sbp->sb_icount = ialloc; sbp->sb_fdblocks = bfree + bfreelst + btree; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); /* Fixup the per-cpu counters as well. */ xfs_icsb_reinit_counters(mp); @@ -734,49 +727,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) } /* - * xfs_mountfs - * - * This function does the following on an initial mount of a file system: - * - reads the superblock from disk and init the mount struct - * - if we're a 32-bit kernel, do a size check on the superblock - * so we don't mount terabyte filesystems - * - init mount struct realtime fields - * - allocate inode hash table for fs - * - init directory manager - * - perform recovery and init the log manager + * Update alignment values based on mount options and sb values */ -int -xfs_mountfs( - xfs_mount_t *mp, - int mfsi_flags) +STATIC int +xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags) { - xfs_buf_t *bp; xfs_sb_t *sbp = &(mp->m_sb); - xfs_inode_t *rip; - bhv_vnode_t *rvp = NULL; - int readio_log, writeio_log; - xfs_daddr_t d; - __uint64_t resblks; - __int64_t update_flags; - uint quotamount, quotaflags; - int agno; - int uuid_mounted = 0; - int error = 0; - if (mp->m_sb_bp == NULL) { - if ((error = xfs_readsb(mp, mfsi_flags))) { - return error; - } - } - xfs_mount_common(mp, sbp); - - /* - * Check if sb_agblocks is aligned at stripe boundary - * If sb_agblocks is NOT aligned turn off m_dalign since - * allocator alignment is within an ag, therefore ag has - * to be aligned at stripe boundary. - */ - update_flags = 0LL; if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) { /* * If stripe unit and stripe width are not multiples @@ -787,8 +744,7 @@ xfs_mountfs( if (mp->m_flags & XFS_MOUNT_RETERR) { cmn_err(CE_WARN, "XFS: alignment check 1 failed"); - error = XFS_ERROR(EINVAL); - goto error1; + return XFS_ERROR(EINVAL); } mp->m_dalign = mp->m_swidth = 0; } else { @@ -798,8 +754,7 @@ xfs_mountfs( mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { if (mp->m_flags & XFS_MOUNT_RETERR) { - error = XFS_ERROR(EINVAL); - goto error1; + return XFS_ERROR(EINVAL); } xfs_fs_cmn_err(CE_WARN, mp, "stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", @@ -816,8 +771,7 @@ xfs_mountfs( "stripe alignment turned off: sunit(%d) less than bsize(%d)", mp->m_dalign, mp->m_blockmask +1); - error = XFS_ERROR(EINVAL); - goto error1; + return XFS_ERROR(EINVAL); } mp->m_swidth = 0; } @@ -830,11 +784,11 @@ xfs_mountfs( if (XFS_SB_VERSION_HASDALIGN(sbp)) { if (sbp->sb_unit != mp->m_dalign) { sbp->sb_unit = mp->m_dalign; - update_flags |= XFS_SB_UNIT; + *update_flags |= XFS_SB_UNIT; } if (sbp->sb_width != mp->m_swidth) { sbp->sb_width = mp->m_swidth; - update_flags |= XFS_SB_WIDTH; + *update_flags |= XFS_SB_WIDTH; } } } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && @@ -843,49 +797,45 @@ xfs_mountfs( mp->m_swidth = sbp->sb_width; } - xfs_alloc_compute_maxlevels(mp); - xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); - xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); - xfs_ialloc_compute_maxlevels(mp); + return 0; +} - if (sbp->sb_imax_pct) { - __uint64_t icount; +/* + * Set the maximum inode count for this filesystem + */ +STATIC void +xfs_set_maxicount(xfs_mount_t *mp) +{ + xfs_sb_t *sbp = &(mp->m_sb); + __uint64_t icount; - /* Make sure the maximum inode count is a multiple of the - * units we allocate inodes in. + if (sbp->sb_imax_pct) { + /* + * Make sure the maximum inode count is a multiple + * of the units we allocate inodes in. */ - icount = sbp->sb_dblocks * sbp->sb_imax_pct; do_div(icount, 100); do_div(icount, mp->m_ialloc_blks); mp->m_maxicount = (icount * mp->m_ialloc_blks) << sbp->sb_inopblog; - } else + } else { mp->m_maxicount = 0; - - mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); - - /* - * XFS uses the uuid from the superblock as the unique - * identifier for fsid. We can not use the uuid from the volume - * since a single partition filesystem is identical to a single - * partition volume/filesystem. - */ - if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && - (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { - if (xfs_uuid_mount(mp)) { - error = XFS_ERROR(EINVAL); - goto error1; - } - uuid_mounted=1; } +} + +/* + * Set the default minimum read and write sizes unless + * already specified in a mount option. + * We use smaller I/O sizes when the file system + * is being used for NFS service (wsync mount option). + */ +STATIC void +xfs_set_rw_sizes(xfs_mount_t *mp) +{ + xfs_sb_t *sbp = &(mp->m_sb); + int readio_log, writeio_log; - /* - * Set the default minimum read and write sizes unless - * already specified in a mount option. - * We use smaller I/O sizes when the file system - * is being used for NFS service (wsync mount option). - */ if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { if (mp->m_flags & XFS_MOUNT_WSYNC) { readio_log = XFS_WSYNC_READIO_LOG; @@ -911,17 +861,14 @@ xfs_mountfs( mp->m_writeio_log = writeio_log; } mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog); +} - /* - * Set the inode cluster size. - * This may still be overridden by the file system - * block size if it is larger than the chosen cluster size. - */ - mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; - - /* - * Set whether we're using inode alignment. - */ +/* + * Set whether we're using inode alignment. + */ +STATIC void +xfs_set_inoalignment(xfs_mount_t *mp) +{ if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) && mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) @@ -937,14 +884,22 @@ xfs_mountfs( mp->m_sinoalign = mp->m_dalign; else mp->m_sinoalign = 0; - /* - * Check that the data (and log if separate) are an ok size. - */ +} + +/* + * Check that the data (and log if separate) are an ok size. + */ +STATIC int +xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) +{ + xfs_buf_t *bp; + xfs_daddr_t d; + int error; + d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { cmn_err(CE_WARN, "XFS: size check 1 failed"); - error = XFS_ERROR(E2BIG); - goto error1; + return XFS_ERROR(E2BIG); } error = xfs_read_buf(mp, mp->m_ddev_targp, d - XFS_FSS_TO_BB(mp, 1), @@ -953,10 +908,9 @@ xfs_mountfs( xfs_buf_relse(bp); } else { cmn_err(CE_WARN, "XFS: size check 2 failed"); - if (error == ENOSPC) { + if (error == ENOSPC) error = XFS_ERROR(E2BIG); - } - goto error1; + return error; } if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && @@ -964,8 +918,7 @@ xfs_mountfs( d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { cmn_err(CE_WARN, "XFS: size check 3 failed"); - error = XFS_ERROR(E2BIG); - goto error1; + return XFS_ERROR(E2BIG); } error = xfs_read_buf(mp, mp->m_logdev_targp, d - XFS_FSB_TO_BB(mp, 1), @@ -974,17 +927,111 @@ xfs_mountfs( xfs_buf_relse(bp); } else { cmn_err(CE_WARN, "XFS: size check 3 failed"); - if (error == ENOSPC) { + if (error == ENOSPC) error = XFS_ERROR(E2BIG); - } + return error; + } + } + return 0; +} + +/* + * xfs_mountfs + * + * This function does the following on an initial mount of a file system: + * - reads the superblock from disk and init the mount struct + * - if we're a 32-bit kernel, do a size check on the superblock + * so we don't mount terabyte filesystems + * - init mount struct realtime fields + * - allocate inode hash table for fs + * - init directory manager + * - perform recovery and init the log manager + */ +int +xfs_mountfs( + xfs_mount_t *mp, + int mfsi_flags) +{ + xfs_sb_t *sbp = &(mp->m_sb); + xfs_inode_t *rip; + bhv_vnode_t *rvp = NULL; + __uint64_t resblks; + __int64_t update_flags = 0LL; + uint quotamount, quotaflags; + int agno; + int uuid_mounted = 0; + int error = 0; + + if (mp->m_sb_bp == NULL) { + error = xfs_readsb(mp, mfsi_flags); + if (error) + return error; + } + xfs_mount_common(mp, sbp); + + /* + * Check if sb_agblocks is aligned at stripe boundary + * If sb_agblocks is NOT aligned turn off m_dalign since + * allocator alignment is within an ag, therefore ag has + * to be aligned at stripe boundary. + */ + error = xfs_update_alignment(mp, mfsi_flags, &update_flags); + if (error) + goto error1; + + xfs_alloc_compute_maxlevels(mp); + xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); + xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); + xfs_ialloc_compute_maxlevels(mp); + + xfs_set_maxicount(mp); + + mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); + + /* + * XFS uses the uuid from the superblock as the unique + * identifier for fsid. We can not use the uuid from the volume + * since a single partition filesystem is identical to a single + * partition volume/filesystem. + */ + if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && + (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { + if (xfs_uuid_mount(mp)) { + error = XFS_ERROR(EINVAL); goto error1; } + uuid_mounted=1; } /* + * Set the minimum read and write sizes + */ + xfs_set_rw_sizes(mp); + + /* + * Set the inode cluster size. + * This may still be overridden by the file system + * block size if it is larger than the chosen cluster size. + */ + mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; + + /* + * Set inode alignment fields + */ + xfs_set_inoalignment(mp); + + /* + * Check that the data (and log if separate) are an ok size. + */ + error = xfs_check_sizes(mp, mfsi_flags); + if (error) + goto error1; + + /* * Initialize realtime fields in the mount structure */ - if ((error = xfs_rtmount_init(mp))) { + error = xfs_rtmount_init(mp); + if (error) { cmn_err(CE_WARN, "XFS: RT mount failed"); goto error1; } @@ -1102,7 +1149,8 @@ xfs_mountfs( /* * Initialize realtime inode pointers in the mount structure */ - if ((error = xfs_rtmount_inodes(mp))) { + error = xfs_rtmount_inodes(mp); + if (error) { /* * Free up the root inode. */ @@ -1120,7 +1168,8 @@ xfs_mountfs( /* * Initialise the XFS quota management subsystem for this mount */ - if ((error = XFS_QM_INIT(mp, "amount, "aflags))) + error = XFS_QM_INIT(mp, "amount, "aflags); + if (error) goto error4; /* @@ -1137,7 +1186,8 @@ xfs_mountfs( /* * Complete the quota initialisation, post-log-replay component. */ - if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags))) + error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags); + if (error) goto error4; /* @@ -1255,7 +1305,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) #if defined(DEBUG) || defined(INDUCE_IO_ERROR) xfs_errortag_clearall(mp, 0); #endif - XFS_IODONE(mp); xfs_mount_free(mp); return 0; } @@ -1441,7 +1490,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) * Fields are not allowed to dip below zero, so if the delta would * do this do not apply it and return EINVAL. * - * The SB_LOCK must be held when this routine is called. + * The m_sb_lock must be held when this routine is called. */ int xfs_mod_incore_sb_unlocked( @@ -1606,7 +1655,7 @@ xfs_mod_incore_sb_unlocked( /* * xfs_mod_incore_sb() is used to change a field in the in-core * superblock structure by the specified delta. This modification - * is protected by the SB_LOCK. Just use the xfs_mod_incore_sb_unlocked() + * is protected by the m_sb_lock. Just use the xfs_mod_incore_sb_unlocked() * routine to do the work. */ int @@ -1616,7 +1665,6 @@ xfs_mod_incore_sb( int64_t delta, int rsvd) { - unsigned long s; int status; /* check for per-cpu counters */ @@ -1633,9 +1681,9 @@ xfs_mod_incore_sb( /* FALLTHROUGH */ #endif default: - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); break; } @@ -1656,7 +1704,6 @@ xfs_mod_incore_sb( int xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) { - unsigned long s; int status=0; xfs_mod_sb_t *msbp; @@ -1664,10 +1711,10 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) * Loop through the array of mod structures and apply each * individually. If any fail, then back out all those * which have already been applied. Do all of this within - * the scope of the SB_LOCK so that all of the changes will + * the scope of the m_sb_lock so that all of the changes will * be atomic. */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); msbp = &msb[0]; for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { /* @@ -1681,11 +1728,11 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) case XFS_SBS_IFREE: case XFS_SBS_FDBLOCKS: if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); status = xfs_icsb_modify_counters(mp, msbp->msb_field, msbp->msb_delta, rsvd); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); break; } /* FALLTHROUGH */ @@ -1719,12 +1766,12 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) case XFS_SBS_IFREE: case XFS_SBS_FDBLOCKS: if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); status = xfs_icsb_modify_counters(mp, msbp->msb_field, -(msbp->msb_delta), rsvd); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); break; } /* FALLTHROUGH */ @@ -1740,7 +1787,7 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) msbp--; } } - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); return status; } @@ -1888,12 +1935,12 @@ xfs_mount_log_sbunit( * * Locking rules: * - * 1. XFS_SB_LOCK() before picking up per-cpu locks + * 1. m_sb_lock before picking up per-cpu locks * 2. per-cpu locks always picked up via for_each_online_cpu() order - * 3. accurate counter sync requires XFS_SB_LOCK + per cpu locks + * 3. accurate counter sync requires m_sb_lock + per cpu locks * 4. modifying per-cpu counters requires holding per-cpu lock - * 5. modifying global counters requires holding XFS_SB_LOCK - * 6. enabling or disabling a counter requires holding the XFS_SB_LOCK + * 5. modifying global counters requires holding m_sb_lock + * 6. enabling or disabling a counter requires holding the m_sb_lock * and _none_ of the per-cpu locks. * * Disabled counters are only ever re-enabled by a balance operation @@ -1920,7 +1967,6 @@ xfs_icsb_cpu_notify( { xfs_icsb_cnts_t *cntp; xfs_mount_t *mp; - int s; mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier); cntp = (xfs_icsb_cnts_t *) @@ -1946,7 +1992,7 @@ xfs_icsb_cpu_notify( * count into the total on the global superblock and * re-enable the counters. */ xfs_icsb_lock(mp); - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT); xfs_icsb_disable_counter(mp, XFS_SBS_IFREE); xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS); @@ -1963,7 +2009,7 @@ xfs_icsb_cpu_notify( XFS_ICSB_SB_LOCKED, 0); xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED, 0); - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_icsb_unlock(mp); break; } @@ -2194,11 +2240,10 @@ xfs_icsb_sync_counters_flags( int flags) { xfs_icsb_cnts_t cnt; - int s; /* Pass 1: lock all counters */ if ((flags & XFS_ICSB_SB_LOCKED) == 0) - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); xfs_icsb_count(mp, &cnt, flags); @@ -2211,7 +2256,7 @@ xfs_icsb_sync_counters_flags( mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; if ((flags & XFS_ICSB_SB_LOCKED) == 0) - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } /* @@ -2252,11 +2297,10 @@ xfs_icsb_balance_counter( { uint64_t count, resid; int weight = num_online_cpus(); - int s; uint64_t min = (uint64_t)min_per_cpu; if (!(flags & XFS_ICSB_SB_LOCKED)) - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); /* disable counter and sync counter */ xfs_icsb_disable_counter(mp, field); @@ -2290,10 +2334,10 @@ xfs_icsb_balance_counter( xfs_icsb_enable_counter(mp, field, count, resid); out: if (!(flags & XFS_ICSB_SB_LOCKED)) - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } -int +STATIC int xfs_icsb_modify_counters( xfs_mount_t *mp, xfs_sb_field_t field, @@ -2302,7 +2346,7 @@ xfs_icsb_modify_counters( { xfs_icsb_cnts_t *icsbp; long long lcounter; /* long counter for 64 bit fields */ - int cpu, ret = 0, s; + int cpu, ret = 0; might_sleep(); again: @@ -2380,15 +2424,15 @@ slow_path: * running atomically here, we know a rebalance cannot * be in progress. Hence we can go straight to operating * on the global superblock. We do not call xfs_mod_incore_sb() - * here even though we need to get the SB_LOCK. Doing so + * here even though we need to get the m_sb_lock. Doing so * will cause us to re-enter this function and deadlock. - * Hence we get the SB_LOCK ourselves and then call + * Hence we get the m_sb_lock ourselves and then call * xfs_mod_incore_sb_unlocked() as the unlocked path operates * directly on the global counters. */ - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); /* * Now that we've modified the global superblock, we diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index c618f7cb5f0..f7c620ec6e6 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -56,20 +56,12 @@ struct cred; struct log; struct xfs_mount_args; struct xfs_inode; -struct xfs_iocore; struct xfs_bmbt_irec; struct xfs_bmap_free; struct xfs_extdelta; struct xfs_swapext; struct xfs_mru_cache; -#define AIL_LOCK_T lock_t -#define AIL_LOCKINIT(x,y) spinlock_init(x,y) -#define AIL_LOCK_DESTROY(x) spinlock_destroy(x) -#define AIL_LOCK(mp,s) s=mutex_spinlock(&(mp)->m_ail_lock) -#define AIL_UNLOCK(mp,s) mutex_spinunlock(&(mp)->m_ail_lock, s) - - /* * Prototypes and functions for the Data Migration subsystem. */ @@ -196,105 +188,6 @@ typedef struct xfs_qmops { #define XFS_QM_QUOTACTL(mp, cmd, id, addr) \ (*(mp)->m_qm_ops->xfs_quotactl)(mp, cmd, id, addr) - -/* - * Prototypes and functions for I/O core modularization. - */ - -typedef int (*xfs_ioinit_t)(struct xfs_mount *, - struct xfs_mount_args *, int); -typedef int (*xfs_bmapi_t)(struct xfs_trans *, void *, - xfs_fileoff_t, xfs_filblks_t, int, - xfs_fsblock_t *, xfs_extlen_t, - struct xfs_bmbt_irec *, int *, - struct xfs_bmap_free *, struct xfs_extdelta *); -typedef int (*xfs_bunmapi_t)(struct xfs_trans *, - void *, xfs_fileoff_t, - xfs_filblks_t, int, xfs_extnum_t, - xfs_fsblock_t *, struct xfs_bmap_free *, - struct xfs_extdelta *, int *); -typedef int (*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *); -typedef int (*xfs_iomap_write_direct_t)( - void *, xfs_off_t, size_t, int, - struct xfs_bmbt_irec *, int *, int); -typedef int (*xfs_iomap_write_delay_t)( - void *, xfs_off_t, size_t, int, - struct xfs_bmbt_irec *, int *); -typedef int (*xfs_iomap_write_allocate_t)( - void *, xfs_off_t, size_t, - struct xfs_bmbt_irec *, int *); -typedef int (*xfs_iomap_write_unwritten_t)( - void *, xfs_off_t, size_t); -typedef uint (*xfs_lck_map_shared_t)(void *); -typedef void (*xfs_lock_t)(void *, uint); -typedef void (*xfs_lock_demote_t)(void *, uint); -typedef int (*xfs_lock_nowait_t)(void *, uint); -typedef void (*xfs_unlk_t)(void *, unsigned int); -typedef xfs_fsize_t (*xfs_size_t)(void *); -typedef xfs_fsize_t (*xfs_iodone_t)(struct xfs_mount *); -typedef int (*xfs_swap_extents_t)(void *, void *, - struct xfs_swapext*); - -typedef struct xfs_ioops { - xfs_ioinit_t xfs_ioinit; - xfs_bmapi_t xfs_bmapi_func; - xfs_bunmapi_t xfs_bunmapi_func; - xfs_bmap_eof_t xfs_bmap_eof_func; - xfs_iomap_write_direct_t xfs_iomap_write_direct; - xfs_iomap_write_delay_t xfs_iomap_write_delay; - xfs_iomap_write_allocate_t xfs_iomap_write_allocate; - xfs_iomap_write_unwritten_t xfs_iomap_write_unwritten; - xfs_lock_t xfs_ilock; - xfs_lck_map_shared_t xfs_lck_map_shared; - xfs_lock_demote_t xfs_ilock_demote; - xfs_lock_nowait_t xfs_ilock_nowait; - xfs_unlk_t xfs_unlock; - xfs_size_t xfs_size_func; - xfs_iodone_t xfs_iodone; - xfs_swap_extents_t xfs_swap_extents_func; -} xfs_ioops_t; - -#define XFS_IOINIT(mp, args, flags) \ - (*(mp)->m_io_ops.xfs_ioinit)(mp, args, flags) -#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist,delta) \ - (*(mp)->m_io_ops.xfs_bmapi_func) \ - (trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist,delta) -#define XFS_BUNMAPI(mp, trans,io,bno,len,f,nexts,first,flist,delta,done) \ - (*(mp)->m_io_ops.xfs_bunmapi_func) \ - (trans,(io)->io_obj,bno,len,f,nexts,first,flist,delta,done) -#define XFS_BMAP_EOF(mp, io, endoff, whichfork, eof) \ - (*(mp)->m_io_ops.xfs_bmap_eof_func) \ - ((io)->io_obj, endoff, whichfork, eof) -#define XFS_IOMAP_WRITE_DIRECT(mp, io, offset, count, flags, mval, nmap, found)\ - (*(mp)->m_io_ops.xfs_iomap_write_direct) \ - ((io)->io_obj, offset, count, flags, mval, nmap, found) -#define XFS_IOMAP_WRITE_DELAY(mp, io, offset, count, flags, mval, nmap) \ - (*(mp)->m_io_ops.xfs_iomap_write_delay) \ - ((io)->io_obj, offset, count, flags, mval, nmap) -#define XFS_IOMAP_WRITE_ALLOCATE(mp, io, offset, count, mval, nmap) \ - (*(mp)->m_io_ops.xfs_iomap_write_allocate) \ - ((io)->io_obj, offset, count, mval, nmap) -#define XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count) \ - (*(mp)->m_io_ops.xfs_iomap_write_unwritten) \ - ((io)->io_obj, offset, count) -#define XFS_LCK_MAP_SHARED(mp, io) \ - (*(mp)->m_io_ops.xfs_lck_map_shared)((io)->io_obj) -#define XFS_ILOCK(mp, io, mode) \ - (*(mp)->m_io_ops.xfs_ilock)((io)->io_obj, mode) -#define XFS_ILOCK_NOWAIT(mp, io, mode) \ - (*(mp)->m_io_ops.xfs_ilock_nowait)((io)->io_obj, mode) -#define XFS_IUNLOCK(mp, io, mode) \ - (*(mp)->m_io_ops.xfs_unlock)((io)->io_obj, mode) -#define XFS_ILOCK_DEMOTE(mp, io, mode) \ - (*(mp)->m_io_ops.xfs_ilock_demote)((io)->io_obj, mode) -#define XFS_SIZE(mp, io) \ - (*(mp)->m_io_ops.xfs_size_func)((io)->io_obj) -#define XFS_IODONE(mp) \ - (*(mp)->m_io_ops.xfs_iodone)(mp) -#define XFS_SWAP_EXTENTS(mp, io, tio, sxp) \ - (*(mp)->m_io_ops.xfs_swap_extents_func) \ - ((io)->io_obj, (tio)->io_obj, sxp) - #ifdef HAVE_PERCPU_SB /* @@ -326,14 +219,20 @@ extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int); #define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0) #endif +typedef struct xfs_ail { + xfs_ail_entry_t xa_ail; + uint xa_gen; + struct task_struct *xa_task; + xfs_lsn_t xa_target; +} xfs_ail_t; + typedef struct xfs_mount { struct super_block *m_super; xfs_tid_t m_tid; /* next unused tid for fs */ - AIL_LOCK_T m_ail_lock; /* fs AIL mutex */ - xfs_ail_entry_t m_ail; /* fs active log item list */ - uint m_ail_gen; /* fs AIL generation count */ + spinlock_t m_ail_lock; /* fs AIL mutex */ + xfs_ail_t m_ail; /* fs active log item list */ xfs_sb_t m_sb; /* copy of fs superblock */ - lock_t m_sb_lock; /* sb counter mutex */ + spinlock_t m_sb_lock; /* sb counter lock */ struct xfs_buf *m_sb_bp; /* buffer for superblock */ char *m_fsname; /* filesystem name */ int m_fsname_len; /* strlen of fs name */ @@ -342,7 +241,7 @@ typedef struct xfs_mount { int m_bsize; /* fs logical block size */ xfs_agnumber_t m_agfrotor; /* last ag where space found */ xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ - lock_t m_agirotor_lock;/* .. and lock protecting it */ + spinlock_t m_agirotor_lock;/* .. and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ struct xfs_inode *m_inodes; /* active inode list */ struct list_head m_del_inodes; /* inodes to reclaim */ @@ -423,7 +322,6 @@ typedef struct xfs_mount { * hash table */ struct xfs_dmops *m_dm_ops; /* vector of DMI ops */ struct xfs_qmops *m_qm_ops; /* vector of XQM ops */ - struct xfs_ioops m_io_ops; /* vector of I/O ops */ atomic_t m_active_trans; /* number trans frozen */ #ifdef HAVE_PERCPU_SB xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */ @@ -610,8 +508,6 @@ typedef struct xfs_mod_sb { #define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) #define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) -#define XFS_SB_LOCK(mp) mutex_spinlock(&(mp)->m_sb_lock) -#define XFS_SB_UNLOCK(mp,s) mutex_spinunlock(&(mp)->m_sb_lock,(s)) extern xfs_mount_t *xfs_mount_init(void); extern void xfs_mod_sb(xfs_trans_t *, __int64_t); @@ -646,7 +542,6 @@ extern int xfs_qmops_get(struct xfs_mount *, struct xfs_mount_args *); extern void xfs_qmops_put(struct xfs_mount *); extern struct xfs_dmops xfs_dmcore_xfs; -extern struct xfs_ioops xfs_iocore_xfs; extern int xfs_init(void); extern void xfs_cleanup(void); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index e0b358c1c53..a0b2c0a2589 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -225,10 +225,14 @@ _xfs_mru_cache_list_insert( * list need to be deleted. For each element this involves removing it from the * data store, removing it from the reap list, calling the client's free * function and deleting the element from the element zone. + * + * We get called holding the mru->lock, which we drop and then reacquire. + * Sparse need special help with this to tell it we know what we are doing. */ STATIC void _xfs_mru_cache_clear_reap_list( - xfs_mru_cache_t *mru) + xfs_mru_cache_t *mru) __releases(mru->lock) __acquires(mru->lock) + { xfs_mru_cache_elem_t *elem, *next; struct list_head tmp; @@ -245,7 +249,7 @@ _xfs_mru_cache_clear_reap_list( */ list_move(&elem->list_node, &tmp); } - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); list_for_each_entry_safe(elem, next, &tmp, list_node) { @@ -259,7 +263,7 @@ _xfs_mru_cache_clear_reap_list( kmem_zone_free(xfs_mru_elem_zone, elem); } - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); } /* @@ -280,7 +284,7 @@ _xfs_mru_cache_reap( if (!mru || !mru->lists) return; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); next = _xfs_mru_cache_migrate(mru, jiffies); _xfs_mru_cache_clear_reap_list(mru); @@ -294,7 +298,7 @@ _xfs_mru_cache_reap( queue_delayed_work(xfs_mru_reap_wq, &mru->work, next); } - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); } int @@ -368,7 +372,7 @@ xfs_mru_cache_create( */ INIT_RADIX_TREE(&mru->store, GFP_ATOMIC); INIT_LIST_HEAD(&mru->reap_list); - spinlock_init(&mru->lock, "xfs_mru_cache"); + spin_lock_init(&mru->lock); INIT_DELAYED_WORK(&mru->work, _xfs_mru_cache_reap); mru->grp_time = grp_time; @@ -398,17 +402,17 @@ xfs_mru_cache_flush( if (!mru || !mru->lists) return; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); if (mru->queued) { - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); } _xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time); _xfs_mru_cache_clear_reap_list(mru); - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); } void @@ -454,13 +458,13 @@ xfs_mru_cache_insert( elem->key = key; elem->value = value; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); radix_tree_insert(&mru->store, key, elem); radix_tree_preload_end(); _xfs_mru_cache_list_insert(mru, elem); - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); return 0; } @@ -483,14 +487,14 @@ xfs_mru_cache_remove( if (!mru || !mru->lists) return NULL; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); elem = radix_tree_delete(&mru->store, key); if (elem) { value = elem->value; list_del(&elem->list_node); } - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); if (elem) kmem_zone_free(xfs_mru_elem_zone, elem); @@ -528,6 +532,10 @@ xfs_mru_cache_delete( * * If the element isn't found, this function returns NULL and the spinlock is * released. xfs_mru_cache_done() should NOT be called when this occurs. + * + * Because sparse isn't smart enough to know about conditional lock return + * status, we need to help it get it right by annotating the path that does + * not release the lock. */ void * xfs_mru_cache_lookup( @@ -540,14 +548,14 @@ xfs_mru_cache_lookup( if (!mru || !mru->lists) return NULL; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); elem = radix_tree_lookup(&mru->store, key); if (elem) { list_del(&elem->list_node); _xfs_mru_cache_list_insert(mru, elem); - } - else - mutex_spinunlock(&mru->lock, 0); + __release(mru_lock); /* help sparse not be stupid */ + } else + spin_unlock(&mru->lock); return elem ? elem->value : NULL; } @@ -571,10 +579,12 @@ xfs_mru_cache_peek( if (!mru || !mru->lists) return NULL; - mutex_spinlock(&mru->lock); + spin_lock(&mru->lock); elem = radix_tree_lookup(&mru->store, key); if (!elem) - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); + else + __release(mru_lock); /* help sparse not be stupid */ return elem ? elem->value : NULL; } @@ -586,7 +596,7 @@ xfs_mru_cache_peek( */ void xfs_mru_cache_done( - xfs_mru_cache_t *mru) + xfs_mru_cache_t *mru) __releases(mru->lock) { - mutex_spinunlock(&mru->lock, 0); + spin_unlock(&mru->lock); } diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c index 2ec1d8a2735..a294e58db8d 100644 --- a/fs/xfs/xfs_qmops.c +++ b/fs/xfs/xfs_qmops.c @@ -49,18 +49,17 @@ xfs_mount_reset_sbqflags(xfs_mount_t *mp) { int error; xfs_trans_t *tp; - unsigned long s; mp->m_qflags = 0; /* * It is OK to look at sb_qflags here in mount path, - * without SB_LOCK. + * without m_sb_lock. */ if (mp->m_sb.sb_qflags == 0) return 0; - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); mp->m_sb.sb_qflags = 0; - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); /* * if the fs is readonly, let the incore superblock run diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 44ea0ba3647..7eb157a59f9 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -39,6 +39,7 @@ #include "xfs_refcache.h" #include "xfs_utils.h" #include "xfs_trans_space.h" +#include "xfs_vnodeops.h" /* @@ -118,7 +119,7 @@ xfs_lock_for_rename( inum1 = ip1->i_ino; ASSERT(ip1); - ITRACE(ip1); + xfs_itrace_ref(ip1); /* * Unlock dp1 and lock dp2 if they are different. @@ -141,7 +142,7 @@ xfs_lock_for_rename( IRELE (ip1); return error; } else { - ITRACE(ip2); + xfs_itrace_ref(ip2); } /* @@ -247,8 +248,8 @@ xfs_rename( int src_namelen = VNAMELEN(src_vname); int target_namelen = VNAMELEN(target_vname); - vn_trace_entry(src_dp, "xfs_rename", (inst_t *)__return_address); - vn_trace_entry(xfs_vtoi(target_dir_vp), "xfs_rename", (inst_t *)__return_address); + xfs_itrace_entry(src_dp); + xfs_itrace_entry(xfs_vtoi(target_dir_vp)); /* * Find the XFS behavior descriptor for the target directory diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 47082c01872..ca83ddf72af 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -73,18 +73,6 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int, */ /* - * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set. - */ -STATIC int -xfs_lowbit32( - __uint32_t v) -{ - if (v) - return ffs(v) - 1; - return -1; -} - -/* * Allocate space to the bitmap or summary file, and zero it, for growfs. */ STATIC int /* error */ @@ -444,6 +432,7 @@ xfs_rtallocate_extent_near( } bbno = XFS_BITTOBLOCK(mp, bno); i = 0; + ASSERT(minlen != 0); log2len = xfs_highbit32(minlen); /* * Loop over all bitmap blocks (bbno + i is current block). @@ -612,6 +601,8 @@ xfs_rtallocate_extent_size( xfs_suminfo_t sum; /* summary information for extents */ ASSERT(minlen % prod == 0 && maxlen % prod == 0); + ASSERT(maxlen != 0); + /* * Loop over all the levels starting with maxlen. * At each level, look at all the bitmap blocks, to see if there @@ -669,6 +660,9 @@ xfs_rtallocate_extent_size( *rtblock = NULLRTBLOCK; return 0; } + ASSERT(minlen != 0); + ASSERT(maxlen != 0); + /* * Loop over sizes, from maxlen down to minlen. * This time, when we do the allocations, allow smaller ones @@ -1954,6 +1948,7 @@ xfs_growfs_rt( nsbp->sb_blocksize * nsbp->sb_rextsize); nsbp->sb_rextents = nsbp->sb_rblocks; do_div(nsbp->sb_rextents, nsbp->sb_rextsize); + ASSERT(nsbp->sb_rextents != 0); nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; nrsumsize = diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 799c1f87126..8d8dcd21571 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -21,8 +21,6 @@ struct xfs_mount; struct xfs_trans; -#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) - /* Min and max rt extent sizes, specified in bytes */ #define XFS_MAX_RTEXTSIZE (1024 * 1024 * 1024) /* 1GB */ #define XFS_DFL_RTEXTSIZE (64 * 1024) /* 64KB */ diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index 49875e1d129..f87db5344ce 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -32,18 +32,10 @@ struct xfs_mount; static inline xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) { - return (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) ? \ + return (XFS_IS_REALTIME_INODE(ip) ? \ (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \ XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); } -#define XFS_FSB_TO_DB_IO(io,fsb) xfs_fsb_to_db_io(io,fsb) -static inline xfs_daddr_t -xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb) -{ - return (((io)->io_flags & XFS_IOCORE_RT) ? \ - XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \ - XFS_FSB_TO_DADDR((io)->io_mount, (fsb))); -} /* * Flags for xfs_free_eofblocks @@ -61,7 +53,7 @@ xfs_get_extsz_hint( { xfs_extlen_t extsz; - if (unlikely(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { + if (unlikely(XFS_IS_REALTIME_INODE(ip))) { extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) ? ip->i_d.di_extsize : ip->i_mount->m_sb.sb_rextsize; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 8878322ee79..71e4c8dcc69 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1322,7 +1322,6 @@ xfs_trans_chunk_committed( xfs_lsn_t item_lsn; struct xfs_mount *mp; int i; - SPLDECL(s); lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { @@ -1363,7 +1362,7 @@ xfs_trans_chunk_committed( * the test below. */ mp = lip->li_mountp; - AIL_LOCK(mp,s); + spin_lock(&mp->m_ail_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { /* * This will set the item's lsn to item_lsn @@ -1372,9 +1371,9 @@ xfs_trans_chunk_committed( * * xfs_trans_update_ail() drops the AIL lock. */ - xfs_trans_update_ail(mp, lip, item_lsn, s); + xfs_trans_update_ail(mp, lip, item_lsn); } else { - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 0e26e729023..7f40628d85c 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -992,8 +992,9 @@ int _xfs_trans_commit(xfs_trans_t *, int *); #define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) void xfs_trans_cancel(xfs_trans_t *, int); -void xfs_trans_ail_init(struct xfs_mount *); -xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); +int xfs_trans_ail_init(struct xfs_mount *); +void xfs_trans_ail_destroy(struct xfs_mount *); +void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *); void xfs_trans_unlocked_item(struct xfs_mount *, xfs_log_item_t *); @@ -1001,6 +1002,8 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx); +extern kmem_zone_t *xfs_trans_zone; + #endif /* __KERNEL__ */ #endif /* __XFS_TRANS_H__ */ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 5b2ff59f19c..4d6330eddc8 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -34,9 +34,9 @@ STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_entry_t *); STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_entry_t *, xfs_log_item_t *); #ifdef DEBUG -STATIC void xfs_ail_check(xfs_ail_entry_t *); +STATIC void xfs_ail_check(xfs_ail_entry_t *, xfs_log_item_t *); #else -#define xfs_ail_check(a) +#define xfs_ail_check(a,l) #endif /* DEBUG */ @@ -55,16 +55,15 @@ xfs_trans_tail_ail( { xfs_lsn_t lsn; xfs_log_item_t *lip; - SPLDECL(s); - AIL_LOCK(mp,s); - lip = xfs_ail_min(&(mp->m_ail)); + spin_lock(&mp->m_ail_lock); + lip = xfs_ail_min(&(mp->m_ail.xa_ail)); if (lip == NULL) { lsn = (xfs_lsn_t)0; } else { lsn = lip->li_lsn; } - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); return lsn; } @@ -72,120 +71,185 @@ xfs_trans_tail_ail( /* * xfs_trans_push_ail * - * This routine is called to move the tail of the AIL - * forward. It does this by trying to flush items in the AIL - * whose lsns are below the given threshold_lsn. + * This routine is called to move the tail of the AIL forward. It does this by + * trying to flush items in the AIL whose lsns are below the given + * threshold_lsn. * - * The routine returns the lsn of the tail of the log. + * the push is run asynchronously in a separate thread, so we return the tail + * of the log right now instead of the tail after the push. This means we will + * either continue right away, or we will sleep waiting on the async thread to + * do it's work. + * + * We do this unlocked - we only need to know whether there is anything in the + * AIL at the time we are called. We don't need to access the contents of + * any of the objects, so the lock is not needed. */ -xfs_lsn_t +void xfs_trans_push_ail( xfs_mount_t *mp, xfs_lsn_t threshold_lsn) { - xfs_lsn_t lsn; xfs_log_item_t *lip; - int gen; - int restarts; - int lock_result; - int flush_log; - SPLDECL(s); -#define XFS_TRANS_PUSH_AIL_RESTARTS 1000 + lip = xfs_ail_min(&mp->m_ail.xa_ail); + if (lip && !XFS_FORCED_SHUTDOWN(mp)) { + if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) + xfsaild_wakeup(mp, threshold_lsn); + } +} + +/* + * Return the item in the AIL with the current lsn. + * Return the current tree generation number for use + * in calls to xfs_trans_next_ail(). + */ +STATIC xfs_log_item_t * +xfs_trans_first_push_ail( + xfs_mount_t *mp, + int *gen, + xfs_lsn_t lsn) +{ + xfs_log_item_t *lip; + + lip = xfs_ail_min(&(mp->m_ail.xa_ail)); + *gen = (int)mp->m_ail.xa_gen; + if (lsn == 0) + return lip; + + while (lip && (XFS_LSN_CMP(lip->li_lsn, lsn) < 0)) + lip = lip->li_ail.ail_forw; - AIL_LOCK(mp,s); - lip = xfs_trans_first_ail(mp, &gen); - if (lip == NULL || XFS_FORCED_SHUTDOWN(mp)) { + return lip; +} + +/* + * Function that does the work of pushing on the AIL + */ +long +xfsaild_push( + xfs_mount_t *mp, + xfs_lsn_t *last_lsn) +{ + long tout = 1000; /* milliseconds */ + xfs_lsn_t last_pushed_lsn = *last_lsn; + xfs_lsn_t target = mp->m_ail.xa_target; + xfs_lsn_t lsn; + xfs_log_item_t *lip; + int gen; + int restarts; + int flush_log, count, stuck; + +#define XFS_TRANS_PUSH_AIL_RESTARTS 10 + + spin_lock(&mp->m_ail_lock); + lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn); + if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* - * Just return if the AIL is empty. + * AIL is empty or our push has reached the end. */ - AIL_UNLOCK(mp, s); - return (xfs_lsn_t)0; + spin_unlock(&mp->m_ail_lock); + last_pushed_lsn = 0; + goto out; } XFS_STATS_INC(xs_push_ail); /* * While the item we are looking at is below the given threshold - * try to flush it out. Make sure to limit the number of times - * we allow xfs_trans_next_ail() to restart scanning from the - * beginning of the list. We'd like not to stop until we've at least + * try to flush it out. We'd like not to stop until we've at least * tried to push on everything in the AIL with an LSN less than - * the given threshold. However, we may give up before that if - * we realize that we've been holding the AIL_LOCK for 'too long', - * blocking interrupts. Currently, too long is < 500us roughly. + * the given threshold. + * + * However, we will stop after a certain number of pushes and wait + * for a reduced timeout to fire before pushing further. This + * prevents use from spinning when we can't do anything or there is + * lots of contention on the AIL lists. */ - flush_log = 0; - restarts = 0; - while (((restarts < XFS_TRANS_PUSH_AIL_RESTARTS) && - (XFS_LSN_CMP(lip->li_lsn, threshold_lsn) < 0))) { + tout = 10; + lsn = lip->li_lsn; + flush_log = stuck = count = restarts = 0; + while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { + int lock_result; /* - * If we can lock the item without sleeping, unlock - * the AIL lock and flush the item. Then re-grab the - * AIL lock so we can look for the next item on the - * AIL. Since we unlock the AIL while we flush the - * item, the next routine may start over again at the - * the beginning of the list if anything has changed. - * That is what the generation count is for. + * If we can lock the item without sleeping, unlock the AIL + * lock and flush the item. Then re-grab the AIL lock so we + * can look for the next item on the AIL. List changes are + * handled by the AIL lookup functions internally * - * If we can't lock the item, either its holder will flush - * it or it is already being flushed or it is being relogged. - * In any of these case it is being taken care of and we - * can just skip to the next item in the list. + * If we can't lock the item, either its holder will flush it + * or it is already being flushed or it is being relogged. In + * any of these case it is being taken care of and we can just + * skip to the next item in the list. */ lock_result = IOP_TRYLOCK(lip); + spin_unlock(&mp->m_ail_lock); switch (lock_result) { - case XFS_ITEM_SUCCESS: - AIL_UNLOCK(mp, s); + case XFS_ITEM_SUCCESS: XFS_STATS_INC(xs_push_ail_success); IOP_PUSH(lip); - AIL_LOCK(mp,s); + last_pushed_lsn = lsn; break; - case XFS_ITEM_PUSHBUF: - AIL_UNLOCK(mp, s); + case XFS_ITEM_PUSHBUF: XFS_STATS_INC(xs_push_ail_pushbuf); -#ifdef XFSRACEDEBUG - delay_for_intr(); - delay(300); -#endif - ASSERT(lip->li_ops->iop_pushbuf); - ASSERT(lip); IOP_PUSHBUF(lip); - AIL_LOCK(mp,s); + last_pushed_lsn = lsn; break; - case XFS_ITEM_PINNED: + case XFS_ITEM_PINNED: XFS_STATS_INC(xs_push_ail_pinned); + stuck++; flush_log = 1; break; - case XFS_ITEM_LOCKED: + case XFS_ITEM_LOCKED: XFS_STATS_INC(xs_push_ail_locked); + last_pushed_lsn = lsn; + stuck++; break; - case XFS_ITEM_FLUSHING: + case XFS_ITEM_FLUSHING: XFS_STATS_INC(xs_push_ail_flushing); + last_pushed_lsn = lsn; + stuck++; break; - default: + default: ASSERT(0); break; } - lip = xfs_trans_next_ail(mp, lip, &gen, &restarts); - if (lip == NULL) { + spin_lock(&mp->m_ail_lock); + /* should we bother continuing? */ + if (XFS_FORCED_SHUTDOWN(mp)) + break; + ASSERT(mp->m_log); + + count++; + + /* + * Are there too many items we can't do anything with? + * If we we are skipping too many items because we can't flush + * them or they are already being flushed, we back off and + * given them time to complete whatever operation is being + * done. i.e. remove pressure from the AIL while we can't make + * progress so traversals don't slow down further inserts and + * removals to/from the AIL. + * + * The value of 100 is an arbitrary magic number based on + * observation. + */ + if (stuck > 100) break; - } - if (XFS_FORCED_SHUTDOWN(mp)) { - /* - * Just return if we shut down during the last try. - */ - AIL_UNLOCK(mp, s); - return (xfs_lsn_t)0; - } + lip = xfs_trans_next_ail(mp, lip, &gen, &restarts); + if (lip == NULL) + break; + if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS) + break; + lsn = lip->li_lsn; } + spin_unlock(&mp->m_ail_lock); if (flush_log) { /* @@ -193,22 +257,35 @@ xfs_trans_push_ail( * push out the log so it will become unpinned and * move forward in the AIL. */ - AIL_UNLOCK(mp, s); XFS_STATS_INC(xs_push_ail_flush); xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - AIL_LOCK(mp, s); } - lip = xfs_ail_min(&(mp->m_ail)); - if (lip == NULL) { - lsn = (xfs_lsn_t)0; - } else { - lsn = lip->li_lsn; + /* + * We reached the target so wait a bit longer for I/O to complete and + * remove pushed items from the AIL before we start the next scan from + * the start of the AIL. + */ + if ((XFS_LSN_CMP(lsn, target) >= 0)) { + tout += 20; + last_pushed_lsn = 0; + } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) || + (count && ((stuck * 100) / count > 90))) { + /* + * Either there is a lot of contention on the AIL or we + * are stuck due to operations in progress. "Stuck" in this + * case is defined as >90% of the items we tried to push + * were stuck. + * + * Backoff a bit more to allow some I/O to complete before + * continuing from where we were. + */ + tout += 10; } - - AIL_UNLOCK(mp, s); - return lsn; -} /* xfs_trans_push_ail */ +out: + *last_lsn = last_pushed_lsn; + return tout; +} /* xfsaild_push */ /* @@ -249,7 +326,7 @@ xfs_trans_unlocked_item( * the call to xfs_log_move_tail() doesn't do anything if there's * not enough free space to wake people up so we're safe calling it. */ - min_lip = xfs_ail_min(&mp->m_ail); + min_lip = xfs_ail_min(&mp->m_ail.xa_ail); if (min_lip == lip) xfs_log_move_tail(mp, 1); @@ -269,21 +346,19 @@ xfs_trans_unlocked_item( * has changed. * * This function must be called with the AIL lock held. The lock - * is dropped before returning, so the caller must pass in the - * cookie returned by AIL_LOCK. + * is dropped before returning. */ void xfs_trans_update_ail( xfs_mount_t *mp, xfs_log_item_t *lip, - xfs_lsn_t lsn, - unsigned long s) __releases(mp->m_ail_lock) + xfs_lsn_t lsn) __releases(mp->m_ail_lock) { xfs_ail_entry_t *ailp; xfs_log_item_t *dlip=NULL; xfs_log_item_t *mlip; /* ptr to minimum lip */ - ailp = &(mp->m_ail); + ailp = &(mp->m_ail.xa_ail); mlip = xfs_ail_min(ailp); if (lip->li_flags & XFS_LI_IN_AIL) { @@ -296,14 +371,14 @@ xfs_trans_update_ail( lip->li_lsn = lsn; xfs_ail_insert(ailp, lip); - mp->m_ail_gen++; + mp->m_ail.xa_gen++; if (mlip == dlip) { - mlip = xfs_ail_min(&(mp->m_ail)); - AIL_UNLOCK(mp, s); + mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); + spin_unlock(&mp->m_ail_lock); xfs_log_move_tail(mp, mlip->li_lsn); } else { - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } @@ -322,21 +397,19 @@ xfs_trans_update_ail( * has changed. * * This function must be called with the AIL lock held. The lock - * is dropped before returning, so the caller must pass in the - * cookie returned by AIL_LOCK. + * is dropped before returning. */ void xfs_trans_delete_ail( xfs_mount_t *mp, - xfs_log_item_t *lip, - unsigned long s) __releases(mp->m_ail_lock) + xfs_log_item_t *lip) __releases(mp->m_ail_lock) { xfs_ail_entry_t *ailp; xfs_log_item_t *dlip; xfs_log_item_t *mlip; if (lip->li_flags & XFS_LI_IN_AIL) { - ailp = &(mp->m_ail); + ailp = &(mp->m_ail.xa_ail); mlip = xfs_ail_min(ailp); dlip = xfs_ail_delete(ailp, lip); ASSERT(dlip == lip); @@ -344,14 +417,14 @@ xfs_trans_delete_ail( lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; - mp->m_ail_gen++; + mp->m_ail.xa_gen++; if (mlip == dlip) { - mlip = xfs_ail_min(&(mp->m_ail)); - AIL_UNLOCK(mp, s); + mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); + spin_unlock(&mp->m_ail_lock); xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); } else { - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); } } else { @@ -360,12 +433,12 @@ xfs_trans_delete_ail( * serious trouble if we get to this stage. */ if (XFS_FORCED_SHUTDOWN(mp)) - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); else { xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, "%s: attempting to delete a log item that is not in the AIL", __FUNCTION__); - AIL_UNLOCK(mp, s); + spin_unlock(&mp->m_ail_lock); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } } @@ -385,10 +458,10 @@ xfs_trans_first_ail( { xfs_log_item_t *lip; - lip = xfs_ail_min(&(mp->m_ail)); - *gen = (int)mp->m_ail_gen; + lip = xfs_ail_min(&(mp->m_ail.xa_ail)); + *gen = (int)mp->m_ail.xa_gen; - return (lip); + return lip; } /* @@ -408,11 +481,11 @@ xfs_trans_next_ail( xfs_log_item_t *nlip; ASSERT(mp && lip && gen); - if (mp->m_ail_gen == *gen) { - nlip = xfs_ail_next(&(mp->m_ail), lip); + if (mp->m_ail.xa_gen == *gen) { + nlip = xfs_ail_next(&(mp->m_ail.xa_ail), lip); } else { - nlip = xfs_ail_min(&(mp->m_ail)); - *gen = (int)mp->m_ail_gen; + nlip = xfs_ail_min(&(mp->m_ail).xa_ail); + *gen = (int)mp->m_ail.xa_gen; if (restarts != NULL) { XFS_STATS_INC(xs_push_ail_restarts); (*restarts)++; @@ -437,12 +510,20 @@ xfs_trans_next_ail( /* * Initialize the doubly linked list to point only to itself. */ -void +int xfs_trans_ail_init( xfs_mount_t *mp) { - mp->m_ail.ail_forw = (xfs_log_item_t*)&(mp->m_ail); - mp->m_ail.ail_back = (xfs_log_item_t*)&(mp->m_ail); + mp->m_ail.xa_ail.ail_forw = (xfs_log_item_t*)&mp->m_ail.xa_ail; + mp->m_ail.xa_ail.ail_back = (xfs_log_item_t*)&mp->m_ail.xa_ail; + return xfsaild_start(mp); +} + +void +xfs_trans_ail_destroy( + xfs_mount_t *mp) +{ + xfsaild_stop(mp); } /* @@ -482,7 +563,7 @@ xfs_ail_insert( next_lip->li_ail.ail_forw = lip; lip->li_ail.ail_forw->li_ail.ail_back = lip; - xfs_ail_check(base); + xfs_ail_check(base, lip); return; } @@ -496,12 +577,12 @@ xfs_ail_delete( xfs_log_item_t *lip) /* ARGSUSED */ { + xfs_ail_check(base, lip); lip->li_ail.ail_forw->li_ail.ail_back = lip->li_ail.ail_back; lip->li_ail.ail_back->li_ail.ail_forw = lip->li_ail.ail_forw; lip->li_ail.ail_forw = NULL; lip->li_ail.ail_back = NULL; - xfs_ail_check(base); return lip; } @@ -545,13 +626,13 @@ xfs_ail_next( */ STATIC void xfs_ail_check( - xfs_ail_entry_t *base) + xfs_ail_entry_t *base, + xfs_log_item_t *lip) { - xfs_log_item_t *lip; xfs_log_item_t *prev_lip; - lip = base->ail_forw; - if (lip == (xfs_log_item_t*)base) { + prev_lip = base->ail_forw; + if (prev_lip == (xfs_log_item_t*)base) { /* * Make sure the pointers are correct when the list * is empty. @@ -561,9 +642,27 @@ xfs_ail_check( } /* + * Check the next and previous entries are valid. + */ + ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); + prev_lip = lip->li_ail.ail_back; + if (prev_lip != (xfs_log_item_t*)base) { + ASSERT(prev_lip->li_ail.ail_forw == lip); + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); + } + prev_lip = lip->li_ail.ail_forw; + if (prev_lip != (xfs_log_item_t*)base) { + ASSERT(prev_lip->li_ail.ail_back == lip); + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); + } + + +#ifdef XFS_TRANS_DEBUG + /* * Walk the list checking forward and backward pointers, * lsn ordering, and that every entry has the XFS_LI_IN_AIL - * flag set. + * flag set. This is really expensive, so only do it when + * specifically debugging the transaction subsystem. */ prev_lip = (xfs_log_item_t*)base; while (lip != (xfs_log_item_t*)base) { @@ -578,5 +677,6 @@ xfs_ail_check( } ASSERT(lip == (xfs_log_item_t*)base); ASSERT(base->ail_back == prev_lip); +#endif /* XFS_TRANS_DEBUG */ } #endif /* DEBUG */ diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index 2912aac07c7..66a09f0d894 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c @@ -21,6 +21,7 @@ #include "xfs_log.h" #include "xfs_inum.h" #include "xfs_trans.h" +#include "xfs_trans_priv.h" STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *, int, int, xfs_lsn_t); diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 447ac4308c9..3c748c456ed 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -47,15 +47,22 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, * From xfs_trans_ail.c */ void xfs_trans_update_ail(struct xfs_mount *mp, - struct xfs_log_item *lip, xfs_lsn_t lsn, - unsigned long s) + struct xfs_log_item *lip, xfs_lsn_t lsn) __releases(mp->m_ail_lock); void xfs_trans_delete_ail(struct xfs_mount *mp, - struct xfs_log_item *lip, unsigned long s) + struct xfs_log_item *lip) __releases(mp->m_ail_lock); struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *); struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *, struct xfs_log_item *, int *, int *); +/* + * AIL push thread support + */ +long xfsaild_push(struct xfs_mount *, xfs_lsn_t *); +void xfsaild_wakeup(struct xfs_mount *, xfs_lsn_t); +int xfsaild_start(struct xfs_mount *); +void xfsaild_stop(struct xfs_mount *); + #endif /* __XFS_TRANS_PRIV_H__ */ diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 673b405eaa3..45d740df53b 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -73,7 +73,7 @@ xfs_dir_lookup_int( { int error; - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); error = xfs_dir_lookup(NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum); if (!error) { @@ -302,6 +302,7 @@ xfs_droplink( ASSERT (ip->i_d.di_nlink > 0); ip->i_d.di_nlink--; + drop_nlink(ip->i_vnode); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = 0; @@ -330,7 +331,6 @@ xfs_bump_ino_vers2( xfs_inode_t *ip) { xfs_mount_t *mp; - unsigned long s; ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); @@ -340,13 +340,13 @@ xfs_bump_ino_vers2( memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); mp = tp->t_mountp; if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { - s = XFS_SB_LOCK(mp); + spin_lock(&mp->m_sb_lock); if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { XFS_SB_VERSION_ADDNLINK(&mp->m_sb); - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); xfs_mod_sb(tp, XFS_SB_VERSIONNUM); } else { - XFS_SB_UNLOCK(mp, s); + spin_unlock(&mp->m_sb_lock); } } /* Caller must log the inode */ @@ -366,6 +366,7 @@ xfs_bumplink( ASSERT(ip->i_d.di_nlink > 0); ip->i_d.di_nlink++; + inc_nlink(ip->i_vnode); if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && (ip->i_d.di_nlink > XFS_MAXLINK_1)) { /* diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index a00b26d8840..f857fcccb72 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h @@ -20,8 +20,6 @@ #define IRELE(ip) VN_RELE(XFS_ITOV(ip)) #define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) -#define ITRACE(ip) vn_trace_ref(ip, __FILE__, __LINE__, \ - (inst_t *)__return_address) extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **); extern int xfs_dir_lookup_int (xfs_inode_t *, uint, bhv_vname_t *, xfs_ino_t *, diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index a1544597bcd..413587f0215 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -58,17 +58,12 @@ #include "xfs_vfsops.h" -int +int __init xfs_init(void) { - extern kmem_zone_t *xfs_bmap_free_item_zone; - extern kmem_zone_t *xfs_btree_cur_zone; - extern kmem_zone_t *xfs_trans_zone; - extern kmem_zone_t *xfs_buf_item_zone; - extern kmem_zone_t *xfs_dabuf_zone; #ifdef XFS_DABUF_DEBUG - extern lock_t xfs_dabuf_global_lock; - spinlock_init(&xfs_dabuf_global_lock, "xfsda"); + extern spinlock_t xfs_dabuf_global_lock; + spin_lock_init(&xfs_dabuf_global_lock); #endif /* @@ -152,18 +147,12 @@ xfs_init(void) return 0; } -void +void __exit xfs_cleanup(void) { - extern kmem_zone_t *xfs_bmap_free_item_zone; - extern kmem_zone_t *xfs_btree_cur_zone; extern kmem_zone_t *xfs_inode_zone; - extern kmem_zone_t *xfs_trans_zone; - extern kmem_zone_t *xfs_da_state_zone; - extern kmem_zone_t *xfs_dabuf_zone; extern kmem_zone_t *xfs_efd_zone; extern kmem_zone_t *xfs_efi_zone; - extern kmem_zone_t *xfs_buf_item_zone; extern kmem_zone_t *xfs_icluster_zone; xfs_cleanup_procfs(); @@ -449,8 +438,6 @@ xfs_mount( if (error) return error; - mp->m_io_ops = xfs_iocore_xfs; - if (args->flags & XFSMNT_QUIET) flags |= XFS_MFSI_QUIET; @@ -544,7 +531,7 @@ xfs_mount( if ((error = xfs_filestream_mount(mp))) goto error2; - error = XFS_IOINIT(mp, args, flags); + error = xfs_mountfs(mp, flags); if (error) goto error2; @@ -694,7 +681,7 @@ xfs_quiesce_fs( * care of the metadata. New transactions are already blocked, so we need to * wait for any remaining transactions to drain out before proceding. */ -STATIC void +void xfs_attr_quiesce( xfs_mount_t *mp) { @@ -821,80 +808,6 @@ fscorrupt_out2: } /* - * xfs_root extracts the root vnode from a vfs. - * - * vfsp -- the vfs struct for the desired file system - * vpp -- address of the caller's vnode pointer which should be - * set to the desired fs root vnode - */ -int -xfs_root( - xfs_mount_t *mp, - bhv_vnode_t **vpp) -{ - bhv_vnode_t *vp; - - vp = XFS_ITOV(mp->m_rootip); - VN_HOLD(vp); - *vpp = vp; - return 0; -} - -/* - * xfs_statvfs - * - * Fill in the statvfs structure for the given file system. We use - * the superblock lock in the mount structure to ensure a consistent - * snapshot of the counters returned. - */ -int -xfs_statvfs( - xfs_mount_t *mp, - bhv_statvfs_t *statp, - bhv_vnode_t *vp) -{ - __uint64_t fakeinos; - xfs_extlen_t lsize; - xfs_sb_t *sbp; - unsigned long s; - - sbp = &(mp->m_sb); - - statp->f_type = XFS_SB_MAGIC; - - xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); - s = XFS_SB_LOCK(mp); - statp->f_bsize = sbp->sb_blocksize; - lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; - statp->f_blocks = sbp->sb_dblocks - lsize; - statp->f_bfree = statp->f_bavail = - sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); - fakeinos = statp->f_bfree << sbp->sb_inopblog; -#if XFS_BIG_INUMS - fakeinos += mp->m_inoadd; -#endif - statp->f_files = - MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); - if (mp->m_maxicount) -#if XFS_BIG_INUMS - if (!mp->m_inoadd) -#endif - statp->f_files = min_t(typeof(statp->f_files), - statp->f_files, - mp->m_maxicount); - statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); - XFS_SB_UNLOCK(mp, s); - - xfs_statvfs_fsid(statp, mp); - statp->f_namelen = MAXNAMELEN - 1; - - if (vp) - XFS_QM_DQSTATVFS(xfs_vtoi(vp), statp); - return 0; -} - - -/* * xfs_sync flushes any pending I/O to file system vfsp. * * This routine is called by vfs_sync() to make sure that things make it @@ -981,8 +894,6 @@ xfs_sync_inodes( int *bypassed) { xfs_inode_t *ip = NULL; - xfs_inode_t *ip_next; - xfs_buf_t *bp; bhv_vnode_t *vp = NULL; int error; int last_error; @@ -992,7 +903,6 @@ xfs_sync_inodes( boolean_t mount_locked; boolean_t vnode_refed; int preempt; - xfs_dinode_t *dip; xfs_iptr_t *ipointer; #ifdef DEBUG boolean_t ipointer_in = B_FALSE; @@ -1045,6 +955,8 @@ xfs_sync_inodes( #define XFS_PREEMPT_MASK 0x7f + ASSERT(!(flags & SYNC_BDFLUSH)); + if (bypassed) *bypassed = 0; if (mp->m_flags & XFS_MOUNT_RDONLY) @@ -1057,7 +969,7 @@ xfs_sync_inodes( ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP); fflag = XFS_B_ASYNC; /* default is don't wait */ - if (flags & (SYNC_BDFLUSH | SYNC_DELWRI)) + if (flags & SYNC_DELWRI) fflag = XFS_B_DELWRI; if (flags & SYNC_WAIT) fflag = 0; /* synchronous overrides all */ @@ -1147,24 +1059,6 @@ xfs_sync_inodes( } /* - * If this is just vfs_sync() or pflushd() calling - * then we can skip inodes for which it looks like - * there is nothing to do. Since we don't have the - * inode locked this is racy, but these are periodic - * calls so it doesn't matter. For the others we want - * to know for sure, so we at least try to lock them. - */ - if (flags & SYNC_BDFLUSH) { - if (((ip->i_itemp == NULL) || - !(ip->i_itemp->ili_format.ilf_fields & - XFS_ILOG_ALL)) && - (ip->i_update_core == 0)) { - ip = ip->i_mnext; - continue; - } - } - - /* * Try to lock without sleeping. We're out of order with * the inode list lock here, so if we fail we need to drop * the mount lock and try again. If we're called from @@ -1181,7 +1075,7 @@ xfs_sync_inodes( * it. */ if (xfs_ilock_nowait(ip, lock_flags) == 0) { - if ((flags & SYNC_BDFLUSH) || (vp == NULL)) { + if (vp == NULL) { ip = ip->i_mnext; continue; } @@ -1242,160 +1136,27 @@ xfs_sync_inodes( xfs_ilock(ip, XFS_ILOCK_SHARED); } - if (flags & SYNC_BDFLUSH) { - if ((flags & SYNC_ATTR) && - ((ip->i_update_core) || - ((ip->i_itemp != NULL) && - (ip->i_itemp->ili_format.ilf_fields != 0)))) { - - /* Insert marker and drop lock if not already - * done. - */ - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - - /* - * We don't want the periodic flushing of the - * inodes by vfs_sync() to interfere with - * I/O to the file, especially read I/O - * where it is only the access time stamp - * that is being flushed out. To prevent - * long periods where we have both inode - * locks held shared here while reading the - * inode's buffer in from disk, we drop the - * inode lock while reading in the inode - * buffer. We have to release the buffer - * and reacquire the inode lock so that they - * are acquired in the proper order (inode - * locks first). The buffer will go at the - * end of the lru chain, though, so we can - * expect it to still be there when we go - * for it again in xfs_iflush(). - */ - if ((xfs_ipincount(ip) == 0) && - xfs_iflock_nowait(ip)) { - - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - error = xfs_itobp(mp, NULL, ip, - &dip, &bp, 0, 0); - if (!error) { - xfs_buf_relse(bp); - } else { - /* Bailing out, remove the - * marker and free it. - */ - XFS_MOUNT_ILOCK(mp); - IPOINTER_REMOVE(ip, mp); - XFS_MOUNT_IUNLOCK(mp); - - ASSERT(!(lock_flags & - XFS_IOLOCK_SHARED)); - - kmem_free(ipointer, - sizeof(xfs_iptr_t)); - return (0); - } - - /* - * Since we dropped the inode lock, - * the inode may have been reclaimed. - * Therefore, we reacquire the mount - * lock and check to see if we were the - * inode reclaimed. If this happened - * then the ipointer marker will no - * longer point back at us. In this - * case, move ip along to the inode - * after the marker, remove the marker - * and continue. - */ - XFS_MOUNT_ILOCK(mp); - mount_locked = B_TRUE; - - if (ip != ipointer->ip_mprev) { - IPOINTER_REMOVE(ip, mp); - - ASSERT(!vnode_refed); - ASSERT(!(lock_flags & - XFS_IOLOCK_SHARED)); - continue; - } - - ASSERT(ip->i_mount == mp); - - if (xfs_ilock_nowait(ip, - XFS_ILOCK_SHARED) == 0) { - ASSERT(ip->i_mount == mp); - /* - * We failed to reacquire - * the inode lock without - * sleeping, so just skip - * the inode for now. We - * clear the ILOCK bit from - * the lock_flags so that we - * won't try to drop a lock - * we don't hold below. - */ - lock_flags &= ~XFS_ILOCK_SHARED; - IPOINTER_REMOVE(ip_next, mp); - } else if ((xfs_ipincount(ip) == 0) && - xfs_iflock_nowait(ip)) { - ASSERT(ip->i_mount == mp); - /* - * Since this is vfs_sync() - * calling we only flush the - * inode out if we can lock - * it without sleeping and - * it is not pinned. Drop - * the mount lock here so - * that we don't hold it for - * too long. We already have - * a marker in the list here. - */ - XFS_MOUNT_IUNLOCK(mp); - mount_locked = B_FALSE; - error = xfs_iflush(ip, - XFS_IFLUSH_DELWRI); - } else { - ASSERT(ip->i_mount == mp); - IPOINTER_REMOVE(ip_next, mp); - } - } - - } + if ((flags & SYNC_ATTR) && + (ip->i_update_core || + (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) { + if (mount_locked) + IPOINTER_INSERT(ip, mp); - } else { - if ((flags & SYNC_ATTR) && - ((ip->i_update_core) || - ((ip->i_itemp != NULL) && - (ip->i_itemp->ili_format.ilf_fields != 0)))) { - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } + if (flags & SYNC_WAIT) { + xfs_iflock(ip); + error = xfs_iflush(ip, XFS_IFLUSH_SYNC); - if (flags & SYNC_WAIT) { - xfs_iflock(ip); - error = xfs_iflush(ip, - XFS_IFLUSH_SYNC); - } else { - /* - * If we can't acquire the flush - * lock, then the inode is already - * being flushed so don't bother - * waiting. If we can lock it then - * do a delwri flush so we can - * combine multiple inode flushes - * in each disk write. - */ - if (xfs_iflock_nowait(ip)) { - error = xfs_iflush(ip, - XFS_IFLUSH_DELWRI); - } - else if (bypassed) - (*bypassed)++; - } + /* + * If we can't acquire the flush lock, then the inode + * is already being flushed so don't bother waiting. + * + * If we can lock it then do a delwri flush so we can + * combine multiple inode flushes in each disk write. + */ + } else if (xfs_iflock_nowait(ip)) { + error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); + } else if (bypassed) { + (*bypassed)++; } } @@ -1627,499 +1388,3 @@ xfs_syncsub( return XFS_ERROR(last_error); } - -/* - * xfs_vget - called by DMAPI and NFSD to get vnode from file handle - */ -int -xfs_vget( - xfs_mount_t *mp, - bhv_vnode_t **vpp, - xfs_fid_t *xfid) -{ - xfs_inode_t *ip; - int error; - xfs_ino_t ino; - unsigned int igen; - - /* - * Invalid. Since handles can be created in user space and passed in - * via gethandle(), this is not cause for a panic. - */ - if (xfid->fid_len != sizeof(*xfid) - sizeof(xfid->fid_len)) - return XFS_ERROR(EINVAL); - - ino = xfid->fid_ino; - igen = xfid->fid_gen; - - /* - * NFS can sometimes send requests for ino 0. Fail them gracefully. - */ - if (ino == 0) - return XFS_ERROR(ESTALE); - - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); - if (error) { - *vpp = NULL; - return error; - } - - if (ip == NULL) { - *vpp = NULL; - return XFS_ERROR(EIO); - } - - if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { - xfs_iput_new(ip, XFS_ILOCK_SHARED); - *vpp = NULL; - return XFS_ERROR(ENOENT); - } - - *vpp = XFS_ITOV(ip); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return 0; -} - - -#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ -#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ -#define MNTOPT_LOGDEV "logdev" /* log device */ -#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */ -#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */ -#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */ -#define MNTOPT_INO64 "ino64" /* force inodes into 64-bit range */ -#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */ -#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */ -#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */ -#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ -#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ -#define MNTOPT_MTPT "mtpt" /* filesystem mount point */ -#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ -#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ -#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ -#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ -#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ -#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ -#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and - * unwritten extent conversion */ -#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ -#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */ -#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ -#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ -#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ -#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ -#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes - * in stat(). */ -#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ -#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ -#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */ -#define MNTOPT_QUOTA "quota" /* disk quotas (user) */ -#define MNTOPT_NOQUOTA "noquota" /* no quotas */ -#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */ -#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */ -#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */ -#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */ -#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */ -#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */ -#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */ -#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ -#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ -#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ -#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ -#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ -#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ - -STATIC unsigned long -suffix_strtoul(char *s, char **endp, unsigned int base) -{ - int last, shift_left_factor = 0; - char *value = s; - - last = strlen(value) - 1; - if (value[last] == 'K' || value[last] == 'k') { - shift_left_factor = 10; - value[last] = '\0'; - } - if (value[last] == 'M' || value[last] == 'm') { - shift_left_factor = 20; - value[last] = '\0'; - } - if (value[last] == 'G' || value[last] == 'g') { - shift_left_factor = 30; - value[last] = '\0'; - } - - return simple_strtoul((const char *)s, endp, base) << shift_left_factor; -} - -int -xfs_parseargs( - struct xfs_mount *mp, - char *options, - struct xfs_mount_args *args, - int update) -{ - char *this_char, *value, *eov; - int dsunit, dswidth, vol_dsunit, vol_dswidth; - int iosize; - int ikeep = 0; - - args->flags |= XFSMNT_BARRIER; - args->flags2 |= XFSMNT2_COMPAT_IOSIZE; - - if (!options) - goto done; - - iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0; - - while ((this_char = strsep(&options, ",")) != NULL) { - if (!*this_char) - continue; - if ((value = strchr(this_char, '=')) != NULL) - *value++ = 0; - - if (!strcmp(this_char, MNTOPT_LOGBUFS)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - args->logbufs = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - args->logbufsize = suffix_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - strncpy(args->logname, value, MAXNAMELEN); - } else if (!strcmp(this_char, MNTOPT_MTPT)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - strncpy(args->mtpt, value, MAXNAMELEN); - } else if (!strcmp(this_char, MNTOPT_RTDEV)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - strncpy(args->rtname, value, MAXNAMELEN); - } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - iosize = simple_strtoul(value, &eov, 10); - args->flags |= XFSMNT_IOSIZE; - args->iosizelog = (uint8_t) iosize; - } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - iosize = suffix_strtoul(value, &eov, 10); - args->flags |= XFSMNT_IOSIZE; - args->iosizelog = ffs(iosize) - 1; - } else if (!strcmp(this_char, MNTOPT_GRPID) || - !strcmp(this_char, MNTOPT_BSDGROUPS)) { - mp->m_flags |= XFS_MOUNT_GRPID; - } else if (!strcmp(this_char, MNTOPT_NOGRPID) || - !strcmp(this_char, MNTOPT_SYSVGROUPS)) { - mp->m_flags &= ~XFS_MOUNT_GRPID; - } else if (!strcmp(this_char, MNTOPT_WSYNC)) { - args->flags |= XFSMNT_WSYNC; - } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { - args->flags |= XFSMNT_OSYNCISOSYNC; - } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { - args->flags |= XFSMNT_NORECOVERY; - } else if (!strcmp(this_char, MNTOPT_INO64)) { - args->flags |= XFSMNT_INO64; -#if !XFS_BIG_INUMS - cmn_err(CE_WARN, - "XFS: %s option not allowed on this system", - this_char); - return EINVAL; -#endif - } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { - args->flags |= XFSMNT_NOALIGN; - } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { - args->flags |= XFSMNT_SWALLOC; - } else if (!strcmp(this_char, MNTOPT_SUNIT)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - dsunit = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - dswidth = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { - args->flags &= ~XFSMNT_32BITINODES; -#if !XFS_BIG_INUMS - cmn_err(CE_WARN, - "XFS: %s option not allowed on this system", - this_char); - return EINVAL; -#endif - } else if (!strcmp(this_char, MNTOPT_NOUUID)) { - args->flags |= XFSMNT_NOUUID; - } else if (!strcmp(this_char, MNTOPT_BARRIER)) { - args->flags |= XFSMNT_BARRIER; - } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { - args->flags &= ~XFSMNT_BARRIER; - } else if (!strcmp(this_char, MNTOPT_IKEEP)) { - ikeep = 1; - args->flags &= ~XFSMNT_IDELETE; - } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { - args->flags |= XFSMNT_IDELETE; - } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { - args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE; - } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { - args->flags2 |= XFSMNT2_COMPAT_IOSIZE; - } else if (!strcmp(this_char, MNTOPT_ATTR2)) { - args->flags |= XFSMNT_ATTR2; - } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { - args->flags &= ~XFSMNT_ATTR2; - } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { - args->flags2 |= XFSMNT2_FILESTREAMS; - } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { - args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA); - args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA); - } else if (!strcmp(this_char, MNTOPT_QUOTA) || - !strcmp(this_char, MNTOPT_UQUOTA) || - !strcmp(this_char, MNTOPT_USRQUOTA)) { - args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || - !strcmp(this_char, MNTOPT_UQUOTANOENF)) { - args->flags |= XFSMNT_UQUOTA; - args->flags &= ~XFSMNT_UQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_PQUOTA) || - !strcmp(this_char, MNTOPT_PRJQUOTA)) { - args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { - args->flags |= XFSMNT_PQUOTA; - args->flags &= ~XFSMNT_PQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_GQUOTA) || - !strcmp(this_char, MNTOPT_GRPQUOTA)) { - args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { - args->flags |= XFSMNT_GQUOTA; - args->flags &= ~XFSMNT_GQUOTAENF; - } else if (!strcmp(this_char, MNTOPT_DMAPI)) { - args->flags |= XFSMNT_DMAPI; - } else if (!strcmp(this_char, MNTOPT_XDSM)) { - args->flags |= XFSMNT_DMAPI; - } else if (!strcmp(this_char, MNTOPT_DMI)) { - args->flags |= XFSMNT_DMAPI; - } else if (!strcmp(this_char, "ihashsize")) { - cmn_err(CE_WARN, - "XFS: ihashsize no longer used, option is deprecated."); - } else if (!strcmp(this_char, "osyncisdsync")) { - /* no-op, this is now the default */ - cmn_err(CE_WARN, - "XFS: osyncisdsync is now the default, option is deprecated."); - } else if (!strcmp(this_char, "irixsgid")) { - cmn_err(CE_WARN, - "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); - } else { - cmn_err(CE_WARN, - "XFS: unknown mount option [%s].", this_char); - return EINVAL; - } - } - - if (args->flags & XFSMNT_NORECOVERY) { - if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) { - cmn_err(CE_WARN, - "XFS: no-recovery mounts must be read-only."); - return EINVAL; - } - } - - if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { - cmn_err(CE_WARN, - "XFS: sunit and swidth options incompatible with the noalign option"); - return EINVAL; - } - - if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) { - cmn_err(CE_WARN, - "XFS: cannot mount with both project and group quota"); - return EINVAL; - } - - if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') { - printk("XFS: %s option needs the mount point option as well\n", - MNTOPT_DMAPI); - return EINVAL; - } - - if ((dsunit && !dswidth) || (!dsunit && dswidth)) { - cmn_err(CE_WARN, - "XFS: sunit and swidth must be specified together"); - return EINVAL; - } - - if (dsunit && (dswidth % dsunit != 0)) { - cmn_err(CE_WARN, - "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", - dswidth, dsunit); - return EINVAL; - } - - /* - * Applications using DMI filesystems often expect the - * inode generation number to be monotonically increasing. - * If we delete inode chunks we break this assumption, so - * keep unused inode chunks on disk for DMI filesystems - * until we come up with a better solution. - * Note that if "ikeep" or "noikeep" mount options are - * supplied, then they are honored. - */ - if (!(args->flags & XFSMNT_DMAPI) && !ikeep) - args->flags |= XFSMNT_IDELETE; - - if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { - if (dsunit) { - args->sunit = dsunit; - args->flags |= XFSMNT_RETERR; - } else { - args->sunit = vol_dsunit; - } - dswidth ? (args->swidth = dswidth) : - (args->swidth = vol_dswidth); - } else { - args->sunit = args->swidth = 0; - } - -done: - if (args->flags & XFSMNT_32BITINODES) - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - if (args->flags2) - args->flags |= XFSMNT_FLAGS2; - return 0; -} - -int -xfs_showargs( - struct xfs_mount *mp, - struct seq_file *m) -{ - static struct proc_xfs_info { - int flag; - char *str; - } xfs_info[] = { - /* the few simple ones we can get from the mount struct */ - { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, - { XFS_MOUNT_INO64, "," MNTOPT_INO64 }, - { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, - { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, - { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, - { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, - { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC }, - { 0, NULL } - }; - struct proc_xfs_info *xfs_infop; - - for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { - if (mp->m_flags & xfs_infop->flag) - seq_puts(m, xfs_infop->str); - } - - if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) - seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", - (int)(1 << mp->m_writeio_log) >> 10); - - if (mp->m_logbufs > 0) - seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs); - if (mp->m_logbsize > 0) - seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); - - if (mp->m_logname) - seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); - if (mp->m_rtname) - seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); - - if (mp->m_dalign > 0) - seq_printf(m, "," MNTOPT_SUNIT "=%d", - (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); - if (mp->m_swidth > 0) - seq_printf(m, "," MNTOPT_SWIDTH "=%d", - (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); - - if (!(mp->m_flags & XFS_MOUNT_IDELETE)) - seq_printf(m, "," MNTOPT_IKEEP); - if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)) - seq_printf(m, "," MNTOPT_LARGEIO); - - if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS)) - seq_printf(m, "," MNTOPT_64BITINODE); - if (mp->m_flags & XFS_MOUNT_GRPID) - seq_printf(m, "," MNTOPT_GRPID); - - if (mp->m_qflags & XFS_UQUOTA_ACCT) { - if (mp->m_qflags & XFS_UQUOTA_ENFD) - seq_puts(m, "," MNTOPT_USRQUOTA); - else - seq_puts(m, "," MNTOPT_UQUOTANOENF); - } - - if (mp->m_qflags & XFS_PQUOTA_ACCT) { - if (mp->m_qflags & XFS_OQUOTA_ENFD) - seq_puts(m, "," MNTOPT_PRJQUOTA); - else - seq_puts(m, "," MNTOPT_PQUOTANOENF); - } - - if (mp->m_qflags & XFS_GQUOTA_ACCT) { - if (mp->m_qflags & XFS_OQUOTA_ENFD) - seq_puts(m, "," MNTOPT_GRPQUOTA); - else - seq_puts(m, "," MNTOPT_GQUOTANOENF); - } - - if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) - seq_puts(m, "," MNTOPT_NOQUOTA); - - if (mp->m_flags & XFS_MOUNT_DMAPI) - seq_puts(m, "," MNTOPT_DMAPI); - return 0; -} - -/* - * Second stage of a freeze. The data is already frozen so we only - * need to take care of themetadata. Once that's done write a dummy - * record to dirty the log in case of a crash while frozen. - */ -void -xfs_freeze( - xfs_mount_t *mp) -{ - xfs_attr_quiesce(mp); - xfs_fs_log_dummy(mp); -} diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h index a592fe02a33..1688817c55e 100644 --- a/fs/xfs/xfs_vfsops.h +++ b/fs/xfs/xfs_vfsops.h @@ -13,16 +13,9 @@ int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args, int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp); int xfs_mntupdate(struct xfs_mount *mp, int *flags, struct xfs_mount_args *args); -int xfs_root(struct xfs_mount *mp, bhv_vnode_t **vpp); -int xfs_statvfs(struct xfs_mount *mp, struct kstatfs *statp, - bhv_vnode_t *vp); int xfs_sync(struct xfs_mount *mp, int flags); -int xfs_vget(struct xfs_mount *mp, bhv_vnode_t **vpp, struct xfs_fid *xfid); -int xfs_parseargs(struct xfs_mount *mp, char *options, - struct xfs_mount_args *args, int update); -int xfs_showargs(struct xfs_mount *mp, struct seq_file *m); -void xfs_freeze(struct xfs_mount *mp); void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, int lnnum); +void xfs_attr_quiesce(struct xfs_mount *mp); #endif /* _XFS_VFSOPS_H */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index efd5aff9eaf..51305242ff8 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -88,7 +88,7 @@ xfs_getattr( bhv_vnode_t *vp = XFS_ITOV(ip); xfs_mount_t *mp = ip->i_mount; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -136,7 +136,7 @@ xfs_getattr( default: vap->va_rdev = 0; - if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { + if (!(XFS_IS_REALTIME_INODE(ip))) { vap->va_blocksize = xfs_preferred_iosize(mp); } else { @@ -228,7 +228,7 @@ xfs_setattr( int file_owner; int need_iolock = 1; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); @@ -508,7 +508,7 @@ xfs_setattr( */ if ((ip->i_d.di_nextents || ip->i_delayed_blks) && (mask & XFS_AT_XFLAGS) && - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != + (XFS_IS_REALTIME_INODE(ip)) != (vap->va_xflags & XFS_XFLAG_REALTIME)) { code = XFS_ERROR(EINVAL); /* EFBIG? */ goto error_return; @@ -520,7 +520,7 @@ xfs_setattr( if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) { xfs_extlen_t size; - if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) || + if (XFS_IS_REALTIME_INODE(ip) || ((mask & XFS_AT_XFLAGS) && (vap->va_xflags & XFS_XFLAG_REALTIME))) { size = mp->m_sb.sb_rextsize << @@ -804,12 +804,8 @@ xfs_setattr( if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT) di_flags |= XFS_DIFLAG_EXTSZINHERIT; } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { - if (vap->va_xflags & XFS_XFLAG_REALTIME) { + if (vap->va_xflags & XFS_XFLAG_REALTIME) di_flags |= XFS_DIFLAG_REALTIME; - ip->i_iocore.io_flags |= XFS_IOCORE_RT; - } else { - ip->i_iocore.io_flags &= ~XFS_IOCORE_RT; - } if (vap->va_xflags & XFS_XFLAG_EXTSIZE) di_flags |= XFS_DIFLAG_EXTSIZE; } @@ -902,28 +898,6 @@ xfs_setattr( return code; } - -/* - * xfs_access - * Null conversion from vnode mode bits to inode mode bits, as in efs. - */ -int -xfs_access( - xfs_inode_t *ip, - int mode, - cred_t *credp) -{ - int error; - - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - - xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_iaccess(ip, mode, credp); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return error; -} - - /* * The maximum pathlen is 1024 bytes. Since the minimum file system * blocksize is 512 bytes, we can get a max of 2 extents back from @@ -987,7 +961,7 @@ xfs_readlink( int pathlen; int error = 0; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -1033,7 +1007,7 @@ xfs_fsync( int error; int log_flushed = 0, changed = 1; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); ASSERT(start >= 0 && stop >= -1); @@ -1149,7 +1123,7 @@ xfs_fsync( * If this inode is on the RT dev we need to flush that * cache as well. */ - if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) + if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); } @@ -1188,7 +1162,7 @@ xfs_free_eofblocks( nimaps = 1; xfs_ilock(ip, XFS_ILOCK_SHARED); - error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0, + error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, NULL, 0, &imap, &nimaps, NULL, NULL); xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -1562,9 +1536,6 @@ xfs_release( error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); if (error) return error; - /* Update linux inode block count after free above */ - vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, - ip->i_d.di_nblocks + ip->i_delayed_blks); } } @@ -1592,7 +1563,7 @@ xfs_inactive( int error; int truncate; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); /* * If the inode is already free, then there can be nothing @@ -1638,9 +1609,6 @@ xfs_inactive( error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); if (error) return VN_INACTIVE_CACHE; - /* Update linux inode block count after free above */ - vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, - ip->i_d.di_nblocks + ip->i_delayed_blks); } goto out; } @@ -1805,7 +1773,7 @@ xfs_lookup( int error; uint lock_mode; - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return XFS_ERROR(EIO); @@ -1814,7 +1782,7 @@ xfs_lookup( error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip); if (!error) { *vpp = XFS_ITOV(ip); - ITRACE(ip); + xfs_itrace_ref(ip); } xfs_iunlock_map_shared(dp, lock_mode); return error; @@ -1848,7 +1816,7 @@ xfs_create( int namelen; ASSERT(!*vpp); - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); namelen = VNAMELEN(dentry); @@ -1930,7 +1898,7 @@ xfs_create( goto error_return; goto abort_return; } - ITRACE(ip); + xfs_itrace_ref(ip); /* * At this point, we've gotten a newly allocated inode. @@ -2098,7 +2066,7 @@ again: e_inum = ip->i_ino; - ITRACE(ip); + xfs_itrace_ref(ip); /* * We want to lock in increasing inum. Since we've already @@ -2321,7 +2289,7 @@ xfs_remove( uint resblks; int namelen; - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -2364,9 +2332,8 @@ xfs_remove( dm_di_mode = ip->i_d.di_mode; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - - ITRACE(ip); + xfs_itrace_entry(ip); + xfs_itrace_ref(ip); error = XFS_QM_DQATTACH(mp, dp, 0); if (!error && dp != ip) @@ -2498,8 +2465,7 @@ xfs_remove( if (link_zero && xfs_inode_is_filestream(ip)) xfs_filestream_deassociate(ip); - vn_trace_exit(ip, __FUNCTION__, (inst_t *)__return_address); - + xfs_itrace_exit(ip); IRELE(ip); /* Fall through to std_return with error = 0 */ @@ -2562,8 +2528,8 @@ xfs_link( char *target_name = VNAME(dentry); int target_namelen; - vn_trace_entry(tdp, __FUNCTION__, (inst_t *)__return_address); - vn_trace_entry(xfs_vtoi(src_vp), __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(tdp); + xfs_itrace_entry(xfs_vtoi(src_vp)); target_namelen = VNAMELEN(dentry); ASSERT(!VN_ISDIR(src_vp)); @@ -2744,7 +2710,7 @@ xfs_mkdir( /* Return through std_return after this point. */ - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); mp = dp->i_mount; udqp = gdqp = NULL; @@ -2810,7 +2776,7 @@ xfs_mkdir( goto error_return; goto abort_return; } - ITRACE(cdp); + xfs_itrace_ref(cdp); /* * Now we add the directory inode to the transaction. @@ -2936,7 +2902,7 @@ xfs_rmdir( int last_cdp_link; uint resblks; - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(dp); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -3041,7 +3007,7 @@ xfs_rmdir( VN_HOLD(dir_vp); } - ITRACE(cdp); + xfs_itrace_ref(cdp); xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); ASSERT(cdp->i_d.di_nlink >= 2); @@ -3189,8 +3155,7 @@ xfs_symlink( ip = NULL; tp = NULL; - vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); - + xfs_itrace_entry(dp); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -3317,7 +3282,7 @@ xfs_symlink( goto error_return; goto error1; } - ITRACE(ip); + xfs_itrace_ref(ip); /* * An error after we've joined dp to the transaction will result in the @@ -3465,27 +3430,6 @@ std_return: goto std_return; } - -int -xfs_fid2( - xfs_inode_t *ip, - xfs_fid_t *xfid) -{ - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - - xfid->fid_len = sizeof(xfs_fid_t) - sizeof(xfid->fid_len); - xfid->fid_pad = 0; - /* - * use memcpy because the inode is a long long and there's no - * assurance that xfid->fid_ino is properly aligned. - */ - memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino)); - xfid->fid_gen = ip->i_d.di_gen; - - return 0; -} - - int xfs_rwlock( xfs_inode_t *ip, @@ -3558,11 +3502,11 @@ xfs_inode_flush( if (iip && iip->ili_last_lsn) { xlog_t *log = mp->m_log; xfs_lsn_t sync_lsn; - int s, log_flags = XFS_LOG_FORCE; + int log_flags = XFS_LOG_FORCE; - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); sync_lsn = log->l_last_sync_lsn; - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { if (flags & FLUSH_SYNC) @@ -3637,8 +3581,8 @@ xfs_set_dmattrs( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - ip->i_iocore.io_dmevmask = ip->i_d.di_dmevmask = evmask; - ip->i_iocore.io_dmstate = ip->i_d.di_dmstate = state; + ip->i_d.di_dmevmask = evmask; + ip->i_d.di_dmstate = state; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); IHOLD(ip); @@ -3653,7 +3597,7 @@ xfs_reclaim( { bhv_vnode_t *vp = XFS_ITOV(ip); - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); ASSERT(!VN_MAPPED(vp)); @@ -3871,7 +3815,7 @@ xfs_alloc_file_space( int committed; int error; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -3976,7 +3920,7 @@ retry: * Issue the xfs_bmapi() call to allocate the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, + error = xfs_bmapi(tp, ip, startoffset_fsb, allocatesize_fsb, bmapi_flag, &firstfsb, 0, imapp, &nimaps, &free_list, NULL); @@ -4052,13 +3996,13 @@ xfs_zero_remaining_bytes( int error = 0; bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, - ip->i_d.di_flags & XFS_DIFLAG_REALTIME ? + XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp); for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; - error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, + error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error || nimap < 1) break; @@ -4141,7 +4085,7 @@ xfs_free_file_space( vp = XFS_ITOV(ip); mp = ip->i_mount; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return error; @@ -4149,7 +4093,7 @@ xfs_free_file_space( error = 0; if (len <= 0) /* if nothing being freed */ return error; - rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME); + rt = XFS_IS_REALTIME_INODE(ip); startoffset_fsb = XFS_B_TO_FSB(mp, offset); end_dmi_offset = offset + len; endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); @@ -4172,15 +4116,12 @@ xfs_free_file_space( vn_iowait(ip); /* wait for the completion of any pending DIOs */ } - rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP); + rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); ioffset = offset & ~(rounding - 1); if (VN_CACHED(vp) != 0) { - xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, - ctooff(offtoct(ioffset)), -1); - error = xfs_flushinval_pages(ip, - ctooff(offtoct(ioffset)), - -1, FI_REMAPF_LOCKED); + xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1); + error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); if (error) goto out_unlock_iolock; } @@ -4193,7 +4134,7 @@ xfs_free_file_space( */ if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { nimap = 1; - error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb, + error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; @@ -4208,7 +4149,7 @@ xfs_free_file_space( startoffset_fsb += mp->m_sb.sb_rextsize - mod; } nimap = 1; - error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1, + error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; @@ -4284,7 +4225,7 @@ xfs_free_file_space( * issue the bunmapi() call to free the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, + error = xfs_bunmapi(tp, ip, startoffset_fsb, endoffset_fsb - startoffset_fsb, 0, 2, &firstfsb, &free_list, NULL, &done); if (error) { @@ -4347,23 +4288,11 @@ xfs_change_file_space( xfs_trans_t *tp; bhv_vattr_t va; - vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); + xfs_itrace_entry(ip); - /* - * must be a regular file and have write permission - */ if (!S_ISREG(ip->i_d.di_mode)) return XFS_ERROR(EINVAL); - xfs_ilock(ip, XFS_ILOCK_SHARED); - - if ((error = xfs_iaccess(ip, S_IWUSR, credp))) { - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return error; - } - - xfs_iunlock(ip, XFS_ILOCK_SHARED); - switch (bf->l_whence) { case 0: /*SEEK_SET*/ break; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index b7e461c40cf..4e3970f0e5e 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -18,7 +18,6 @@ int xfs_open(struct xfs_inode *ip); int xfs_getattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags); int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, struct cred *credp); -int xfs_access(struct xfs_inode *ip, int mode, struct cred *credp); int xfs_readlink(struct xfs_inode *ip, char *link); int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start, xfs_off_t stop); @@ -39,7 +38,6 @@ int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry, char *target_path, mode_t mode, bhv_vnode_t **vpp, struct cred *credp); -int xfs_fid2(struct xfs_inode *ip, struct xfs_fid *xfid); int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); int xfs_inode_flush(struct xfs_inode *ip, int flags); |