diff options
author | Jeff Garzik <jeff@garzik.org> | 2006-09-30 23:45:41 -0400 |
---|---|---|
committer | Jeff Garzik <jeff@garzik.org> | 2006-09-30 23:45:41 -0400 |
commit | 1c7da74c4aab595a994beb5fe728ebf0d0b41f59 (patch) | |
tree | 64128abdf9550ebb51d8f3ee6732d7350b9c62f2 /fs | |
parent | aebb1153ac54ddbbd3d3f0481a193f4bf0ead53b (diff) | |
parent | 1bdfd554be94def718323659173517c5d4a69d25 (diff) |
Merge branch 'master' into upstream
Diffstat (limited to 'fs')
147 files changed, 2363 insertions, 1730 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 22f7ccd58d3..0f628041e3f 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -460,8 +460,10 @@ static int __init init_v9fs(void) ret = v9fs_mux_global_init(); if (!ret) - ret = register_filesystem(&v9fs_fs_type); - + return ret; + ret = register_filesystem(&v9fs_fs_type); + if (!ret) + v9fs_mux_global_exit(); return ret; } diff --git a/fs/Kconfig b/fs/Kconfig index d311198bba4..1453d2d164f 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -4,6 +4,8 @@ menu "File systems" +if BLOCK + config EXT2_FS tristate "Second extended fs support" help @@ -399,6 +401,8 @@ config ROMFS_FS If you don't know whether you need it, then you don't need it: answer N. +endif + config INOTIFY bool "Inotify file change notification support" default y @@ -530,6 +534,7 @@ config FUSE_FS If you want to develop a userspace FS, or if you want to use a filesystem based on FUSE, answer Y or M. +if BLOCK menu "CD-ROM/DVD Filesystems" config ISO9660_FS @@ -597,7 +602,9 @@ config UDF_NLS depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y) endmenu +endif +if BLOCK menu "DOS/FAT/NT Filesystems" config FAT_FS @@ -782,6 +789,7 @@ config NTFS_RW It is perfectly safe to say N here. endmenu +endif menu "Pseudo filesystems" @@ -881,6 +889,19 @@ config TMPFS See <file:Documentation/filesystems/tmpfs.txt> for details. +config TMPFS_POSIX_ACL + bool "Tmpfs POSIX Access Control Lists" + depends on TMPFS + select GENERIC_ACL + help + POSIX Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website <http://acl.bestbits.at/>. + + If you don't know what Access Control Lists are, say N. + config HUGETLBFS bool "HugeTLB file system support" depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN @@ -926,7 +947,7 @@ menu "Miscellaneous filesystems" config ADFS_FS tristate "ADFS file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help The Acorn Disc Filing System is the standard file system of the RiscOS operating system which runs on Acorn's ARM-based Risc PC @@ -954,7 +975,7 @@ config ADFS_FS_RW config AFFS_FS tristate "Amiga FFS file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help The Fast File System (FFS) is the common file system used on hard disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y @@ -976,7 +997,7 @@ config AFFS_FS config HFS_FS tristate "Apple Macintosh file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL select NLS help If you say Y here, you will be able to mount Macintosh-formatted @@ -989,6 +1010,7 @@ config HFS_FS config HFSPLUS_FS tristate "Apple Extended HFS file system support" + depends on BLOCK select NLS select NLS_UTF8 help @@ -1002,7 +1024,7 @@ config HFSPLUS_FS config BEFS_FS tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL select NLS help The BeOS File System (BeFS) is the native file system of Be, Inc's @@ -1029,7 +1051,7 @@ config BEFS_DEBUG config BFS_FS tristate "BFS file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help Boot File System (BFS) is a file system used under SCO UnixWare to allow the bootloader access to the kernel image and other important @@ -1051,7 +1073,7 @@ config BFS_FS config EFS_FS tristate "EFS file system support (read only) (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help EFS is an older file system used for non-ISO9660 CD-ROMs and hard disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer @@ -1066,7 +1088,7 @@ config EFS_FS config JFFS_FS tristate "Journalling Flash File System (JFFS) support" - depends on MTD + depends on MTD && BLOCK help JFFS is the Journaling Flash File System developed by Axis Communications in Sweden, aimed at providing a crash/powerdown-safe @@ -1251,6 +1273,7 @@ endchoice config CRAMFS tristate "Compressed ROM file system support (cramfs)" + depends on BLOCK select ZLIB_INFLATE help Saying Y here includes support for CramFs (Compressed ROM File @@ -1270,6 +1293,7 @@ config CRAMFS config VXFS_FS tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)" + depends on BLOCK help FreeVxFS is a file system driver that support the VERITAS VxFS(TM) file system format. VERITAS VxFS(TM) is the standard file system @@ -1287,6 +1311,7 @@ config VXFS_FS config HPFS_FS tristate "OS/2 HPFS file system support" + depends on BLOCK help OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS is the file system used for organizing files on OS/2 hard disk @@ -1303,6 +1328,7 @@ config HPFS_FS config QNX4FS_FS tristate "QNX4 file system support (read only)" + depends on BLOCK help This is the file system used by the real-time operating systems QNX 4 and QNX 6 (the latter is also called QNX RTP). @@ -1330,6 +1356,7 @@ config QNX4FS_RW config SYSV_FS tristate "System V/Xenix/V7/Coherent file system support" + depends on BLOCK help SCO, Xenix and Coherent are commercial Unix systems for Intel machines, and Version 7 was used on the DEC PDP-11. Saying Y @@ -1368,6 +1395,7 @@ config SYSV_FS config UFS_FS tristate "UFS file system support (read only)" + depends on BLOCK help BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD, OpenBSD and NeXTstep) use a file system called UFS. Some System V @@ -1940,13 +1968,19 @@ config 9P_FS If unsure, say N. +config GENERIC_ACL + bool + select FS_POSIX_ACL + endmenu +if BLOCK menu "Partition Types" source "fs/partitions/Kconfig" endmenu +endif source "fs/nls/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index 89135428a53..a503e6ce0f3 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -5,12 +5,18 @@ # Rewritten to use lists instead of if-statements. # -obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ - block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ +obj-y := open.o read_write.o file_table.o super.o \ + char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ - seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ - ioprio.o pnode.o drop_caches.o splice.o sync.o + seq_file.o xattr.o libfs.o fs-writeback.o \ + pnode.o drop_caches.o splice.o sync.o + +ifeq ($(CONFIG_BLOCK),y) +obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o +else +obj-y += no-block.o +endif obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_INOTIFY_USER) += inotify_user.o @@ -35,6 +41,7 @@ obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ +obj-$(CONFIG_GENERIC_ACL) += generic_acl.o obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o diff --git a/fs/afs/file.c b/fs/afs/file.c index 67d6634101f..2e8c42639ea 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -16,7 +16,6 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> -#include <linux/buffer_head.h> #include "volume.h" #include "vnode.h" #include <rxrpc/call.h> @@ -37,7 +36,6 @@ struct inode_operations afs_file_inode_operations = { const struct address_space_operations afs_fs_aops = { .readpage = afs_file_readpage, - .sync_page = block_sync_page, .set_page_dirty = __set_page_dirty_nobuffers, .releasepage = afs_file_releasepage, .invalidatepage = afs_file_invalidatepage, diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 101d21b6c03..86463ec9ccb 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -775,6 +775,7 @@ static int afs_proc_cell_servers_release(struct inode *inode, * first item */ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) + __acquires(m->private->sv_lock) { struct list_head *_p; struct afs_cell *cell = m->private; @@ -823,6 +824,7 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, * clean up after reading from the cells list */ static void afs_proc_cell_servers_stop(struct seq_file *p, void *v) + __releases(p->private->sv_lock) { struct afs_cell *cell = p->private; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 27e17f96cad..563ef9d7da9 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -281,9 +281,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) DPRINTK("mount done status=%d", status); - if (status && dentry->d_inode) - return status; /* Try to get the kernel to invalidate this dentry */ - /* Turn this into a real negative dentry? */ if (status == -ENOENT) { spin_lock(&dentry->d_lock); @@ -359,7 +356,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) * don't try to mount it again. */ spin_lock(&dcache_lock); - if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { + if (!d_mountpoint(dentry) && __simple_empty(dentry)) { spin_unlock(&dcache_lock); status = try_to_fill_dentry(dentry, 0); @@ -540,6 +537,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s return ERR_PTR(-ERESTARTNOINTR); } } + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; + spin_unlock(&dentry->d_lock); } /* diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index f312103434d..517e111bb7e 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -278,6 +278,13 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) return -ENOEXEC; } + /* + * Requires a mmap handler. This prevents people from using a.out + * as part of an exploit attack against /proc-related vulnerabilities. + */ + if (!bprm->file->f_op || !bprm->file->f_op->mmap) + return -ENOEXEC; + fd_offset = N_TXTOFF(ex); /* Check initial limits. This avoids letting people circumvent @@ -476,6 +483,13 @@ static int load_aout_library(struct file *file) goto out; } + /* + * Requires a mmap handler. This prevents people from using a.out + * as part of an exploit attack against /proc-related vulnerabilities. + */ + if (!file->f_op || !file->f_op->mmap) + goto out; + if (N_FLAGS(ex)) goto out; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index dfd8cfb7fb5..bad52433de6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -46,7 +46,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); static int load_elf_library(struct file *); static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); -extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); #ifndef elf_addr_t #define elf_addr_t unsigned long @@ -1038,10 +1037,8 @@ out_free_interp: out_free_file: sys_close(elf_exec_fileno); out_free_fh: - if (files) { - put_files_struct(current->files); - current->files = files; - } + if (files) + reset_files_struct(current, files); out_free_ph: kfree(elf_phdata); goto out; @@ -1481,20 +1478,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) if (signr) { struct elf_thread_status *tmp; - read_lock(&tasklist_lock); + rcu_read_lock(); do_each_thread(g,p) if (current->mm == p->mm && current != p) { tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (!tmp) { - read_unlock(&tasklist_lock); + rcu_read_unlock(); goto cleanup; } - INIT_LIST_HEAD(&tmp->list); tmp->thread = p; list_add(&tmp->list, &thread_list); } while_each_thread(g,p); - read_unlock(&tasklist_lock); + rcu_read_unlock(); list_for_each(t, &thread_list) { struct elf_thread_status *tmp; int sz; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 2f336582922..f86d5c9ce5e 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1597,20 +1597,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, if (signr) { struct elf_thread_status *tmp; - read_lock(&tasklist_lock); + rcu_read_lock(); do_each_thread(g,p) if (current->mm == p->mm && current != p) { tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (!tmp) { - read_unlock(&tasklist_lock); + rcu_read_unlock(); goto cleanup; } - INIT_LIST_HEAD(&tmp->list); tmp->thread = p; list_add(&tmp->list, &thread_list); } while_each_thread(g,p); - read_unlock(&tasklist_lock); + rcu_read_unlock(); list_for_each(t, &thread_list) { struct elf_thread_status *tmp; int sz; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 66ba137f866..1713c48fef5 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -215,10 +215,8 @@ _error: bprm->interp_flags = 0; bprm->interp_data = 0; _unshare: - if (files) { - put_files_struct(current->files); - current->files = files; - } + if (files) + reset_files_struct(current, files); goto _ret; } @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001 Jens Axboe <axboe@suse.de> + * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1142,7 +1142,7 @@ static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale) struct biovec_slab *bp = bvec_slabs + i; mempool_t **bvp = bs->bvec_pools + i; - if (i >= scale) + if (pool_entries > 1 && i >= scale) pool_entries >>= 1; *bvp = mempool_create_slab_pool(pool_entries, bp->slab); diff --git a/fs/block_dev.c b/fs/block_dev.c index 045f98854f1..0c361ea7e5a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -17,11 +17,13 @@ #include <linux/module.h> #include <linux/blkpg.h> #include <linux/buffer_head.h> +#include <linux/writeback.h> #include <linux/mpage.h> #include <linux/mount.h> #include <linux/uio.h> #include <linux/namei.h> #include <asm/uaccess.h> +#include "internal.h" struct bdev_inode { struct block_device bdev; @@ -543,11 +545,11 @@ static struct kobject *bdev_get_holder(struct block_device *bdev) return kobject_get(bdev->bd_disk->holder_dir); } -static void add_symlink(struct kobject *from, struct kobject *to) +static int add_symlink(struct kobject *from, struct kobject *to) { if (!from || !to) - return; - sysfs_create_link(from, to, kobject_name(to)); + return 0; + return sysfs_create_link(from, to, kobject_name(to)); } static void del_symlink(struct kobject *from, struct kobject *to) @@ -648,30 +650,38 @@ static void free_bd_holder(struct bd_holder *bo) * If there is no matching entry with @bo in @bdev->bd_holder_list, * add @bo to the list, create symlinks. * - * Returns 1 if @bo was added to the list. - * Returns 0 if @bo wasn't used by any reason and should be freed. + * Returns 0 if symlinks are created or already there. + * Returns -ve if something fails and @bo can be freed. */ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) { struct bd_holder *tmp; + int ret; if (!bo) - return 0; + return -EINVAL; list_for_each_entry(tmp, &bdev->bd_holder_list, list) { if (tmp->sdir == bo->sdir) { tmp->count++; + /* We've already done what we need to do here. */ + free_bd_holder(bo); return 0; } } if (!bd_holder_grab_dirs(bdev, bo)) - return 0; + return -EBUSY; - add_symlink(bo->sdir, bo->sdev); - add_symlink(bo->hdir, bo->hdev); - list_add_tail(&bo->list, &bdev->bd_holder_list); - return 1; + ret = add_symlink(bo->sdir, bo->sdev); + if (ret == 0) { + ret = add_symlink(bo->hdir, bo->hdev); + if (ret) + del_symlink(bo->sdir, bo->sdev); + } + if (ret == 0) + list_add_tail(&bo->list, &bdev->bd_holder_list); + return ret; } /** @@ -741,7 +751,9 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); res = bd_claim(bdev, holder); - if (res || !add_bd_holder(bdev, bo)) + if (res == 0) + res = add_bd_holder(bdev, bo); + if (res) free_bd_holder(bo); mutex_unlock(&bdev->bd_mutex); @@ -1021,7 +1033,7 @@ do_open(struct block_device *bdev, struct file *file, unsigned int subclass) rescan_partitions(bdev->bd_disk, bdev); } else { mutex_lock_nested(&bdev->bd_contains->bd_mutex, - BD_MUTEX_PARTITION); + BD_MUTEX_WHOLE); bdev->bd_contains->bd_part_count++; mutex_unlock(&bdev->bd_contains->bd_mutex); } @@ -1303,3 +1315,24 @@ void close_bdev_excl(struct block_device *bdev) } EXPORT_SYMBOL(close_bdev_excl); + +int __invalidate_device(struct block_device *bdev) +{ + struct super_block *sb = get_super(bdev); + int res = 0; + + if (sb) { + /* + * no need to lock the super, get_super holds the + * read mutex so the filesystem cannot go away + * under us (->put_super runs with the write lock + * hold). + */ + shrink_dcache_sb(sb); + res = invalidate_inodes(sb); + drop_super(sb); + } + invalidate_bdev(bdev, 0); + return res; +} +EXPORT_SYMBOL(__invalidate_device); diff --git a/fs/buffer.c b/fs/buffer.c index 3b6d701073e..16cfbcd254f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -159,31 +159,6 @@ int sync_blockdev(struct block_device *bdev) } EXPORT_SYMBOL(sync_blockdev); -static void __fsync_super(struct super_block *sb) -{ - sync_inodes_sb(sb, 0); - DQUOT_SYNC(sb); - lock_super(sb); - if (sb->s_dirt && sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); - sync_inodes_sb(sb, 1); -} - -/* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. - */ -int fsync_super(struct super_block *sb) -{ - __fsync_super(sb); - return sync_blockdev(sb->s_bdev); -} - /* * Write out and wait upon all dirty data associated with this * device. Filesystem data as well as the underlying block @@ -260,118 +235,6 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb) EXPORT_SYMBOL(thaw_bdev); /* - * sync everything. Start out by waking pdflush, because that writes back - * all queues in parallel. - */ -static void do_sync(unsigned long wait) -{ - wakeup_pdflush(0); - sync_inodes(0); /* All mappings, inodes and their blockdevs */ - DQUOT_SYNC(NULL); - sync_supers(); /* Write the superblocks */ - sync_filesystems(0); /* Start syncing the filesystems */ - sync_filesystems(wait); /* Waitingly sync the filesystems */ - sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ - if (!wait) - printk("Emergency Sync complete\n"); - if (unlikely(laptop_mode)) - laptop_sync_completion(); -} - -asmlinkage long sys_sync(void) -{ - do_sync(1); - return 0; -} - -void emergency_sync(void) -{ - pdflush_operation(do_sync, 0); -} - -/* - * Generic function to fsync a file. - * - * filp may be NULL if called via the msync of a vma. - */ - -int file_fsync(struct file *filp, struct dentry *dentry, int datasync) -{ - struct inode * inode = dentry->d_inode; - struct super_block * sb; - int ret, err; - - /* sync the inode to buffers */ - ret = write_inode_now(inode, 0); - - /* sync the superblock to buffers */ - sb = inode->i_sb; - lock_super(sb); - if (sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - - /* .. finally sync the buffers to disk */ - err = sync_blockdev(sb->s_bdev); - if (!ret) - ret = err; - return ret; -} - -long do_fsync(struct file *file, int datasync) -{ - int ret; - int err; - struct address_space *mapping = file->f_mapping; - - if (!file->f_op || !file->f_op->fsync) { - /* Why? We can still call filemap_fdatawrite */ - ret = -EINVAL; - goto out; - } - - ret = filemap_fdatawrite(mapping); - - /* - * We need to protect against concurrent writers, which could cause - * livelocks in fsync_buffers_list(). - */ - mutex_lock(&mapping->host->i_mutex); - err = file->f_op->fsync(file, file->f_dentry, datasync); - if (!ret) - ret = err; - mutex_unlock(&mapping->host->i_mutex); - err = filemap_fdatawait(mapping); - if (!ret) - ret = err; -out: - return ret; -} - -static long __do_fsync(unsigned int fd, int datasync) -{ - struct file *file; - int ret = -EBADF; - - file = fget(fd); - if (file) { - ret = do_fsync(file, datasync); - fput(file); - } - return ret; -} - -asmlinkage long sys_fsync(unsigned int fd) -{ - return __do_fsync(fd, 0); -} - -asmlinkage long sys_fdatasync(unsigned int fd) -{ - return __do_fsync(fd, 1); -} - -/* * Various filesystems appear to want __find_get_block to be non-blocking. * But it's the page lock which protects the buffers. To get around this, * we get exclusion from try_to_free_buffers with the blockdev mapping's @@ -1551,35 +1414,6 @@ static void discard_buffer(struct buffer_head * bh) } /** - * try_to_release_page() - release old fs-specific metadata on a page - * - * @page: the page which the kernel is trying to free - * @gfp_mask: memory allocation flags (and I/O mode) - * - * The address_space is to try to release any data against the page - * (presumably at page->private). If the release was successful, return `1'. - * Otherwise return zero. - * - * The @gfp_mask argument specifies whether I/O may be performed to release - * this page (__GFP_IO), and whether the call may block (__GFP_WAIT). - * - * NOTE: @gfp_mask may go away, and this function may become non-blocking. - */ -int try_to_release_page(struct page *page, gfp_t gfp_mask) -{ - struct address_space * const mapping = page->mapping; - - BUG_ON(!PageLocked(page)); - if (PageWriteback(page)) - return 0; - - if (mapping && mapping->a_ops->releasepage) - return mapping->a_ops->releasepage(page, gfp_mask); - return try_to_free_buffers(page); -} -EXPORT_SYMBOL(try_to_release_page); - -/** * block_invalidatepage - invalidate part of all of a buffer-backed page * * @page: the page which is affected @@ -1630,14 +1464,6 @@ out: } EXPORT_SYMBOL(block_invalidatepage); -void do_invalidatepage(struct page *page, unsigned long offset) -{ - void (*invalidatepage)(struct page *, unsigned long); - invalidatepage = page->mapping->a_ops->invalidatepage ? : - block_invalidatepage; - (*invalidatepage)(page, offset); -} - /* * We attach and possibly dirty the buffers atomically wrt * __set_page_dirty_buffers() via private_lock. try_to_free_buffers diff --git a/fs/char_dev.c b/fs/char_dev.c index 0009346d827..a885f46ca00 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -24,6 +24,7 @@ #ifdef CONFIG_KMOD #include <linux/kmod.h> #endif +#include "internal.h" /* * capabilities for /dev/mem, /dev/kmem and similar directly mappable character @@ -128,13 +129,31 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next) if ((*cp)->major > major || - ((*cp)->major == major && (*cp)->baseminor >= baseminor)) + ((*cp)->major == major && + (((*cp)->baseminor >= baseminor) || + ((*cp)->baseminor + (*cp)->minorct > baseminor)))) break; - if (*cp && (*cp)->major == major && - (*cp)->baseminor < baseminor + minorct) { - ret = -EBUSY; - goto out; + + /* Check for overlapping minor ranges. */ + if (*cp && (*cp)->major == major) { + int old_min = (*cp)->baseminor; + int old_max = (*cp)->baseminor + (*cp)->minorct - 1; + int new_min = baseminor; + int new_max = baseminor + minorct - 1; + + /* New driver overlaps from the left. */ + if (new_max >= old_min && new_max <= old_max) { + ret = -EBUSY; + goto out; + } + + /* New driver overlaps from the right. */ + if (new_min <= old_max && new_min >= old_min) { + ret = -EBUSY; + goto out; + } } + cd->next = *cp; *cp = cd; mutex_unlock(&chrdevs_lock); @@ -165,6 +184,15 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct) return cd; } +/** + * register_chrdev_region() - register a range of device numbers + * @from: the first in the desired range of device numbers; must include + * the major number. + * @count: the number of consecutive device numbers required + * @name: the name of the device or driver. + * + * Return value is zero on success, a negative error code on failure. + */ int register_chrdev_region(dev_t from, unsigned count, const char *name) { struct char_device_struct *cd; @@ -190,6 +218,17 @@ fail: return PTR_ERR(cd); } +/** + * alloc_chrdev_region() - register a range of char device numbers + * @dev: output parameter for first assigned number + * @baseminor: first of the requested range of minor numbers + * @count: the number of minor numbers required + * @name: the name of the associated device or driver + * + * Allocates a range of char device numbers. The major number will be + * chosen dynamically, and returned (along with the first minor number) + * in @dev. Returns zero or a negative error code. + */ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, const char *name) { @@ -259,6 +298,15 @@ out2: return err; } +/** + * unregister_chrdev_region() - return a range of device numbers + * @from: the first in the range of numbers to unregister + * @count: the number of device numbers to unregister + * + * This function will unregister a range of @count device numbers, + * starting with @from. The caller should normally be the one who + * allocated those numbers in the first place... + */ void unregister_chrdev_region(dev_t from, unsigned count) { dev_t to = from + count; @@ -396,6 +444,16 @@ static int exact_lock(dev_t dev, void *data) return cdev_get(p) ? 0 : -1; } +/** + * cdev_add() - add a char device to the system + * @p: the cdev structure for the device + * @dev: the first device number for which this device is responsible + * @count: the number of consecutive minor numbers corresponding to this + * device + * + * cdev_add() adds the device represented by @p to the system, making it + * live immediately. A negative error code is returned on failure. + */ int cdev_add(struct cdev *p, dev_t dev, unsigned count) { p->dev = dev; @@ -408,6 +466,13 @@ static void cdev_unmap(dev_t dev, unsigned count) kobj_unmap(cdev_map, dev, count); } +/** + * cdev_del() - remove a cdev from the system + * @p: the cdev structure to be removed + * + * cdev_del() removes @p from the system, possibly freeing the structure + * itself. + */ void cdev_del(struct cdev *p) { cdev_unmap(p->dev, p->count); @@ -436,6 +501,11 @@ static struct kobj_type ktype_cdev_dynamic = { .release = cdev_dynamic_release, }; +/** + * cdev_alloc() - allocate a cdev structure + * + * Allocates and returns a cdev structure, or NULL on failure. + */ struct cdev *cdev_alloc(void) { struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL); @@ -447,6 +517,14 @@ struct cdev *cdev_alloc(void) return p; } +/** + * cdev_init() - initialize a cdev structure + * @cdev: the structure to initialize + * @fops: the file_operations for this device + * + * Initializes @cdev, remembering @fops, making it ready to add to the + * system with cdev_add(). + */ void cdev_init(struct cdev *cdev, const struct file_operations *fops) { memset(cdev, 0, sizeof *cdev); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ddb012a6802..976a691c5a6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -25,7 +25,6 @@ #include <linux/backing-dev.h> #include <linux/stat.h> #include <linux/fcntl.h> -#include <linux/mpage.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/smp_lock.h> diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b88147c1dc2..05f874c7441 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -19,7 +19,6 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> -#include <linux/buffer_head.h> #include <linux/stat.h> #include <linux/pagemap.h> #include <asm/div64.h> diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index b0ea6687ab5..e34c7db00f6 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -22,7 +22,6 @@ */ #include <linux/fs.h> -#include <linux/ext2_fs.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" @@ -74,7 +73,7 @@ int cifs_ioctl (struct inode * inode, struct file * filep, } break; #ifdef CONFIG_CIFS_POSIX - case EXT2_IOC_GETFLAGS: + case FS_IOC_GETFLAGS: if(CIFS_UNIX_EXTATTR_CAP & caps) { if (pSMBFile == NULL) break; @@ -82,12 +81,12 @@ int cifs_ioctl (struct inode * inode, struct file * filep, &ExtAttrBits, &ExtAttrMask); if(rc == 0) rc = put_user(ExtAttrBits & - EXT2_FL_USER_VISIBLE, + FS_FL_USER_VISIBLE, (int __user *)arg); } break; - case EXT2_IOC_SETFLAGS: + case FS_IOC_SETFLAGS: if(CIFS_UNIX_EXTATTR_CAP & caps) { if(get_user(ExtAttrBits,(int __user *)arg)) { rc = -EFAULT; diff --git a/fs/compat.c b/fs/compat.c index ce982f6e8c8..122b4e3992b 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -52,11 +52,12 @@ #include <asm/uaccess.h> #include <asm/mmu_context.h> #include <asm/ioctls.h> - -extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); +#include "internal.h" int compat_log = 1; +extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); + int compat_printk(const char *fmt, ...) { va_list ap; @@ -313,9 +314,6 @@ out: #define IOCTL_HASHSIZE 256 static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE]; -extern struct ioctl_trans ioctl_start[]; -extern int ioctl_table_size; - static inline unsigned long ioctl32_hash(unsigned long cmd) { return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE; @@ -838,8 +836,6 @@ static int do_nfs4_super_data_conv(void *raw_data) return 0; } -extern int copy_mount_options (const void __user *, unsigned long *); - #define SMBFS_NAME "smbfs" #define NCPFS_NAME "ncpfs" #define NFS4_NAME "nfs4" diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 4063a939697..64b34533ede 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -40,15 +40,11 @@ #include <linux/if_pppox.h> #include <linux/mtio.h> #include <linux/cdrom.h> -#include <linux/loop.h> #include <linux/auto_fs.h> #include <linux/auto_fs4.h> #include <linux/tty.h> #include <linux/vt_kern.h> #include <linux/fb.h> -#include <linux/ext2_fs.h> -#include <linux/ext3_jbd.h> -#include <linux/ext3_fs.h> #include <linux/videodev.h> #include <linux/netdevice.h> #include <linux/raw.h> @@ -60,7 +56,6 @@ #include <linux/pci.h> #include <linux/module.h> #include <linux/serial.h> -#include <linux/reiserfs_fs.h> #include <linux/if_tun.h> #include <linux/ctype.h> #include <linux/ioctl32.h> @@ -113,7 +108,6 @@ #include <linux/nbd.h> #include <linux/random.h> #include <linux/filter.h> -#include <linux/msdos_fs.h> #include <linux/pktcdvd.h> #include <linux/hiddev.h> @@ -124,21 +118,6 @@ #include <linux/dvb/video.h> #include <linux/lp.h> -/* Aiee. Someone does not find a difference between int and long */ -#define EXT2_IOC32_GETFLAGS _IOR('f', 1, int) -#define EXT2_IOC32_SETFLAGS _IOW('f', 2, int) -#define EXT3_IOC32_GETVERSION _IOR('f', 3, int) -#define EXT3_IOC32_SETVERSION _IOW('f', 4, int) -#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int) -#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int) -#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) -#ifdef CONFIG_JBD_DEBUG -#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) -#endif - -#define EXT2_IOC32_GETVERSION _IOR('v', 1, int) -#define EXT2_IOC32_SETVERSION _IOW('v', 2, int) - static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *f) { @@ -176,34 +155,6 @@ static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg) return err; } -static int do_ext2_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - /* These are just misnamed, they actually get/put from/to user an int */ - switch (cmd) { - case EXT2_IOC32_GETFLAGS: cmd = EXT2_IOC_GETFLAGS; break; - case EXT2_IOC32_SETFLAGS: cmd = EXT2_IOC_SETFLAGS; break; - case EXT2_IOC32_GETVERSION: cmd = EXT2_IOC_GETVERSION; break; - case EXT2_IOC32_SETVERSION: cmd = EXT2_IOC_SETVERSION; break; - } - return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); -} - -static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - /* These are just misnamed, they actually get/put from/to user an int */ - switch (cmd) { - case EXT3_IOC32_GETVERSION: cmd = EXT3_IOC_GETVERSION; break; - case EXT3_IOC32_SETVERSION: cmd = EXT3_IOC_SETVERSION; break; - case EXT3_IOC32_GETRSVSZ: cmd = EXT3_IOC_GETRSVSZ; break; - case EXT3_IOC32_SETRSVSZ: cmd = EXT3_IOC_SETRSVSZ; break; - case EXT3_IOC32_GROUP_EXTEND: cmd = EXT3_IOC_GROUP_EXTEND; break; -#ifdef CONFIG_JBD_DEBUG - case EXT3_IOC32_WAIT_FOR_READONLY: cmd = EXT3_IOC_WAIT_FOR_READONLY; break; -#endif - } - return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); -} - struct compat_video_event { int32_t type; compat_time_t timestamp; @@ -694,6 +645,7 @@ out: } #endif +#ifdef CONFIG_BLOCK struct hd_geometry32 { unsigned char heads; unsigned char sectors; @@ -918,6 +870,7 @@ static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg) } return err; } +#endif /* CONFIG_BLOCK */ struct sock_fprog32 { unsigned short len; @@ -1041,6 +994,7 @@ static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) } +#ifdef CONFIG_BLOCK struct mtget32 { compat_long_t mt_type; compat_long_t mt_resid; @@ -1213,73 +1167,7 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar return err; } - -struct loop_info32 { - compat_int_t lo_number; /* ioctl r/o */ - compat_dev_t lo_device; /* ioctl r/o */ - compat_ulong_t lo_inode; /* ioctl r/o */ - compat_dev_t lo_rdevice; /* ioctl r/o */ - compat_int_t lo_offset; - compat_int_t lo_encrypt_type; - compat_int_t lo_encrypt_key_size; /* ioctl w/o */ - compat_int_t lo_flags; /* ioctl r/o */ - char lo_name[LO_NAME_SIZE]; - unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ - compat_ulong_t lo_init[2]; - char reserved[4]; -}; - -static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - mm_segment_t old_fs = get_fs(); - struct loop_info l; - struct loop_info32 __user *ul; - int err = -EINVAL; - - ul = compat_ptr(arg); - switch(cmd) { - case LOOP_SET_STATUS: - err = get_user(l.lo_number, &ul->lo_number); - err |= __get_user(l.lo_device, &ul->lo_device); - err |= __get_user(l.lo_inode, &ul->lo_inode); - err |= __get_user(l.lo_rdevice, &ul->lo_rdevice); - err |= __copy_from_user(&l.lo_offset, &ul->lo_offset, - 8 + (unsigned long)l.lo_init - (unsigned long)&l.lo_offset); - if (err) { - err = -EFAULT; - } else { - set_fs (KERNEL_DS); - err = sys_ioctl (fd, cmd, (unsigned long)&l); - set_fs (old_fs); - } - break; - case LOOP_GET_STATUS: - set_fs (KERNEL_DS); - err = sys_ioctl (fd, cmd, (unsigned long)&l); - set_fs (old_fs); - if (!err) { - err = put_user(l.lo_number, &ul->lo_number); - err |= __put_user(l.lo_device, &ul->lo_device); - err |= __put_user(l.lo_inode, &ul->lo_inode); - err |= __put_user(l.lo_rdevice, &ul->lo_rdevice); - err |= __copy_to_user(&ul->lo_offset, &l.lo_offset, - (unsigned long)l.lo_init - (unsigned long)&l.lo_offset); - if (err) - err = -EFAULT; - } - break; - default: { - static int count; - if (++count <= 20) - printk("%s: Unknown loop ioctl cmd, fd(%d) " - "cmd(%08x) arg(%08lx)\n", - __FUNCTION__, fd, cmd, arg); - } - } - return err; -} - -extern int tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg); +#endif /* CONFIG_BLOCK */ #ifdef CONFIG_VT @@ -1607,6 +1495,7 @@ ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg) return -EINVAL; } +#ifdef CONFIG_BLOCK static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg) { /* The mkswap binary hard codes it to Intel value :-((( */ @@ -1641,12 +1530,14 @@ static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar return sys_ioctl(fd, cmd, (unsigned long)a); } +#endif static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg) { return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg); } +#ifdef CONFIG_BLOCK /* Fix sizeof(sizeof()) breakage */ #define BLKBSZGET_32 _IOR(0x12,112,int) #define BLKBSZSET_32 _IOW(0x12,113,int) @@ -1667,6 +1558,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd, { return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg)); } +#endif /* Bluetooth ioctls */ #define HCIUARTSETPROTO _IOW('U', 200, int) @@ -1687,6 +1579,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd, #define HIDPGETCONNLIST _IOR('H', 210, int) #define HIDPGETCONNINFO _IOR('H', 211, int) +#ifdef CONFIG_BLOCK struct floppy_struct32 { compat_uint_t size; compat_uint_t sect; @@ -2011,6 +1904,7 @@ out: kfree(karg); return err; } +#endif struct mtd_oob_buf32 { u_int32_t start; @@ -2052,61 +1946,7 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) return err; } -#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2]) -#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2]) - -static long -put_dirent32 (struct dirent *d, struct compat_dirent __user *d32) -{ - if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent))) - return -EFAULT; - - __put_user(d->d_ino, &d32->d_ino); - __put_user(d->d_off, &d32->d_off); - __put_user(d->d_reclen, &d32->d_reclen); - if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen)) - return -EFAULT; - - return 0; -} - -static int vfat_ioctl32(unsigned fd, unsigned cmd, unsigned long arg) -{ - struct compat_dirent __user *p = compat_ptr(arg); - int ret; - mm_segment_t oldfs = get_fs(); - struct dirent d[2]; - - switch(cmd) - { - case VFAT_IOCTL_READDIR_BOTH32: - cmd = VFAT_IOCTL_READDIR_BOTH; - break; - case VFAT_IOCTL_READDIR_SHORT32: - cmd = VFAT_IOCTL_READDIR_SHORT; - break; - } - - set_fs(KERNEL_DS); - ret = sys_ioctl(fd,cmd,(unsigned long)&d); - set_fs(oldfs); - if (ret >= 0) { - ret |= put_dirent32(&d[0], p); - ret |= put_dirent32(&d[1], p + 1); - } - return ret; -} - -#define REISERFS_IOC_UNPACK32 _IOW(0xCD,1,int) - -static int reiserfs_ioctl32(unsigned fd, unsigned cmd, unsigned long ptr) -{ - if (cmd == REISERFS_IOC_UNPACK32) - cmd = REISERFS_IOC_UNPACK; - - return sys_ioctl(fd,cmd,ptr); -} - +#ifdef CONFIG_BLOCK struct raw32_config_request { compat_int_t raw_minor; @@ -2171,6 +2011,7 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg) } return ret; } +#endif /* CONFIG_BLOCK */ struct serial_struct32 { compat_int_t type; @@ -2777,6 +2618,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc) HANDLE_IOCTL(SIOCRTMSG, ret_einval) HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp) #endif +#ifdef CONFIG_BLOCK HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo) HANDLE_IOCTL(BLKRAGET, w_long) HANDLE_IOCTL(BLKGETSIZE, w_long) @@ -2802,16 +2644,17 @@ HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans) HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans) HANDLE_IOCTL(SG_IO,sg_ioctl_trans) HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans) +#endif HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans) HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans) HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans) HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans) +#ifdef CONFIG_BLOCK HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans) HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans) HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans) HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans) -HANDLE_IOCTL(LOOP_SET_STATUS, loop_status) -HANDLE_IOCTL(LOOP_GET_STATUS, loop_status) +#endif #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int) HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout) #ifdef CONFIG_VT @@ -2821,19 +2664,6 @@ HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl) HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl) HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl) #endif -HANDLE_IOCTL(EXT2_IOC32_GETFLAGS, do_ext2_ioctl) -HANDLE_IOCTL(EXT2_IOC32_SETFLAGS, do_ext2_ioctl) -HANDLE_IOCTL(EXT2_IOC32_GETVERSION, do_ext2_ioctl) -HANDLE_IOCTL(EXT2_IOC32_SETVERSION, do_ext2_ioctl) -HANDLE_IOCTL(EXT3_IOC32_GETVERSION, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_SETVERSION, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_GETRSVSZ, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_SETRSVSZ, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_GROUP_EXTEND, do_ext3_ioctl) -COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD) -#ifdef CONFIG_JBD_DEBUG -HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl) -#endif /* One SMB ioctl needs translations. */ #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid) @@ -2863,16 +2693,14 @@ HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl) HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl) HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) /* block stuff */ +#ifdef CONFIG_BLOCK HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget) HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset) HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64) -/* vfat */ -HANDLE_IOCTL(VFAT_IOCTL_READDIR_BOTH32, vfat_ioctl32) -HANDLE_IOCTL(VFAT_IOCTL_READDIR_SHORT32, vfat_ioctl32) -HANDLE_IOCTL(REISERFS_IOC_UNPACK32, reiserfs_ioctl32) /* Raw devices */ HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) +#endif /* Serial */ HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl) HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl) diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index ad96b699071..a624c3ec818 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -543,8 +543,15 @@ static struct file_system_type cramfs_fs_type = { static int __init init_cramfs_fs(void) { - cramfs_uncompress_init(); - return register_filesystem(&cramfs_fs_type); + int rv; + + rv = cramfs_uncompress_init(); + if (rv < 0) + return rv; + rv = register_filesystem(&cramfs_fs_type); + if (rv < 0) + cramfs_uncompress_exit(); + return rv; } static void __exit exit_cramfs_fs(void) diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c index 8def89f2c43..fc3ccb74626 100644 --- a/fs/cramfs/uncompress.c +++ b/fs/cramfs/uncompress.c @@ -68,11 +68,10 @@ int cramfs_uncompress_init(void) return 0; } -int cramfs_uncompress_exit(void) +void cramfs_uncompress_exit(void) { if (!--initialized) { zlib_inflateEnd(&stream); vfree(stream.workspace); } - return 0; } diff --git a/fs/dcache.c b/fs/dcache.c index 17b392a2049..fc2faa44f8d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -32,6 +32,7 @@ #include <linux/seqlock.h> #include <linux/swap.h> #include <linux/bootmem.h> +#include "internal.h" int sysctl_vfs_cache_pressure __read_mostly = 100; @@ -1877,9 +1878,6 @@ kmem_cache_t *filp_cachep __read_mostly; EXPORT_SYMBOL(d_genocide); -extern void bdev_cache_init(void); -extern void chrdev_init(void); - void __init vfs_caches_init_early(void) { dcache_init_early(); diff --git a/fs/dquot.c b/fs/dquot.c index 0122a279106..9af789567e5 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -834,6 +834,9 @@ static void print_warning(struct dquot *dquot, const char warntype) if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags))) return; + mutex_lock(&tty_mutex); + if (!current->signal->tty) + goto out_lock; tty_write_message(current->signal->tty, dquot->dq_sb->s_id); if (warntype == ISOFTWARN || warntype == BSOFTWARN) tty_write_message(current->signal->tty, ": warning, "); @@ -861,6 +864,8 @@ static void print_warning(struct dquot *dquot, const char warntype) break; } tty_write_message(current->signal->tty, msg); +out_lock: + mutex_unlock(&tty_mutex); } static inline void flush_warnings(struct dquot **dquots, char *warntype) diff --git a/fs/exec.c b/fs/exec.c index 97df6e0aeae..a8efe35176b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -898,8 +898,7 @@ int flush_old_exec(struct linux_binprm * bprm) return 0; mmap_failed: - put_files_struct(current->files); - current->files = files; + reset_files_struct(current, files); out: return retval; } diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 92ea8265d7d..3e7a84a1e50 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -661,5 +661,8 @@ const struct file_operations ext2_dir_operations = { .read = generic_read_dir, .readdir = ext2_readdir, .ioctl = ext2_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext2_compat_ioctl, +#endif .fsync = ext2_sync_file, }; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index e65a019fc7a..c19ac153f56 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -137,6 +137,7 @@ extern void ext2_set_inode_flags(struct inode *inode); /* ioctl.c */ extern int ext2_ioctl (struct inode *, struct file *, unsigned int, unsigned long); +extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long); /* namei.c */ struct dentry *ext2_get_parent(struct dentry *child); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 23e2c7ccec1..e8bbed9dd26 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -46,6 +46,9 @@ const struct file_operations ext2_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .ioctl = ext2_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext2_compat_ioctl, +#endif .mmap = generic_file_mmap, .open = generic_file_open, .release = ext2_release_file, @@ -63,6 +66,9 @@ const struct file_operations ext2_xip_file_operations = { .read = xip_file_read, .write = xip_file_write, .ioctl = ext2_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext2_compat_ioctl, +#endif .mmap = xip_file_mmap, .open = generic_file_open, .release = ext2_release_file, diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 3ca9afdf713..1dfba77eab1 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -11,6 +11,8 @@ #include <linux/capability.h> #include <linux/time.h> #include <linux/sched.h> +#include <linux/compat.h> +#include <linux/smp_lock.h> #include <asm/current.h> #include <asm/uaccess.h> @@ -80,3 +82,33 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, return -ENOTTY; } } + +#ifdef CONFIG_COMPAT +long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + int ret; + + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case EXT2_IOC32_GETFLAGS: + cmd = EXT2_IOC_GETFLAGS; + break; + case EXT2_IOC32_SETFLAGS: + cmd = EXT2_IOC_SETFLAGS; + break; + case EXT2_IOC32_GETVERSION: + cmd = EXT2_IOC_GETVERSION; + break; + case EXT2_IOC32_SETVERSION: + cmd = EXT2_IOC_SETVERSION; + break; + default: + return -ENOIOCTLCMD; + } + lock_kernel(); + ret = ext2_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; +} +#endif diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 429acbb4e06..d0b54f30b91 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -44,6 +44,9 @@ const struct file_operations ext3_dir_operations = { .read = generic_read_dir, .readdir = ext3_readdir, /* we take BKL. needed?*/ .ioctl = ext3_ioctl, /* BKL held */ +#ifdef CONFIG_COMPAT + .compat_ioctl = ext3_compat_ioctl, +#endif .fsync = ext3_sync_file, /* BKL held */ #ifdef CONFIG_EXT3_INDEX .release = ext3_release_dir, diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 994efd189f4..74ff20f9d09 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -114,6 +114,9 @@ const struct file_operations ext3_file_operations = { .readv = generic_file_readv, .writev = generic_file_writev, .ioctl = ext3_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext3_compat_ioctl, +#endif .mmap = generic_file_mmap, .open = generic_file_open, .release = ext3_release_file, diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index dcf4f1dd108..03ba5bcab18 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -36,6 +36,7 @@ #include <linux/writeback.h> #include <linux/mpage.h> #include <linux/uio.h> +#include <linux/bio.h> #include "xattr.h" #include "acl.h" @@ -1073,7 +1074,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ, 1, &bh); + ll_rw_block(READ_META, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -2540,7 +2541,7 @@ make_io: */ get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); + submit_bh(READ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ext3_error(inode->i_sb, "ext3_get_inode_loc", diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 3a6b012d120..12daa686957 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -13,9 +13,10 @@ #include <linux/ext3_fs.h> #include <linux/ext3_jbd.h> #include <linux/time.h> +#include <linux/compat.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> - int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg) { @@ -252,3 +253,55 @@ flags_err: return -ENOTTY; } } + +#ifdef CONFIG_COMPAT +long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + int ret; + + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case EXT3_IOC32_GETFLAGS: + cmd = EXT3_IOC_GETFLAGS; + break; + case EXT3_IOC32_SETFLAGS: + cmd = EXT3_IOC_SETFLAGS; + break; + case EXT3_IOC32_GETVERSION: + cmd = EXT3_IOC_GETVERSION; + break; + case EXT3_IOC32_SETVERSION: + cmd = EXT3_IOC_SETVERSION; + break; + case EXT3_IOC32_GROUP_EXTEND: + cmd = EXT3_IOC_GROUP_EXTEND; + break; + case EXT3_IOC32_GETVERSION_OLD: + cmd = EXT3_IOC_GETVERSION_OLD; + break; + case EXT3_IOC32_SETVERSION_OLD: + cmd = EXT3_IOC_SETVERSION_OLD; + break; +#ifdef CONFIG_JBD_DEBUG + case EXT3_IOC32_WAIT_FOR_READONLY: + cmd = EXT3_IOC_WAIT_FOR_READONLY; + break; +#endif + case EXT3_IOC32_GETRSVSZ: + cmd = EXT3_IOC_GETRSVSZ; + break; + case EXT3_IOC32_SETRSVSZ: + cmd = EXT3_IOC_SETRSVSZ; + break; + case EXT3_IOC_GROUP_ADD: + break; + default: + return -ENOIOCTLCMD; + } + lock_kernel(); + ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; +} +#endif diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 85d132c37ee..235e77b52ea 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -35,6 +35,7 @@ #include <linux/string.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> +#include <linux/bio.h> #include <linux/smp_lock.h> #include "namei.h" @@ -870,7 +871,7 @@ restart: bh = ext3_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ, 1, &bh); + ll_rw_block(READ_META, 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 698b85bb1dd..3e50a416628 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -20,6 +20,7 @@ #include <linux/dirent.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> +#include <linux/compat.h> #include <asm/uaccess.h> static inline loff_t fat_make_i_pos(struct super_block *sb, @@ -741,10 +742,65 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp, return ret; } +#ifdef CONFIG_COMPAT +#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2]) +#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2]) + +static long fat_compat_put_dirent32(struct dirent *d, + struct compat_dirent __user *d32) +{ + if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent))) + return -EFAULT; + + __put_user(d->d_ino, &d32->d_ino); + __put_user(d->d_off, &d32->d_off); + __put_user(d->d_reclen, &d32->d_reclen); + if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen)) + return -EFAULT; + + return 0; +} + +static long fat_compat_dir_ioctl(struct file *file, unsigned cmd, + unsigned long arg) +{ + struct compat_dirent __user *p = compat_ptr(arg); + int ret; + mm_segment_t oldfs = get_fs(); + struct dirent d[2]; + + switch (cmd) { + case VFAT_IOCTL_READDIR_BOTH32: + cmd = VFAT_IOCTL_READDIR_BOTH; + break; + case VFAT_IOCTL_READDIR_SHORT32: + cmd = VFAT_IOCTL_READDIR_SHORT; + break; + default: + return -ENOIOCTLCMD; + } + + set_fs(KERNEL_DS); + lock_kernel(); + ret = fat_dir_ioctl(file->f_dentry->d_inode, file, + cmd, (unsigned long) &d); + unlock_kernel(); + set_fs(oldfs); + if (ret >= 0) { + ret |= fat_compat_put_dirent32(&d[0], p); + ret |= fat_compat_put_dirent32(&d[1], p + 1); + } + return ret; +} +#endif /* CONFIG_COMPAT */ + const struct file_operations fat_dir_operations = { .read = generic_read_dir, .readdir = fat_readdir, .ioctl = fat_dir_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = fat_compat_dir_ioctl, +#endif .fsync = file_fsync, }; diff --git a/fs/fat/file.c b/fs/fat/file.c index 1ee25232e6a..d50fc47169c 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -13,6 +13,7 @@ #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/writeback.h> +#include <linux/blkdev.h> int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) @@ -112,6 +113,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, } } +static int fat_file_release(struct inode *inode, struct file *filp) +{ + if ((filp->f_mode & FMODE_WRITE) && + MSDOS_SB(inode->i_sb)->options.flush) { + fat_flush_inodes(inode->i_sb, inode, NULL); + blk_congestion_wait(WRITE, HZ/10); + } + return 0; +} + const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -121,6 +132,7 @@ const struct file_operations fat_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, + .release = fat_file_release, .ioctl = fat_generic_ioctl, .fsync = file_fsync, .sendfile = generic_file_sendfile, @@ -289,6 +301,7 @@ void fat_truncate(struct inode *inode) lock_kernel(); fat_free(inode, nr_clusters); unlock_kernel(); + fat_flush_inodes(inode->i_sb, inode, NULL); } struct inode_operations fat_file_inode_operations = { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ab96ae82375..045738032a8 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -24,6 +24,7 @@ #include <linux/vfs.h> #include <linux/parser.h> #include <linux/uio.h> +#include <linux/writeback.h> #include <asm/unaligned.h> #ifndef CONFIG_FAT_DEFAULT_IOCHARSET @@ -853,7 +854,7 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_err, + Opt_obsolate, Opt_flush, Opt_err, }; static match_table_t fat_tokens = { @@ -885,7 +886,8 @@ static match_table_t fat_tokens = { {Opt_obsolate, "cvf_format=%20s"}, {Opt_obsolate, "cvf_options=%100s"}, {Opt_obsolate, "posix"}, - {Opt_err, NULL} + {Opt_flush, "flush"}, + {Opt_err, NULL}, }; static match_table_t msdos_tokens = { {Opt_nodots, "nodots"}, @@ -1026,6 +1028,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, return 0; opts->codepage = option; break; + case Opt_flush: + opts->flush = 1; + break; /* msdos specific */ case Opt_dots: @@ -1425,6 +1430,56 @@ out_fail: EXPORT_SYMBOL_GPL(fat_fill_super); +/* + * helper function for fat_flush_inodes. This writes both the inode + * and the file data blocks, waiting for in flight data blocks before + * the start of the call. It does not wait for any io started + * during the call + */ +static int writeback_inode(struct inode *inode) +{ + + int ret; + struct address_space *mapping = inode->i_mapping; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = 0, + }; + /* if we used WB_SYNC_ALL, sync_inode waits for the io for the + * inode to finish. So WB_SYNC_NONE is sent down to sync_inode + * and filemap_fdatawrite is used for the data blocks + */ + ret = sync_inode(inode, &wbc); + if (!ret) + ret = filemap_fdatawrite(mapping); + return ret; +} + +/* + * write data and metadata corresponding to i1 and i2. The io is + * started but we do not wait for any of it to finish. + * + * filemap_flush is used for the block device, so if there is a dirty + * page for a block already in flight, we will not wait and start the + * io over again + */ +int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2) +{ + int ret = 0; + if (!MSDOS_SB(sb)->options.flush) + return 0; + if (i1) + ret = writeback_inode(i1); + if (!ret && i2) + ret = writeback_inode(i2); + if (!ret && sb) { + struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; + ret = filemap_flush(mapping); + } + return ret; +} +EXPORT_SYMBOL_GPL(fat_flush_inodes); + static int __init init_fat_fs(void) { int err; diff --git a/fs/file.c b/fs/file.c index 8d3bfca7714..8e81775c5dc 100644 --- a/fs/file.c +++ b/fs/file.c @@ -288,71 +288,63 @@ out: } /* - * Expands the file descriptor table - it will allocate a new fdtable and - * both fd array and fdset. It is expected to be called with the - * files_lock held. + * Expand the file descriptor table. + * This function will allocate a new fdtable and both fd array and fdset, of + * the given size. + * Return <0 error code on error; 1 on successful completion. + * The files->file_lock should be held on entry, and will be held on exit. */ static int expand_fdtable(struct files_struct *files, int nr) __releases(files->file_lock) __acquires(files->file_lock) { - int error = 0; - struct fdtable *fdt; - struct fdtable *nfdt = NULL; + struct fdtable *new_fdt, *cur_fdt; spin_unlock(&files->file_lock); - nfdt = alloc_fdtable(nr); - if (!nfdt) { - error = -ENOMEM; - spin_lock(&files->file_lock); - goto out; - } - + new_fdt = alloc_fdtable(nr); spin_lock(&files->file_lock); - fdt = files_fdtable(files); + if (!new_fdt) + return -ENOMEM; /* - * Check again since another task may have expanded the - * fd table while we dropped the lock + * Check again since another task may have expanded the fd table while + * we dropped the lock */ - if (nr >= fdt->max_fds || nr >= fdt->max_fdset) { - copy_fdtable(nfdt, fdt); + cur_fdt = files_fdtable(files); + if (nr >= cur_fdt->max_fds || nr >= cur_fdt->max_fdset) { + /* Continue as planned */ + copy_fdtable(new_fdt, cur_fdt); + rcu_assign_pointer(files->fdt, new_fdt); + free_fdtable(cur_fdt); } else { - /* Somebody expanded while we dropped file_lock */ - spin_unlock(&files->file_lock); - __free_fdtable(nfdt); - spin_lock(&files->file_lock); - goto out; + /* Somebody else expanded, so undo our attempt */ + __free_fdtable(new_fdt); } - rcu_assign_pointer(files->fdt, nfdt); - free_fdtable(fdt); -out: - return error; + return 1; } /* * Expand files. - * Return <0 on error; 0 nothing done; 1 files expanded, we may have blocked. - * Should be called with the files->file_lock spinlock held for write. + * This function will expand the file structures, if the requested size exceeds + * the current capacity and there is room for expansion. + * Return <0 error code on error; 0 when nothing done; 1 when files were + * expanded and execution may have blocked. + * The files->file_lock should be held on entry, and will be held on exit. */ int expand_files(struct files_struct *files, int nr) { - int err, expand = 0; struct fdtable *fdt; fdt = files_fdtable(files); - if (nr >= fdt->max_fdset || nr >= fdt->max_fds) { - if (fdt->max_fdset >= NR_OPEN || - fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) { - err = -EMFILE; - goto out; - } - expand = 1; - if ((err = expand_fdtable(files, nr))) - goto out; - } - err = expand; -out: - return err; + /* Do we need to expand? */ + if (nr < fdt->max_fdset && nr < fdt->max_fds) + return 0; + /* Can we expand? */ + if (fdt->max_fdset >= NR_OPEN || fdt->max_fds >= NR_OPEN || + nr >= NR_OPEN) + return -EMFILE; + + /* All good, so we try */ + return expand_fdtable(files, nr); } static void __devinit fdtable_defer_list_init(int cpu) diff --git a/fs/filesystems.c b/fs/filesystems.c index 9f1072836c8..e3fa77c6ed5 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -69,8 +69,6 @@ int register_filesystem(struct file_system_type * fs) int res = 0; struct file_system_type ** p; - if (!fs) - return -EINVAL; if (fs->next) return -EBUSY; INIT_LIST_HEAD(&fs->fs_supers); diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index b74b791fc23..ac28b0835ff 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -260,12 +260,17 @@ static struct file_system_type vxfs_fs_type = { static int __init vxfs_init(void) { + int rv; + vxfs_inode_cachep = kmem_cache_create("vxfs_inode", sizeof(struct vxfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); - if (vxfs_inode_cachep) - return register_filesystem(&vxfs_fs_type); - return -ENOMEM; + if (!vxfs_inode_cachep) + return -ENOMEM; + rv = register_filesystem(&vxfs_fs_type); + if (rv < 0) + kmem_cache_destroy(vxfs_inode_cachep); + return rv; } static void __exit diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 892643dc9af..c403b66ec83 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -22,8 +22,7 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/buffer_head.h> - -extern struct super_block *blockdev_superblock; +#include "internal.h" /** * __mark_inode_dirty - internal function @@ -320,7 +319,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) if (!bdi_cap_writeback_dirty(bdi)) { list_move(&inode->i_list, &sb->s_dirty); - if (sb == blockdev_superblock) { + if (sb_is_blkdev_sb(sb)) { /* * Dirty memory-backed blockdev: the ramdisk * driver does this. Skip just this inode @@ -337,14 +336,14 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; - if (sb != blockdev_superblock) + if (!sb_is_blkdev_sb(sb)) break; /* Skip a congested fs */ list_move(&inode->i_list, &sb->s_dirty); continue; /* Skip a congested blockdev */ } if (wbc->bdi && bdi != wbc->bdi) { - if (sb != blockdev_superblock) + if (!sb_is_blkdev_sb(sb)) break; /* fs has the wrong queue */ list_move(&inode->i_list, &sb->s_dirty); continue; /* blockdev has wrong queue */ diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 1e2006caf15..4fc557c40cc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -212,6 +212,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) * Called with fc->lock, unlocks it */ static void request_end(struct fuse_conn *fc, struct fuse_req *req) + __releases(fc->lock) { void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; req->end = NULL; @@ -640,6 +641,7 @@ static void request_wait(struct fuse_conn *fc) */ static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req, const struct iovec *iov, unsigned long nr_segs) + __releases(fc->lock) { struct fuse_copy_state cs; struct fuse_in_header ih; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 409ce6a7cca..f85b2a282f1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -776,7 +776,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO)) return -EACCES; - if (nd && (nd->flags & LOOKUP_ACCESS)) + if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR))) return fuse_access(inode, mask); return 0; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cb7cadb0b79..7d0a9aee01f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -251,6 +251,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) memset(&outarg, 0, sizeof(outarg)); req->in.numargs = 0; req->in.h.opcode = FUSE_STATFS; + req->in.h.nodeid = get_node_id(dentry->d_inode); req->out.numargs = 1; req->out.args[0].size = fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); diff --git a/fs/generic_acl.c b/fs/generic_acl.c new file mode 100644 index 00000000000..9ccb7894717 --- /dev/null +++ b/fs/generic_acl.c @@ -0,0 +1,197 @@ +/* + * fs/generic_acl.c + * + * (C) 2005 Andreas Gruenbacher <agruen@suse.de> + * + * This file is released under the GPL. + */ + +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/generic_acl.h> + +/** + * generic_acl_list - Generic xattr_handler->list() operation + * @ops: Filesystem specific getacl and setacl callbacks + */ +size_t +generic_acl_list(struct inode *inode, struct generic_acl_operations *ops, + int type, char *list, size_t list_size) +{ + struct posix_acl *acl; + const char *name; + size_t size; + + acl = ops->getacl(inode, type); + if (!acl) + return 0; + posix_acl_release(acl); + + switch(type) { + case ACL_TYPE_ACCESS: + name = POSIX_ACL_XATTR_ACCESS; + break; + + case ACL_TYPE_DEFAULT: + name = POSIX_ACL_XATTR_DEFAULT; + break; + + default: + return 0; + } + size = strlen(name) + 1; + if (list && size <= list_size) + memcpy(list, name, size); + return size; +} + +/** + * generic_acl_get - Generic xattr_handler->get() operation + * @ops: Filesystem specific getacl and setacl callbacks + */ +int +generic_acl_get(struct inode *inode, struct generic_acl_operations *ops, + int type, void *buffer, size_t size) +{ + struct posix_acl *acl; + int error; + + acl = ops->getacl(inode, type); + if (!acl) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +/** + * generic_acl_set - Generic xattr_handler->set() operation + * @ops: Filesystem specific getacl and setacl callbacks + */ +int +generic_acl_set(struct inode *inode, struct generic_acl_operations *ops, + int type, const void *value, size_t size) +{ + struct posix_acl *acl = NULL; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) + return -EPERM; + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (acl) { + mode_t mode; + + error = posix_acl_valid(acl); + if (error) + goto failed; + switch(type) { + case ACL_TYPE_ACCESS: + mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + goto failed; + inode->i_mode = mode; + if (error == 0) { + posix_acl_release(acl); + acl = NULL; + } + break; + + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) { + error = -EINVAL; + goto failed; + } + break; + } + } + ops->setacl(inode, type, acl); + error = 0; +failed: + posix_acl_release(acl); + return error; +} + +/** + * generic_acl_init - Take care of acl inheritance at @inode create time + * @ops: Filesystem specific getacl and setacl callbacks + * + * Files created inside a directory with a default ACL inherit the + * directory's default ACL. + */ +int +generic_acl_init(struct inode *inode, struct inode *dir, + struct generic_acl_operations *ops) +{ + struct posix_acl *acl = NULL; + mode_t mode = inode->i_mode; + int error; + + inode->i_mode = mode & ~current->fs->umask; + if (!S_ISLNK(inode->i_mode)) + acl = ops->getacl(dir, ACL_TYPE_DEFAULT); + if (acl) { + struct posix_acl *clone; + + if (S_ISDIR(inode->i_mode)) { + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto cleanup; + ops->setacl(inode, ACL_TYPE_DEFAULT, clone); + posix_acl_release(clone); + } + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto cleanup; + error = posix_acl_create_masq(clone, &mode); + if (error >= 0) { + inode->i_mode = mode; + if (error > 0) + ops->setacl(inode, ACL_TYPE_ACCESS, clone); + } + posix_acl_release(clone); + } + error = 0; + +cleanup: + posix_acl_release(acl); + return error; +} + +/** + * generic_acl_chmod - change the access acl of @inode upon chmod() + * @ops: FIlesystem specific getacl and setacl callbacks + * + * A chmod also changes the permissions of the owner, group/mask, and + * other ACL entries. + */ +int +generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops) +{ + struct posix_acl *acl, *clone; + int error = 0; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + acl = ops->getacl(inode, ACL_TYPE_ACCESS); + if (acl) { + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) + ops->setacl(inode, ACL_TYPE_ACCESS, clone); + posix_acl_release(clone); + } + return error; +} diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 8a1ca5ef7ad..3915635b447 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -246,12 +246,8 @@ struct hfsplus_readdir_data { /* ext2 ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) to support * chattr/lsattr */ -#define HFSPLUS_IOC_EXT2_GETFLAGS _IOR('f', 1, long) -#define HFSPLUS_IOC_EXT2_SETFLAGS _IOW('f', 2, long) - -#define EXT2_FLAG_IMMUTABLE 0x00000010 /* Immutable file */ -#define EXT2_FLAG_APPEND 0x00000020 /* writes to file may only append */ -#define EXT2_FLAG_NODUMP 0x00000040 /* do not dump file */ +#define HFSPLUS_IOC_EXT2_GETFLAGS FS_IOC_GETFLAGS +#define HFSPLUS_IOC_EXT2_SETFLAGS FS_IOC_SETFLAGS /* diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index 13cf848ac83..79fd10402ea 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -28,11 +28,11 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, case HFSPLUS_IOC_EXT2_GETFLAGS: flags = 0; if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE) - flags |= EXT2_FLAG_IMMUTABLE; /* EXT2_IMMUTABLE_FL */ + flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */ if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND) - flags |= EXT2_FLAG_APPEND; /* EXT2_APPEND_FL */ + flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */ if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP) - flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */ + flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ return put_user(flags, (int __user *)arg); case HFSPLUS_IOC_EXT2_SETFLAGS: { if (IS_RDONLY(inode)) @@ -44,32 +44,31 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, if (get_user(flags, (int __user *)arg)) return -EFAULT; - if (flags & (EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND) || + if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } /* don't silently ignore unsupported ext2 flags */ - if (flags & ~(EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND| - EXT2_FLAG_NODUMP)) + if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) return -EOPNOTSUPP; - if (flags & EXT2_FLAG_IMMUTABLE) { /* EXT2_IMMUTABLE_FL */ + if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ inode->i_flags |= S_IMMUTABLE; HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; } else { inode->i_flags &= ~S_IMMUTABLE; HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE; } - if (flags & EXT2_FLAG_APPEND) { /* EXT2_APPEND_FL */ + if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */ inode->i_flags |= S_APPEND; HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND; } else { inode->i_flags &= ~S_APPEND; HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND; } - if (flags & EXT2_FLAG_NODUMP) /* EXT2_NODUMP_FL */ + if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */ HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP; else HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e025a31b4c6..f5b8f329aca 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -229,7 +229,7 @@ static void hugetlbfs_delete_inode(struct inode *inode) clear_inode(inode); } -static void hugetlbfs_forget_inode(struct inode *inode) +static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) { struct super_block *sb = inode->i_sb; diff --git a/fs/inode.c b/fs/inode.c index f5c04dd9ae8..ada7643104e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -133,7 +133,6 @@ static struct inode *alloc_inode(struct super_block *sb) inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_rdev = 0; - inode->i_security = NULL; inode->dirtied_when = 0; if (security_inode_alloc(inode)) { if (inode->i_sb->s_op->destroy_inode) @@ -363,27 +362,6 @@ int invalidate_inodes(struct super_block * sb) } EXPORT_SYMBOL(invalidate_inodes); - -int __invalidate_device(struct block_device *bdev) -{ - struct super_block *sb = get_super(bdev); - int res = 0; - - if (sb) { - /* - * no need to lock the super, get_super holds the - * read mutex so the filesystem cannot go away - * under us (->put_super runs with the write lock - * hold). - */ - shrink_dcache_sb(sb); - res = invalidate_inodes(sb); - drop_super(sb); - } - invalidate_bdev(bdev, 0); - return res; -} -EXPORT_SYMBOL(__invalidate_device); static int can_unuse(struct inode *inode) { diff --git a/fs/internal.h b/fs/internal.h new file mode 100644 index 00000000000..ea00126c9a5 --- /dev/null +++ b/fs/internal.h @@ -0,0 +1,55 @@ +/* fs/ internal definitions + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/ioctl32.h> + +struct super_block; + +/* + * block_dev.c + */ +#ifdef CONFIG_BLOCK +extern struct super_block *blockdev_superblock; +extern void __init bdev_cache_init(void); + +static inline int sb_is_blkdev_sb(struct super_block *sb) +{ + return sb == blockdev_superblock; +} + +#else +static inline void bdev_cache_init(void) +{ +} + +static inline int sb_is_blkdev_sb(struct super_block *sb) +{ + return 0; +} +#endif + +/* + * char_dev.c + */ +extern void __init chrdev_init(void); + +/* + * compat_ioctl.c + */ +#ifdef CONFIG_COMPAT +extern struct ioctl_trans ioctl_start[]; +extern int ioctl_table_size; +#endif + +/* + * namespace.c + */ +extern int copy_mount_options(const void __user *, unsigned long *); diff --git a/fs/ioprio.c b/fs/ioprio.c index 78b1deae3fa..6dc6721d9e8 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -1,7 +1,7 @@ /* * fs/ioprio.c * - * Copyright (C) 2004 Jens Axboe <axboe@suse.de> + * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk> * * Helper functions for setting/querying io priorities of processes. The * system calls closely mimmick getpriority/setpriority, see the man page for @@ -47,8 +47,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) /* see wmb() in current_io_context() */ smp_read_barrier_depends(); - if (ioc && ioc->set_ioprio) - ioc->set_ioprio(ioc, ioprio); + if (ioc) + ioc->ioprio_changed = 1; task_unlock(task); return 0; @@ -81,7 +81,12 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) } ret = -ESRCH; - read_lock_irq(&tasklist_lock); + /* + * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic", + * so we can't use rcu_read_lock(). See re-copy of ->ioprio + * in copy_process(). + */ + read_lock(&tasklist_lock); switch (which) { case IOPRIO_WHO_PROCESS: if (!who) @@ -124,7 +129,7 @@ free_uid: ret = -EINVAL; } - read_unlock_irq(&tasklist_lock); + read_unlock(&tasklist_lock); return ret; } @@ -170,7 +175,7 @@ asmlinkage long sys_ioprio_get(int which, int who) int ret = -ESRCH; int tmpio; - read_lock_irq(&tasklist_lock); + read_lock(&tasklist_lock); switch (which) { case IOPRIO_WHO_PROCESS: if (!who) @@ -221,7 +226,7 @@ asmlinkage long sys_ioprio_get(int which, int who) ret = -EINVAL; } - read_unlock_irq(&tasklist_lock); + read_unlock(&tasklist_lock); return ret; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 4527692f432..c34b862cdbf 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -960,30 +960,30 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, goto abort; } - if (nextblk) { - while (b_off >= (offset + sect_size)) { - struct inode *ninode; - - offset += sect_size; - if (nextblk == 0) - goto abort; - ninode = isofs_iget(inode->i_sb, nextblk, nextoff); - if (!ninode) - goto abort; - firstext = ISOFS_I(ninode)->i_first_extent; - sect_size = ISOFS_I(ninode)->i_section_size >> ISOFS_BUFFER_BITS(ninode); - nextblk = ISOFS_I(ninode)->i_next_section_block; - nextoff = ISOFS_I(ninode)->i_next_section_offset; - iput(ninode); - - if (++section > 100) { - printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n"); - printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u " - "nextblk=%lu nextoff=%lu\n", - iblock, firstext, (unsigned) sect_size, - nextblk, nextoff); - goto abort; - } + /* On the last section, nextblk == 0, section size is likely to + * exceed sect_size by a partial block, and access beyond the + * end of the file will reach beyond the section size, too. + */ + while (nextblk && (b_off >= (offset + sect_size))) { + struct inode *ninode; + + offset += sect_size; + ninode = isofs_iget(inode->i_sb, nextblk, nextoff); + if (!ninode) + goto abort; + firstext = ISOFS_I(ninode)->i_first_extent; + sect_size = ISOFS_I(ninode)->i_section_size >> ISOFS_BUFFER_BITS(ninode); + nextblk = ISOFS_I(ninode)->i_next_section_block; + nextoff = ISOFS_I(ninode)->i_next_section_offset; + iput(ninode); + + if (++section > 100) { + printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n"); + printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u " + "nextblk=%lu nextoff=%lu\n", + iblock, firstext, (unsigned) sect_size, + nextblk, nextoff); + goto abort; } } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 2fc66c3e668..7af6099c911 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -715,18 +715,8 @@ journal_t * journal_init_dev(struct block_device *bdev, if (!journal) return NULL; - journal->j_dev = bdev; - journal->j_fs_dev = fs_dev; - journal->j_blk_offset = start; - journal->j_maxlen = len; - journal->j_blocksize = blocksize; - - bh = __getblk(journal->j_dev, start, journal->j_blocksize); - J_ASSERT(bh != NULL); - journal->j_sb_buffer = bh; - journal->j_superblock = (journal_superblock_t *)bh->b_data; - /* journal descriptor can store up to n blocks -bzzz */ + journal->j_blocksize = blocksize; n = journal->j_blocksize / sizeof(journal_block_tag_t); journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); @@ -736,6 +726,15 @@ journal_t * journal_init_dev(struct block_device *bdev, kfree(journal); journal = NULL; } + journal->j_dev = bdev; + journal->j_fs_dev = fs_dev; + journal->j_blk_offset = start; + journal->j_maxlen = len; + + bh = __getblk(journal->j_dev, start, journal->j_blocksize); + J_ASSERT(bh != NULL); + journal->j_sb_buffer = bh; + journal->j_superblock = (journal_superblock_t *)bh->b_data; return journal; } diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 445eed6ce5d..11563fe2a52 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -46,7 +46,7 @@ static int scan_revoke_records(journal_t *, struct buffer_head *, #ifdef __KERNEL__ /* Release readahead buffers after use */ -void journal_brelse_array(struct buffer_head *b[], int n) +static void journal_brelse_array(struct buffer_head *b[], int n) { while (--n >= 0) brelse (b[n]); diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index 67b3774820e..37db5248826 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -6,7 +6,6 @@ */ #include <linux/fs.h> -#include <linux/ext2_fs.h> #include <linux/ctype.h> #include <linux/capability.h> #include <linux/time.h> @@ -22,13 +21,13 @@ static struct { long jfs_flag; long ext2_flag; } jfs_map[] = { - {JFS_NOATIME_FL, EXT2_NOATIME_FL}, - {JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL}, - {JFS_SYNC_FL, EXT2_SYNC_FL}, - {JFS_SECRM_FL, EXT2_SECRM_FL}, - {JFS_UNRM_FL, EXT2_UNRM_FL}, - {JFS_APPEND_FL, EXT2_APPEND_FL}, - {JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL}, + {JFS_NOATIME_FL, FS_NOATIME_FL}, + {JFS_DIRSYNC_FL, FS_DIRSYNC_FL}, + {JFS_SYNC_FL, FS_SYNC_FL}, + {JFS_SECRM_FL, FS_SECRM_FL}, + {JFS_UNRM_FL, FS_UNRM_FL}, + {JFS_APPEND_FL, FS_APPEND_FL}, + {JFS_IMMUTABLE_FL, FS_IMMUTABLE_FL}, {0, 0}, }; diff --git a/fs/libfs.c b/fs/libfs.c index 8db5afb7b0a..3793aaa1457 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -317,17 +317,9 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, int simple_readpage(struct file *file, struct page *page) { - void *kaddr; - - if (PageUptodate(page)) - goto out; - - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr, 0, PAGE_CACHE_SIZE); - kunmap_atomic(kaddr, KM_USER0); + clear_highpage(page); flush_dcache_page(page); SetPageUptodate(page); -out: unlock_page(page); return 0; } diff --git a/fs/mbcache.c b/fs/mbcache.c index e4fde1ab22c..0ff71256e65 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -160,6 +160,7 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) static void __mb_cache_entry_release_unlock(struct mb_cache_entry *ce) + __releases(mb_cache_spinlock) { /* Wake up all processes queuing for this cache entry. */ if (ce->e_queued) diff --git a/fs/mpage.c b/fs/mpage.c index 1e4598247d0..692a3e578fc 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -693,6 +693,8 @@ out: * the call was made get new I/O started against them. If wbc->sync_mode is * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. + * + * If you fix this you should check generic_writepages() also! */ int mpage_writepages(struct address_space *mapping, diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 9e44158a754..d220165d491 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -280,7 +280,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { struct super_block *sb = dir->i_sb; - struct inode *inode; + struct inode *inode = NULL; struct fat_slot_info sinfo; struct timespec ts; unsigned char msdos_name[MSDOS_NAME]; @@ -316,6 +316,8 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, d_instantiate(dentry, inode); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(sb, dir, inode); return err; } @@ -348,6 +350,8 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) fat_detach(inode); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(inode->i_sb, dir, inode); return err; } @@ -401,6 +405,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); unlock_kernel(); + fat_flush_inodes(sb, dir, inode); return 0; out_free: @@ -430,6 +435,8 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry) fat_detach(inode); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(inode->i_sb, dir, inode); return err; } @@ -635,6 +642,8 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, new_dir, new_msdos_name, new_dentry, is_hid); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(old_dir->i_sb, old_dir, new_dir); return err; } diff --git a/fs/namei.c b/fs/namei.c index 808e4ea2bb9..2892e68d3a8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -518,18 +518,20 @@ static int __emul_lookup_dentry(const char *, struct nameidata *); static __always_inline int walk_init_root(const char *name, struct nameidata *nd) { - read_lock(¤t->fs->lock); - if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { - nd->mnt = mntget(current->fs->altrootmnt); - nd->dentry = dget(current->fs->altroot); - read_unlock(¤t->fs->lock); + struct fs_struct *fs = current->fs; + + read_lock(&fs->lock); + if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) { + nd->mnt = mntget(fs->altrootmnt); + nd->dentry = dget(fs->altroot); + read_unlock(&fs->lock); if (__emul_lookup_dentry(name,nd)) return 0; - read_lock(¤t->fs->lock); + read_lock(&fs->lock); } - nd->mnt = mntget(current->fs->rootmnt); - nd->dentry = dget(current->fs->root); - read_unlock(¤t->fs->lock); + nd->mnt = mntget(fs->rootmnt); + nd->dentry = dget(fs->root); + read_unlock(&fs->lock); return 1; } @@ -724,17 +726,19 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry) static __always_inline void follow_dotdot(struct nameidata *nd) { + struct fs_struct *fs = current->fs; + while(1) { struct vfsmount *parent; struct dentry *old = nd->dentry; - read_lock(¤t->fs->lock); - if (nd->dentry == current->fs->root && - nd->mnt == current->fs->rootmnt) { - read_unlock(¤t->fs->lock); + read_lock(&fs->lock); + if (nd->dentry == fs->root && + nd->mnt == fs->rootmnt) { + read_unlock(&fs->lock); break; } - read_unlock(¤t->fs->lock); + read_unlock(&fs->lock); spin_lock(&dcache_lock); if (nd->dentry != nd->mnt->mnt_root) { nd->dentry = dget(nd->dentry->d_parent); @@ -1042,15 +1046,17 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd) struct vfsmount *old_mnt = nd->mnt; struct qstr last = nd->last; int last_type = nd->last_type; + struct fs_struct *fs = current->fs; + /* - * NAME was not found in alternate root or it's a directory. Try to find - * it in the normal root: + * NAME was not found in alternate root or it's a directory. + * Try to find it in the normal root: */ nd->last_type = LAST_ROOT; - read_lock(¤t->fs->lock); - nd->mnt = mntget(current->fs->rootmnt); - nd->dentry = dget(current->fs->root); - read_unlock(¤t->fs->lock); + read_lock(&fs->lock); + nd->mnt = mntget(fs->rootmnt); + nd->dentry = dget(fs->root); + read_unlock(&fs->lock); if (path_walk(name, nd) == 0) { if (nd->dentry->d_inode) { dput(old_dentry); @@ -1074,6 +1080,7 @@ void set_fs_altroot(void) struct vfsmount *mnt = NULL, *oldmnt; struct dentry *dentry = NULL, *olddentry; int err; + struct fs_struct *fs = current->fs; if (!emul) goto set_it; @@ -1083,12 +1090,12 @@ void set_fs_altroot(void) dentry = nd.dentry; } set_it: - write_lock(¤t->fs->lock); - oldmnt = current->fs->altrootmnt; - olddentry = current->fs->altroot; - current->fs->altrootmnt = mnt; - current->fs->altroot = dentry; - write_unlock(¤t->fs->lock); + write_lock(&fs->lock); + oldmnt = fs->altrootmnt; + olddentry = fs->altroot; + fs->altrootmnt = mnt; + fs->altroot = dentry; + write_unlock(&fs->lock); if (olddentry) { dput(olddentry); mntput(oldmnt); @@ -1102,29 +1109,30 @@ static int fastcall do_path_lookup(int dfd, const char *name, int retval = 0; int fput_needed; struct file *file; + struct fs_struct *fs = current->fs; nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; nd->depth = 0; if (*name=='/') { - read_lock(¤t->fs->lock); - if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { - nd->mnt = mntget(current->fs->altrootmnt); - nd->dentry = dget(current->fs->altroot); - read_unlock(¤t->fs->lock); + read_lock(&fs->lock); + if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) { + nd->mnt = mntget(fs->altrootmnt); + nd->dentry = dget(fs->altroot); + read_unlock(&fs->lock); if (__emul_lookup_dentry(name,nd)) goto out; /* found in altroot */ - read_lock(¤t->fs->lock); + read_lock(&fs->lock); } - nd->mnt = mntget(current->fs->rootmnt); - nd->dentry = dget(current->fs->root); - read_unlock(¤t->fs->lock); + nd->mnt = mntget(fs->rootmnt); + nd->dentry = dget(fs->root); + read_unlock(&fs->lock); } else if (dfd == AT_FDCWD) { - read_lock(¤t->fs->lock); - nd->mnt = mntget(current->fs->pwdmnt); - nd->dentry = dget(current->fs->pwd); - read_unlock(¤t->fs->lock); + read_lock(&fs->lock); + nd->mnt = mntget(fs->pwdmnt); + nd->dentry = dget(fs->pwd); + read_unlock(&fs->lock); } else { struct dentry *dentry; diff --git a/fs/namespace.c b/fs/namespace.c index 36d18085813..66d921e14fe 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -13,6 +13,7 @@ #include <linux/sched.h> #include <linux/smp_lock.h> #include <linux/init.h> +#include <linux/kernel.h> #include <linux/quotaops.h> #include <linux/acct.h> #include <linux/capability.h> @@ -23,12 +24,11 @@ #include <linux/namei.h> #include <linux/security.h> #include <linux/mount.h> +#include <linux/ramfs.h> #include <asm/uaccess.h> #include <asm/unistd.h> #include "pnode.h" -extern int __init init_rootfs(void); - /* spinlock for vfsmount related operations, inplace of dcache_lock */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); @@ -1813,6 +1813,7 @@ void __init mnt_init(unsigned long mempages) struct list_head *d; unsigned int nr_hash; int i; + int err; init_rwsem(&namespace_sem); @@ -1853,8 +1854,14 @@ void __init mnt_init(unsigned long mempages) d++; i--; } while (i); - sysfs_init(); - subsystem_register(&fs_subsys); + err = sysfs_init(); + if (err) + printk(KERN_WARNING "%s: sysfs_init error: %d\n", + __FUNCTION__, err); + err = subsystem_register(&fs_subsys); + if (err) + printk(KERN_WARNING "%s: subsystem_register error: %d\n", + __FUNCTION__, err); init_rootfs(); init_mount_tree(); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b674462793d..f6675d2c386 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -51,7 +51,6 @@ #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/file.h> -#include <linux/mpage.h> #include <linux/writeback.h> #include <linux/sunrpc/clnt.h> diff --git a/fs/no-block.c b/fs/no-block.c new file mode 100644 index 00000000000..d269a93d346 --- /dev/null +++ b/fs/no-block.c @@ -0,0 +1,22 @@ +/* no-block.c: implementation of routines required for non-BLOCK configuration + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/fs.h> + +static int no_blkdev_open(struct inode * inode, struct file * filp) +{ + return -ENODEV; +} + +const struct file_operations def_blk_fops = { + .open = no_blkdev_open, +}; diff --git a/fs/open.c b/fs/open.c index 303f06d2a7b..304c1c7814c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -546,7 +546,8 @@ asmlinkage long sys_chdir(const char __user * filename) struct nameidata nd; int error; - error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); + error = __user_walk(filename, + LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd); if (error) goto out; @@ -1172,6 +1173,7 @@ asmlinkage long sys_close(unsigned int fd) struct file * filp; struct files_struct *files = current->files; struct fdtable *fdt; + int retval; spin_lock(&files->file_lock); fdt = files_fdtable(files); @@ -1184,7 +1186,16 @@ asmlinkage long sys_close(unsigned int fd) FD_CLR(fd, fdt->close_on_exec); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); - return filp_close(filp, files); + retval = filp_close(filp, files); + + /* can't restart close syscall because file table entry was cleared */ + if (unlikely(retval == -ERESTARTSYS || + retval == -ERESTARTNOINTR || + retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK)) + retval = -EINTR; + + return retval; out_unlock: spin_unlock(&files->file_lock); diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile index d713ce6b3e1..67e665fdb7f 100644 --- a/fs/partitions/Makefile +++ b/fs/partitions/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -obj-y := check.o +obj-$(CONFIG_BLOCK) := check.o obj-$(CONFIG_ACORN_PARTITION) += acorn.o obj-$(CONFIG_AMIGA_PARTITION) += amiga.o diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 8f12587c312..4f8df71e49d 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -58,6 +58,31 @@ msdos_magic_present(unsigned char *p) return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2); } +/* Value is EBCDIC 'IBMA' */ +#define AIX_LABEL_MAGIC1 0xC9 +#define AIX_LABEL_MAGIC2 0xC2 +#define AIX_LABEL_MAGIC3 0xD4 +#define AIX_LABEL_MAGIC4 0xC1 +static int aix_magic_present(unsigned char *p, struct block_device *bdev) +{ + Sector sect; + unsigned char *d; + int ret = 0; + + if (p[0] != AIX_LABEL_MAGIC1 && + p[1] != AIX_LABEL_MAGIC2 && + p[2] != AIX_LABEL_MAGIC3 && + p[3] != AIX_LABEL_MAGIC4) + return 0; + d = read_dev_sector(bdev, 7, §); + if (d) { + if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') + ret = 1; + put_dev_sector(sect); + }; + return ret; +} + /* * Create devices for each logical partition in an extended partition. * The logical partitions form a linked list, with each entry being @@ -393,6 +418,12 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) return 0; } + if (aix_magic_present(data, bdev)) { + put_dev_sector(sect); + printk( " [AIX]"); + return 0; + } + /* * Now that the 55aa signature is present, this is probably * either the boot sector of a FAT filesystem or a DOS-type diff --git a/fs/proc/array.c b/fs/proc/array.c index 0b615d62a15..c0e554971df 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -347,6 +347,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) sigemptyset(&sigign); sigemptyset(&sigcatch); cutime = cstime = utime = stime = cputime_zero; + + mutex_lock(&tty_mutex); read_lock(&tasklist_lock); if (task->sighand) { spin_lock_irq(&task->sighand->siglock); @@ -388,6 +390,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) } ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; read_unlock(&tasklist_lock); + mutex_unlock(&tty_mutex); if (!whole || num_threads<2) wchan = get_wchan(task); diff --git a/fs/proc/base.c b/fs/proc/base.c index fe8d55fb17c..89c20d9d50b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -797,7 +797,7 @@ out_no_task: static ssize_t mem_write(struct file * file, const char * buf, size_t count, loff_t *ppos) { - int copied = 0; + int copied; char *page; struct task_struct *task = get_proc_task(file->f_dentry->d_inode); unsigned long dst = *ppos; @@ -814,6 +814,7 @@ static ssize_t mem_write(struct file * file, const char * buf, if (!page) goto out; + copied = 0; while (count > 0) { int this_len, retval; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 3ceff385727..1294eda4aca 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -100,7 +100,7 @@ static int notesize(struct memelfnote *en) int sz; sz = sizeof(struct elf_note); - sz += roundup(strlen(en->name), 4); + sz += roundup((strlen(en->name) + 1), 4); sz += roundup(en->datasz, 4); return sz; @@ -116,7 +116,7 @@ static char *storenote(struct memelfnote *men, char *bufp) #define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0) - en.n_namesz = strlen(men->name); + en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5bbd6089605..66bc425f2f3 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -277,12 +277,15 @@ static int devinfo_show(struct seq_file *f, void *v) if (i == 0) seq_printf(f, "Character devices:\n"); chrdev_show(f, i); - } else { + } +#ifdef CONFIG_BLOCK + else { i -= CHRDEV_MAJOR_HASH_SIZE; if (i == 0) seq_printf(f, "\nBlock devices:\n"); blkdev_show(f, i); } +#endif return 0; } @@ -355,6 +358,7 @@ static int stram_read_proc(char *page, char **start, off_t off, } #endif +#ifdef CONFIG_BLOCK extern struct seq_operations partitions_op; static int partitions_open(struct inode *inode, struct file *file) { @@ -378,6 +382,7 @@ static struct file_operations proc_diskstats_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif #ifdef CONFIG_MODULES extern struct seq_operations modules_op; @@ -695,7 +700,9 @@ void __init proc_misc_init(void) entry->proc_fops = &proc_kmsg_operations; create_seq_entry("devices", 0, &proc_devinfo_operations); create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); +#ifdef CONFIG_BLOCK create_seq_entry("partitions", 0, &proc_partitions_operations); +#endif create_seq_entry("stat", 0, &proc_stat_operations); create_seq_entry("interrupts", 0, &proc_interrupts_operations); #ifdef CONFIG_SLAB @@ -707,7 +714,9 @@ void __init proc_misc_init(void) create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); +#ifdef CONFIG_BLOCK create_seq_entry("diskstats", 0, &proc_diskstats_operations); +#endif #ifdef CONFIG_MODULES create_seq_entry("modules", 0, &proc_modules_operations); #endif diff --git a/fs/quota.c b/fs/quota.c index d6a2be826e2..b9dae76a0b6 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -338,6 +338,34 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void } /* + * look up a superblock on which quota ops will be performed + * - use the name of a block device to find the superblock thereon + */ +static inline struct super_block *quotactl_block(const char __user *special) +{ +#ifdef CONFIG_BLOCK + struct block_device *bdev; + struct super_block *sb; + char *tmp = getname(special); + + if (IS_ERR(tmp)) + return ERR_PTR(PTR_ERR(tmp)); + bdev = lookup_bdev(tmp); + putname(tmp); + if (IS_ERR(bdev)) + return ERR_PTR(PTR_ERR(bdev)); + sb = get_super(bdev); + bdput(bdev); + if (!sb) + return ERR_PTR(-ENODEV); + + return sb; +#else + return ERR_PTR(-ENODEV); +#endif +} + +/* * This is the system call interface. This communicates with * the user-level programs. Currently this only supports diskquota * calls. Maybe we need to add the process quotas etc. in the future, @@ -347,25 +375,15 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t { uint cmds, type; struct super_block *sb = NULL; - struct block_device *bdev; - char *tmp; int ret; cmds = cmd >> SUBCMDSHIFT; type = cmd & SUBCMDMASK; if (cmds != Q_SYNC || special) { - tmp = getname(special); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - bdev = lookup_bdev(tmp); - putname(tmp); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - sb = get_super(bdev); - bdput(bdev); - if (!sb) - return -ENODEV; + sb = quotactl_block(special); + if (IS_ERR(sb)) + return PTR_ERR(sb); } ret = check_quotactl_valid(sb, type, cmds, id); diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile index 3a59309f3ca..0eb7ac08048 100644 --- a/fs/reiserfs/Makefile +++ b/fs/reiserfs/Makefile @@ -28,7 +28,7 @@ endif # will work around it. If any other architecture displays this behavior, # add it here. ifeq ($(CONFIG_PPC32),y) -EXTRA_CFLAGS := -O1 +EXTRA_CFLAGS := $(call cc-ifversion, -lt, 0400, -O1) endif TAGS: diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 9aabcc0ccd2..657050ad743 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -22,6 +22,9 @@ const struct file_operations reiserfs_dir_operations = { .readdir = reiserfs_readdir, .fsync = reiserfs_dir_fsync, .ioctl = reiserfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = reiserfs_compat_ioctl, +#endif }; static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 1627edd5081..3e08f7161a3 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -2,6 +2,7 @@ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ +#include <linux/config.h> #include <linux/time.h> #include <linux/reiserfs_fs.h> #include <linux/reiserfs_acl.h> @@ -130,7 +131,7 @@ static int reiserfs_sync_file(struct file *p_s_filp, reiserfs_write_lock(p_s_inode->i_sb); barrier_done = reiserfs_commit_for_inode(p_s_inode); reiserfs_write_unlock(p_s_inode->i_sb); - if (barrier_done != 1) + if (barrier_done != 1 && reiserfs_barrier_flush(p_s_inode->i_sb)) blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL); if (barrier_done < 0) return barrier_done; @@ -1568,6 +1569,9 @@ const struct file_operations reiserfs_file_operations = { .read = generic_file_read, .write = reiserfs_file_write, .ioctl = reiserfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = reiserfs_compat_ioctl, +#endif .mmap = generic_file_mmap, .release = reiserfs_file_release, .fsync = reiserfs_sync_file, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 8810fda0da4..7e5a2f5ebeb 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1127,9 +1127,9 @@ static void init_inode(struct inode *inode, struct path *path) REISERFS_I(inode)->i_prealloc_count = 0; REISERFS_I(inode)->i_trans_id = 0; REISERFS_I(inode)->i_jl = NULL; - REISERFS_I(inode)->i_acl_access = NULL; - REISERFS_I(inode)->i_acl_default = NULL; - init_rwsem(&REISERFS_I(inode)->xattr_sem); + reiserfs_init_acl_access(inode); + reiserfs_init_acl_default(inode); + reiserfs_init_xattr_rwsem(inode); if (stat_data_v1(ih)) { struct stat_data_v1 *sd = @@ -1834,9 +1834,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, REISERFS_I(inode)->i_attrs = REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); - REISERFS_I(inode)->i_acl_access = NULL; - REISERFS_I(inode)->i_acl_default = NULL; - init_rwsem(&REISERFS_I(inode)->xattr_sem); + reiserfs_init_acl_access(inode); + reiserfs_init_acl_default(inode); + reiserfs_init_xattr_rwsem(inode); if (old_format_only(sb)) make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, @@ -1974,11 +1974,13 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, * iput doesn't deadlock in reiserfs_delete_xattrs. The locking * code really needs to be reworked, but this will take care of it * for now. -jeffm */ +#ifdef CONFIG_REISERFS_FS_POSIX_ACL if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) { reiserfs_write_unlock_xattrs(dir->i_sb); iput(inode); reiserfs_write_lock_xattrs(dir->i_sb); } else +#endif iput(inode); return err; } diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index a986b5e1e28..9c57578cb83 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -9,6 +9,7 @@ #include <asm/uaccess.h> #include <linux/pagemap.h> #include <linux/smp_lock.h> +#include <linux/compat.h> static int reiserfs_unpack(struct inode *inode, struct file *filp); @@ -94,6 +95,40 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, } } +#ifdef CONFIG_COMPAT +long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + int ret; + + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case REISERFS_IOC32_UNPACK: + cmd = REISERFS_IOC_UNPACK; + break; + case REISERFS_IOC32_GETFLAGS: + cmd = REISERFS_IOC_GETFLAGS; + break; + case REISERFS_IOC32_SETFLAGS: + cmd = REISERFS_IOC_SETFLAGS; + break; + case REISERFS_IOC32_GETVERSION: + cmd = REISERFS_IOC_GETVERSION; + break; + case REISERFS_IOC32_SETVERSION: + cmd = REISERFS_IOC_SETVERSION; + break; + default: + return -ENOIOCTLCMD; + } + lock_kernel(); + ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; +} +#endif + /* ** reiserfs_unpack ** Function try to convert tail from direct item into indirect. diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 9b3672d6936..e6b5ccf23f1 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1186,6 +1186,21 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct return NULL; } +static int newer_jl_done(struct reiserfs_journal_cnode *cn) +{ + struct super_block *sb = cn->sb; + b_blocknr_t blocknr = cn->blocknr; + + cn = cn->hprev; + while (cn) { + if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist && + atomic_read(&cn->jlist->j_commit_left) != 0) + return 0; + cn = cn->hprev; + } + return 1; +} + static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **, struct reiserfs_journal_list *, unsigned long, @@ -1604,6 +1619,31 @@ static int flush_journal_list(struct super_block *s, return err; } +static int test_transaction(struct super_block *s, + struct reiserfs_journal_list *jl) +{ + struct reiserfs_journal_cnode *cn; + + if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) + return 1; + + cn = jl->j_realblock; + while (cn) { + /* if the blocknr == 0, this has been cleared from the hash, + ** skip it + */ + if (cn->blocknr == 0) { + goto next; + } + if (cn->bh && !newer_jl_done(cn)) + return 0; + next: + cn = cn->next; + cond_resched(); + } + return 0; +} + static int write_one_transaction(struct super_block *s, struct reiserfs_journal_list *jl, struct buffer_chunk *chunk) @@ -3433,16 +3473,6 @@ static void flush_async_commits(void *p) flush_commit_list(p_s_sb, jl, 1); } unlock_kernel(); - /* - * this is a little racey, but there's no harm in missing - * the filemap_fdata_write - */ - if (!atomic_read(&journal->j_async_throttle) - && !reiserfs_is_journal_aborted(journal)) { - atomic_inc(&journal->j_async_throttle); - filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); - atomic_dec(&journal->j_async_throttle); - } } /* @@ -3844,7 +3874,9 @@ static void flush_old_journal_lists(struct super_block *s) entry = journal->j_journal_list.next; jl = JOURNAL_LIST_ENTRY(entry); /* this check should always be run, to send old lists to disk */ - if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { + if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) && + atomic_read(&jl->j_commit_left) == 0 && + test_transaction(s, jl)) { flush_used_journal_lists(s, jl); } else { break; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b40d4d64d59..80fc3b32802 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -510,8 +510,10 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) SLAB_CTOR_CONSTRUCTOR) { INIT_LIST_HEAD(&ei->i_prealloc_list); inode_init_once(&ei->vfs_inode); +#ifdef CONFIG_REISERFS_FS_POSIX_ACL ei->i_acl_access = NULL; ei->i_acl_default = NULL; +#endif } } @@ -560,6 +562,7 @@ static void reiserfs_dirty_inode(struct inode *inode) reiserfs_write_unlock(inode->i_sb); } +#ifdef CONFIG_REISERFS_FS_POSIX_ACL static void reiserfs_clear_inode(struct inode *inode) { struct posix_acl *acl; @@ -574,6 +577,9 @@ static void reiserfs_clear_inode(struct inode *inode) posix_acl_release(acl); REISERFS_I(inode)->i_acl_default = NULL; } +#else +#define reiserfs_clear_inode NULL +#endif #ifdef CONFIG_QUOTA static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, diff --git a/fs/select.c b/fs/select.c index 33b72ba0f86..dcbc1112b7e 100644 --- a/fs/select.c +++ b/fs/select.c @@ -658,8 +658,6 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) unsigned int i; struct poll_list *head; struct poll_list *walk; - struct fdtable *fdt; - int max_fdset; /* Allocate small arguments on the stack to save memory and be faster - use long to make sure the buffer is aligned properly on 64 bit archs to avoid unaligned access */ @@ -667,11 +665,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) struct poll_list *stack_pp = NULL; /* Do a sanity check on nfds ... */ - rcu_read_lock(); - fdt = files_fdtable(current->files); - max_fdset = fdt->max_fdset; - rcu_read_unlock(); - if (nfds > max_fdset && nfds > OPEN_MAX) + if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur) return -EINVAL; poll_initwait(&table); diff --git a/fs/splice.c b/fs/splice.c index 684bca3d3a1..13e92dd19fb 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -12,7 +12,7 @@ * Jens to support splicing to files, network, direct splicing, etc and * fixing lots of bugs. * - * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de> + * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk> * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org> * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu> * diff --git a/fs/super.c b/fs/super.c index 5c4c94d5495..aec99ddbe53 100644 --- a/fs/super.c +++ b/fs/super.c @@ -199,7 +199,7 @@ EXPORT_SYMBOL(deactivate_super); * success, 0 if we had failed (superblock contents was already dead or * dying when grab_super() had been called). */ -static int grab_super(struct super_block *s) +static int grab_super(struct super_block *s) __releases(sb_lock) { s->s_count++; spin_unlock(&sb_lock); @@ -220,6 +220,37 @@ static int grab_super(struct super_block *s) return 0; } +/* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block + * device. Takes the superblock lock. Requires a second blkdev + * flush by the caller to complete the operation. + */ +void __fsync_super(struct super_block *sb) +{ + sync_inodes_sb(sb, 0); + DQUOT_SYNC(sb); + lock_super(sb); + if (sb->s_dirt && sb->s_op->write_super) + sb->s_op->write_super(sb); + unlock_super(sb); + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, 1); + sync_blockdev(sb->s_bdev); + sync_inodes_sb(sb, 1); +} + +/* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block + * device. Takes the superblock lock. + */ +int fsync_super(struct super_block *sb) +{ + __fsync_super(sb); + return sync_blockdev(sb->s_bdev); +} + /** * generic_shutdown_super - common helper for ->kill_sb() * @sb: superblock to kill @@ -540,8 +571,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) { int retval; +#ifdef CONFIG_BLOCK if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) return -EACCES; +#endif if (flags & MS_RDONLY) acct_auto_close(sb); shrink_dcache_sb(sb); @@ -661,6 +694,7 @@ void kill_litter_super(struct super_block *sb) EXPORT_SYMBOL(kill_litter_super); +#ifdef CONFIG_BLOCK static int set_bdev_super(struct super_block *s, void *data) { s->s_bdev = data; @@ -756,6 +790,7 @@ void kill_block_super(struct super_block *sb) } EXPORT_SYMBOL(kill_block_super); +#endif int get_sb_nodev(struct file_system_type *fs_type, int flags, void *data, diff --git a/fs/sync.c b/fs/sync.c index 955aef04da2..1de747b5ddb 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -10,11 +10,124 @@ #include <linux/syscalls.h> #include <linux/linkage.h> #include <linux/pagemap.h> +#include <linux/quotaops.h> +#include <linux/buffer_head.h> #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) /* + * sync everything. Start out by waking pdflush, because that writes back + * all queues in parallel. + */ +static void do_sync(unsigned long wait) +{ + wakeup_pdflush(0); + sync_inodes(0); /* All mappings, inodes and their blockdevs */ + DQUOT_SYNC(NULL); + sync_supers(); /* Write the superblocks */ + sync_filesystems(0); /* Start syncing the filesystems */ + sync_filesystems(wait); /* Waitingly sync the filesystems */ + sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ + if (!wait) + printk("Emergency Sync complete\n"); + if (unlikely(laptop_mode)) + laptop_sync_completion(); +} + +asmlinkage long sys_sync(void) +{ + do_sync(1); + return 0; +} + +void emergency_sync(void) +{ + pdflush_operation(do_sync, 0); +} + +/* + * Generic function to fsync a file. + * + * filp may be NULL if called via the msync of a vma. + */ +int file_fsync(struct file *filp, struct dentry *dentry, int datasync) +{ + struct inode * inode = dentry->d_inode; + struct super_block * sb; + int ret, err; + + /* sync the inode to buffers */ + ret = write_inode_now(inode, 0); + + /* sync the superblock to buffers */ + sb = inode->i_sb; + lock_super(sb); + if (sb->s_op->write_super) + sb->s_op->write_super(sb); + unlock_super(sb); + + /* .. finally sync the buffers to disk */ + err = sync_blockdev(sb->s_bdev); + if (!ret) + ret = err; + return ret; +} + +long do_fsync(struct file *file, int datasync) +{ + int ret; + int err; + struct address_space *mapping = file->f_mapping; + + if (!file->f_op || !file->f_op->fsync) { + /* Why? We can still call filemap_fdatawrite */ + ret = -EINVAL; + goto out; + } + + ret = filemap_fdatawrite(mapping); + + /* + * We need to protect against concurrent writers, which could cause + * livelocks in fsync_buffers_list(). + */ + mutex_lock(&mapping->host->i_mutex); + err = file->f_op->fsync(file, file->f_dentry, datasync); + if (!ret) + ret = err; + mutex_unlock(&mapping->host->i_mutex); + err = filemap_fdatawait(mapping); + if (!ret) + ret = err; +out: + return ret; +} + +static long __do_fsync(unsigned int fd, int datasync) +{ + struct file *file; + int ret = -EBADF; + + file = fget(fd); + if (file) { + ret = do_fsync(file, datasync); + fput(file); + } + return ret; +} + +asmlinkage long sys_fsync(unsigned int fd) +{ + return __do_fsync(fd, 0); +} + +asmlinkage long sys_fdatasync(unsigned int fd) +{ + return __do_fsync(fd, 1); +} + +/* * sys_sync_file_range() permits finely controlled syncing over a segment of * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is * zero then sys_sync_file_range() will operate from offset out to EOF. diff --git a/fs/udf/super.c b/fs/udf/super.c index 5dd356cbbda..1d3b5d2070e 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1621,6 +1621,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) goto error_out; } + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_READ_ONLY) + printk("UDF-fs: Partition marked readonly; forcing readonly mount\n"); + sb->s_flags |= MS_RDONLY; + if ( udf_find_fileset(sb, &fileset, &rootdir) ) { printk("UDF-fs: No fileset found\n"); diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 26b364c9d62..35115bca036 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -1,5 +1,6 @@ config XFS_FS tristate "XFS filesystem support" + depends on BLOCK help XFS is a high performance journaling filesystem which originated on the SGI IRIX platform. It is completely multi-threaded, can diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 index 9e7f85986d0..291948d5085 100644 --- a/fs/xfs/Makefile-linux-2.6 +++ b/fs/xfs/Makefile-linux-2.6 @@ -30,7 +30,6 @@ ifeq ($(CONFIG_XFS_TRACE),y) EXTRA_CFLAGS += -DXFS_BLI_TRACE EXTRA_CFLAGS += -DXFS_BMAP_TRACE EXTRA_CFLAGS += -DXFS_BMBT_TRACE - EXTRA_CFLAGS += -DXFS_DIR_TRACE EXTRA_CFLAGS += -DXFS_DIR2_TRACE EXTRA_CFLAGS += -DXFS_DQUOT_TRACE EXTRA_CFLAGS += -DXFS_ILOCK_TRACE diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index aba7fcf881a..d5973758981 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -34,6 +34,14 @@ kmem_alloc(size_t size, unsigned int __nocast flags) gfp_t lflags = kmem_flags_convert(flags); void *ptr; +#ifdef DEBUG + if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) { + printk(KERN_WARNING "Large %s attempt, size=%ld\n", + __FUNCTION__, (long)size); + dump_stack(); + } +#endif + do { if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) ptr = kmalloc(size, lflags); @@ -60,6 +68,27 @@ kmem_zalloc(size_t size, unsigned int __nocast flags) return ptr; } +void * +kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize, + unsigned int __nocast flags) +{ + void *ptr; + size_t kmsize = maxsize; + unsigned int kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP; + + while (!(ptr = kmem_zalloc(kmsize, kmflags))) { + if ((kmsize <= minsize) && (flags & KM_NOSLEEP)) + break; + if ((kmsize >>= 1) <= minsize) { + kmsize = minsize; + kmflags = flags; + } + } + if (ptr) + *size = kmsize; + return ptr; +} + void kmem_free(void *ptr, size_t size) { diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 0e8293c5a32..9ebabdf7829 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -30,6 +30,7 @@ #define KM_NOSLEEP 0x0002u #define KM_NOFS 0x0004u #define KM_MAYFAIL 0x0008u +#define KM_LARGE 0x0010u /* * We use a special process flag to avoid recursive callbacks into @@ -41,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast flags) { gfp_t lflags; - BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); + BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE)); if (flags & KM_NOSLEEP) { lflags = GFP_ATOMIC | __GFP_NOWARN; @@ -54,8 +55,9 @@ kmem_flags_convert(unsigned int __nocast flags) } extern void *kmem_alloc(size_t, unsigned int __nocast); -extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast); extern void *kmem_zalloc(size_t, unsigned int __nocast); +extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast); +extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast); extern void kmem_free(void *, size_t); /* diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h index b25090094cc..2009e6d922c 100644 --- a/fs/xfs/linux-2.6/sema.h +++ b/fs/xfs/linux-2.6/sema.h @@ -29,8 +29,6 @@ typedef struct semaphore sema_t; -#define init_sema(sp, val, c, d) sema_init(sp, val) -#define initsema(sp, val) sema_init(sp, val) #define initnsema(sp, val, name) sema_init(sp, val) #define psema(sp, b) down(sp) #define vsema(sp) up(sp) diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h index 9a8ad481b00..351a8f454bd 100644 --- a/fs/xfs/linux-2.6/sv.h +++ b/fs/xfs/linux-2.6/sv.h @@ -53,8 +53,6 @@ static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, remove_wait_queue(&sv->waiters, &wait); } -#define init_sv(sv,type,name,flag) \ - init_waitqueue_head(&(sv)->waiters) #define sv_init(sv,flag,name) \ init_waitqueue_head(&(sv)->waiters) #define sv_destroy(sv) \ diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 34dcb43a783..09360cf1e1f 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -71,7 +71,7 @@ xfs_page_trace( int tag, struct inode *inode, struct page *page, - int mask) + unsigned long pgoff) { xfs_inode_t *ip; bhv_vnode_t *vp = vn_from_inode(inode); @@ -91,7 +91,7 @@ xfs_page_trace( (void *)ip, (void *)inode, (void *)page, - (void *)((unsigned long)mask), + (void *)pgoff, (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), (void *)((unsigned long)((isize >> 32) & 0xffffffff)), @@ -105,7 +105,7 @@ xfs_page_trace( (void *)NULL); } #else -#define xfs_page_trace(tag, inode, page, mask) +#define xfs_page_trace(tag, inode, page, pgoff) #endif /* @@ -1197,7 +1197,7 @@ xfs_vm_releasepage( .nr_to_write = 1, }; - xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask); + xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, 0); if (!page_has_buffers(page)) return 0; @@ -1356,7 +1356,6 @@ xfs_end_io_direct( ioend->io_size = size; xfs_finish_ioend(ioend); } else { - ASSERT(size >= 0); xfs_destroy_ioend(ioend); } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 2af528dcfb0..9bbadafdcb0 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -318,8 +318,12 @@ xfs_buf_free( if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) free_address(bp->b_addr - bp->b_offset); - for (i = 0; i < bp->b_page_count; i++) - page_cache_release(bp->b_pages[i]); + for (i = 0; i < bp->b_page_count; i++) { + struct page *page = bp->b_pages[i]; + + ASSERT(!PagePrivate(page)); + page_cache_release(page); + } _xfs_buf_free_pages(bp); } else if (bp->b_flags & _XBF_KMEM_ALLOC) { /* @@ -400,6 +404,7 @@ _xfs_buf_lookup_pages( nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); size -= nbytes; + ASSERT(!PagePrivate(page)); if (!PageUptodate(page)) { page_count--; if (blocksize >= PAGE_CACHE_SIZE) { @@ -768,7 +773,7 @@ xfs_buf_get_noaddr( _xfs_buf_initialize(bp, target, 0, len, 0); try_again: - data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL); + data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE); if (unlikely(data == NULL)) goto fail_free_buf; @@ -1117,10 +1122,10 @@ xfs_buf_bio_end_io( do { struct page *page = bvec->bv_page; + ASSERT(!PagePrivate(page)); if (unlikely(bp->b_error)) { if (bp->b_flags & XBF_READ) ClearPageUptodate(page); - SetPageError(page); } else if (blocksize >= PAGE_CACHE_SIZE) { SetPageUptodate(page); } else if (!PagePrivate(page) && @@ -1156,16 +1161,16 @@ _xfs_buf_ioapply( total_nr_pages = bp->b_page_count; map_i = 0; - if (bp->b_flags & _XBF_RUN_QUEUES) { - bp->b_flags &= ~_XBF_RUN_QUEUES; - rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC; - } else { - rw = (bp->b_flags & XBF_READ) ? READ : WRITE; - } - if (bp->b_flags & XBF_ORDERED) { ASSERT(!(bp->b_flags & XBF_READ)); rw = WRITE_BARRIER; + } else if (bp->b_flags & _XBF_RUN_QUEUES) { + ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); + bp->b_flags &= ~_XBF_RUN_QUEUES; + rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; + } else { + rw = (bp->b_flags & XBF_WRITE) ? WRITE : + (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; } /* Special code path for reading a sub page size buffer in -- @@ -1681,6 +1686,7 @@ xfsbufd( xfs_buf_t *bp, *n; struct list_head *dwq = &target->bt_delwrite_queue; spinlock_t *dwlk = &target->bt_delwrite_lock; + int count; current->flags |= PF_MEMALLOC; @@ -1696,6 +1702,7 @@ xfsbufd( schedule_timeout_interruptible( xfs_buf_timer_centisecs * msecs_to_jiffies(10)); + count = 0; age = xfs_buf_age_centisecs * msecs_to_jiffies(10); spin_lock(dwlk); list_for_each_entry_safe(bp, n, dwq, b_list) { @@ -1711,9 +1718,11 @@ xfsbufd( break; } - bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); + bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q| + _XBF_RUN_QUEUES); bp->b_flags |= XBF_WRITE; - list_move(&bp->b_list, &tmp); + list_move_tail(&bp->b_list, &tmp); + count++; } } spin_unlock(dwlk); @@ -1724,12 +1733,12 @@ xfsbufd( list_del_init(&bp->b_list); xfs_buf_iostrategy(bp); - - blk_run_address_space(target->bt_mapping); } if (as_list_len > 0) purge_addresses(); + if (count) + blk_run_address_space(target->bt_mapping); clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); } while (!kthread_should_stop()); @@ -1767,7 +1776,7 @@ xfs_flush_buftarg( continue; } - list_move(&bp->b_list, &tmp); + list_move_tail(&bp->b_list, &tmp); } spin_unlock(dwlk); @@ -1776,7 +1785,7 @@ xfs_flush_buftarg( */ list_for_each_entry_safe(bp, n, &tmp, b_list) { xfs_buf_lock(bp); - bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); + bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|_XBF_RUN_QUEUES); bp->b_flags |= XBF_WRITE; if (wait) bp->b_flags &= ~XBF_ASYNC; @@ -1786,6 +1795,9 @@ xfs_flush_buftarg( xfs_buf_iostrategy(bp); } + if (wait) + blk_run_address_space(target->bt_mapping); + /* * Remaining list items must be flushed before returning */ @@ -1797,9 +1809,6 @@ xfs_flush_buftarg( xfs_buf_relse(bp); } - if (wait) - blk_run_address_space(target->bt_mapping); - return pincount; } diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 7858703ed84..9dd235cb010 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -298,11 +298,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) -#define XFS_BUF_ISUNINITIAL(bp) (0) -#define XFS_BUF_UNUNINITIAL(bp) (0) - -#define XFS_BUF_BP_ISMAPPED(bp) (1) - #define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) #define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) #define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) @@ -393,8 +388,6 @@ static inline int XFS_bwrite(xfs_buf_t *bp) return error; } -#define XFS_bdwrite(bp) xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC) - static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) { bp->b_strat = xfs_bdstrat_cb; diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 6c162c3dde7..ed3a5e1b4b6 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -34,7 +34,7 @@ xfs_param_t xfs_params = { .restrict_chown = { 0, 1, 1 }, .sgid_inherit = { 0, 0, 1 }, .symlink_mode = { 0, 0, 1 }, - .panic_mask = { 0, 0, 127 }, + .panic_mask = { 0, 0, 255 }, .error_level = { 0, 3, 11 }, .syncd_timer = { 1*100, 30*100, 7200*100}, .stats_clear = { 0, 0, 1 }, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 6e52a5dd38d..a74f854d91e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -653,7 +653,7 @@ xfs_attrmulti_by_handle( STATIC int xfs_ioc_space( bhv_desc_t *bdp, - bhv_vnode_t *vp, + struct inode *inode, struct file *filp, int flags, unsigned int cmd, @@ -735,7 +735,7 @@ xfs_ioctl( !capable(CAP_SYS_ADMIN)) return -EPERM; - return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg); + return xfs_ioc_space(bdp, inode, filp, ioflags, cmd, arg); case XFS_IOC_DIOINFO: { struct dioattr da; @@ -763,6 +763,8 @@ xfs_ioctl( return xfs_ioc_fsgeometry(mp, arg); case XFS_IOC_GETVERSION: + return put_user(inode->i_generation, (int __user *)arg); + case XFS_IOC_GETXFLAGS: case XFS_IOC_SETXFLAGS: case XFS_IOC_FSGETXATTR: @@ -957,7 +959,7 @@ xfs_ioctl( STATIC int xfs_ioc_space( bhv_desc_t *bdp, - bhv_vnode_t *vp, + struct inode *inode, struct file *filp, int ioflags, unsigned int cmd, @@ -967,13 +969,13 @@ xfs_ioc_space( int attr_flags = 0; int error; - if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) + if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) return -XFS_ERROR(EPERM); if (!(filp->f_mode & FMODE_WRITE)) return -XFS_ERROR(EBADF); - if (!VN_ISREG(vp)) + if (!S_ISREG(inode->i_mode)) return -XFS_ERROR(EINVAL); if (copy_from_user(&bf, arg, sizeof(bf))) @@ -1264,13 +1266,6 @@ xfs_ioc_xattr( break; } - case XFS_IOC_GETVERSION: { - flags = vn_to_inode(vp)->i_generation; - if (copy_to_user(arg, &flags, sizeof(flags))) - error = -EFAULT; - break; - } - default: error = -ENOTTY; break; diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 22e3b714f62..3ba814ae3bb 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -623,12 +623,27 @@ xfs_vn_getattr( { struct inode *inode = dentry->d_inode; bhv_vnode_t *vp = vn_from_inode(inode); - int error = 0; + bhv_vattr_t vattr = { .va_mask = XFS_AT_STAT }; + int error; - if (unlikely(vp->v_flag & VMODIFIED)) - error = vn_revalidate(vp); - if (!error) - generic_fillattr(inode, stat); + error = bhv_vop_getattr(vp, &vattr, ATTR_LAZY, NULL); + if (likely(!error)) { + stat->size = i_size_read(inode); + stat->dev = inode->i_sb->s_dev; + stat->rdev = (vattr.va_rdev == 0) ? 0 : + MKDEV(sysv_major(vattr.va_rdev) & 0x1ff, + sysv_minor(vattr.va_rdev)); + stat->mode = vattr.va_mode; + stat->nlink = vattr.va_nlink; + stat->uid = vattr.va_uid; + stat->gid = vattr.va_gid; + stat->ino = vattr.va_nodeid; + stat->atime = vattr.va_atime; + stat->mtime = vattr.va_mtime; + stat->ctime = vattr.va_ctime; + stat->blocks = vattr.va_nblocks; + stat->blksize = vattr.va_blocksize; + } return -error; } diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index a13f75c1a93..2b0e0018738 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -148,11 +148,7 @@ BUFFER_FNS(PrivateStart, unwritten); (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) #define NBPP PAGE_SIZE -#define DPPSHFT (PAGE_SHIFT - 9) #define NDPP (1 << (PAGE_SHIFT - 9)) -#define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT) -#define dtopt(DD) ((DD) >> DPPSHFT) -#define dpoff(DD) ((DD) & (NDPP-1)) #define NBBY 8 /* number of bits per byte */ #define NBPC PAGE_SIZE /* Number of bytes per click */ @@ -172,8 +168,6 @@ BUFFER_FNS(PrivateStart, unwritten); #define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) #define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) #define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT) -#define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT) -#define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT) /* off_t bytes to clicks */ #define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) @@ -186,7 +180,6 @@ BUFFER_FNS(PrivateStart, unwritten); #define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT) #define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) #define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT) -#define io_ctob(x) ((__psunsigned_t)(x)<<IO_BPCSHIFT) /* bytes to clicks */ #define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) @@ -339,4 +332,11 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) return(x * y); } +static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) +{ + x += y - 1; + do_div(x, y); + return x; +} + #endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index ee788b1cb36..55992b40353 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -270,12 +270,12 @@ xfs_read( } } - if (unlikely((ioflags & IO_ISDIRECT) && VN_CACHED(vp))) - bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), - -1, FI_REMAPF_LOCKED); - - if (unlikely(ioflags & IO_ISDIRECT)) + if (unlikely(ioflags & IO_ISDIRECT)) { + if (VN_CACHED(vp)) + bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), + -1, FI_REMAPF_LOCKED); mutex_unlock(&inode->i_mutex); + } xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, (void *)iovp, segs, *offset, ioflags); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9df9ed37d21..38c4d128a8c 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -227,7 +227,9 @@ xfs_initialize_vnode( xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); xfs_set_inodeops(inode); + spin_lock(&ip->i_flags_lock); ip->i_flags &= ~XFS_INEW; + spin_unlock(&ip->i_flags_lock); barrier(); unlock_new_inode(inode); diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 91fc2c4b335..da255bdf526 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -79,7 +79,7 @@ typedef enum { #define VFS_RDONLY 0x0001 /* read-only vfs */ #define VFS_GRPID 0x0002 /* group-ID assigned from directory */ #define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ -#define VFS_UMOUNT 0x0008 /* unmount in progress */ +/* ---- VFS_UMOUNT ---- 0x0008 -- unneeded, fixed via kthread APIs */ #define VFS_32BITINODES 0x0010 /* do not use inums above 32 bits */ #define VFS_END 0x0010 /* max flag */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index c42b3221b20..515f5fdea57 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -85,8 +85,6 @@ typedef enum { #define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh))) #define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name) #define vn_bhv_remove(bhp,bdp) bhv_remove(bhp,bdp) -#define vn_bhv_lookup(bhp,ops) bhv_lookup(bhp,ops) -#define vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops) /* * Vnode to Linux inode mapping. diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 5b2dcc58b24..33ad5af386e 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -382,18 +382,6 @@ xfs_qm_dquot_logitem_unlock( /* - * The transaction with the dquot locked has aborted. The dquot - * must not be dirty within the transaction. We simply unlock just - * as if the transaction had been cancelled. - */ -STATIC void -xfs_qm_dquot_logitem_abort( - xfs_dq_logitem_t *ql) -{ - xfs_qm_dquot_logitem_unlock(ql); -} - -/* * this needs to stamp an lsn into the dquot, I think. * rpc's that look at user dquot's would then have to * push on the dependency recorded in the dquot @@ -426,7 +414,6 @@ STATIC struct xfs_item_ops xfs_dquot_item_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_qm_dquot_logitem_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push, - .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_abort, .iop_pushbuf = (void(*)(xfs_log_item_t*)) xfs_qm_dquot_logitem_pushbuf, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) @@ -559,17 +546,6 @@ xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn) } /* - * The transaction of which this QUOTAOFF is a part has been aborted. - * Just clean up after ourselves. - * Shouldn't this never happen in the case of qoffend logitems? XXX - */ -STATIC void -xfs_qm_qoff_logitem_abort(xfs_qoff_logitem_t *qf) -{ - kmem_free(qf, sizeof(xfs_qoff_logitem_t)); -} - -/* * There isn't much you can do to push on an quotaoff item. It is simply * stuck waiting for the log to be flushed to disk. */ @@ -644,7 +620,6 @@ STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_qm_qoffend_logitem_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push, - .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort, .iop_pushbuf = NULL, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_qm_qoffend_logitem_committing @@ -667,7 +642,6 @@ STATIC struct xfs_item_ops xfs_qm_qoff_logitem_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_qm_qoff_logitem_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push, - .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort, .iop_pushbuf = NULL, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_qm_qoff_logitem_committing diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index e23e45535c4..7c6a3a50379 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -112,17 +112,17 @@ xfs_Gqm_init(void) { xfs_dqhash_t *udqhash, *gdqhash; xfs_qm_t *xqm; - uint i, hsize, flags = KM_SLEEP | KM_MAYFAIL; + size_t hsize; + uint i; /* * Initialize the dquot hash tables. */ - hsize = XFS_QM_HASHSIZE_HIGH; - while (!(udqhash = kmem_zalloc(hsize * sizeof(xfs_dqhash_t), flags))) { - if ((hsize >>= 1) <= XFS_QM_HASHSIZE_LOW) - flags = KM_SLEEP; - } - gdqhash = kmem_zalloc(hsize * sizeof(xfs_dqhash_t), KM_SLEEP); + udqhash = kmem_zalloc_greedy(&hsize, + XFS_QM_HASHSIZE_LOW, XFS_QM_HASHSIZE_HIGH, + KM_SLEEP | KM_MAYFAIL | KM_LARGE); + gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE); + hsize /= sizeof(xfs_dqhash_t); ndquot = hsize << 8; xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index 4568deb6da8..689407de0a2 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -56,12 +56,6 @@ extern kmem_zone_t *qm_dqtrxzone; #define XFS_QM_HASHSIZE_HIGH ((NBPP * 4) / sizeof(xfs_dqhash_t)) /* - * We output a cmn_err when quotachecking a quota file with more than - * this many fsbs. - */ -#define XFS_QM_BIG_QCHECK_NBLKS 500 - -/* * This defines the unit of allocation of dquots. * Currently, it is just one file system block, and a 4K blk contains 30 * (136 * 30 = 4080) dquots. It's probably not worth trying to make diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h index b7ddd04aae3..a8b85e2be9d 100644 --- a/fs/xfs/quota/xfs_quota_priv.h +++ b/fs/xfs/quota/xfs_quota_priv.h @@ -75,7 +75,6 @@ static inline int XQMISLCKD(struct xfs_dqhash *h) #define xfs_qm_freelist_lock(qm) XQMLCK(&((qm)->qm_dqfreelist)) #define xfs_qm_freelist_unlock(qm) XQMUNLCK(&((qm)->qm_dqfreelist)) -#define XFS_QM_IS_FREELIST_LOCKED(qm) XQMISLCKD(&((qm)->qm_dqfreelist)) /* * Hash into a bucket in the dquot hash table, based on <mp, id>. @@ -170,6 +169,5 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \ #define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) -#define DQFLAGTO_DIRTYSTR(d) (XFS_DQ_IS_DIRTY(d) ? "DIRTY" : "NOTDIRTY") #endif /* __XFS_QUOTA_PRIV_H__ */ diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c index addf5a7ea06..5cf2e86caa7 100644 --- a/fs/xfs/support/ktrace.c +++ b/fs/xfs/support/ktrace.c @@ -75,7 +75,7 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep) sleep); } else { ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)), - sleep); + sleep | KM_LARGE); } if (ktep == NULL) { diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index dc2361dd740..9ece7f87ec5 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -150,7 +150,7 @@ typedef struct xfs_agi { #define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp)) typedef struct xfs_agfl { - xfs_agblock_t agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ + __be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ } xfs_agfl_t; /* diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index d2bbcd882a6..e80dda3437d 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1477,8 +1477,10 @@ xfs_alloc_ag_vextent_small( /* * Can't allocate from the freelist for some reason. */ - else + else { + fbno = NULLAGBLOCK; flen = 0; + } /* * Can't do the allocation, give up. */ @@ -2021,7 +2023,7 @@ xfs_alloc_get_freelist( /* * Get the block number and update the data structures. */ - bno = INT_GET(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)], ARCH_CONVERT); + bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]); be32_add(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) @@ -2108,7 +2110,7 @@ xfs_alloc_put_freelist( { xfs_agf_t *agf; /* a.g. freespace structure */ xfs_agfl_t *agfl; /* a.g. free block array */ - xfs_agblock_t *blockp;/* pointer to array entry */ + __be32 *blockp;/* pointer to array entry */ int error; #ifdef XFS_ALLOC_TRACE static char fname[] = "xfs_alloc_put_freelist"; @@ -2132,7 +2134,7 @@ xfs_alloc_put_freelist( pag->pagf_flcount++; ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; - INT_SET(*blockp, ARCH_CONVERT, bno); + *blockp = cpu_to_be32(bno); TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); xfs_trans_log_buf(tp, agflbp, diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 7446556e802..74cadf95d4e 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -92,6 +92,7 @@ xfs_alloc_delrec( xfs_alloc_key_t *rkp; /* right block key pointer */ xfs_alloc_ptr_t *rpp; /* right block address pointer */ int rrecs=0; /* number of records in right block */ + int numrecs; xfs_alloc_rec_t *rrp; /* right block record pointer */ xfs_btree_cur_t *tcur; /* temporary btree cursor */ @@ -115,7 +116,8 @@ xfs_alloc_delrec( /* * Fail if we're off the end of the block. */ - if (ptr > be16_to_cpu(block->bb_numrecs)) { + numrecs = be16_to_cpu(block->bb_numrecs); + if (ptr > numrecs) { *stat = 0; return 0; } @@ -129,18 +131,18 @@ xfs_alloc_delrec( lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur); lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur); #ifdef DEBUG - for (i = ptr; i < be16_to_cpu(block->bb_numrecs); i++) { + for (i = ptr; i < numrecs; i++) { if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level))) return error; } #endif - if (ptr < be16_to_cpu(block->bb_numrecs)) { + if (ptr < numrecs) { memmove(&lkp[ptr - 1], &lkp[ptr], - (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lkp)); + (numrecs - ptr) * sizeof(*lkp)); memmove(&lpp[ptr - 1], &lpp[ptr], - (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lpp)); - xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1); - xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1); + (numrecs - ptr) * sizeof(*lpp)); + xfs_alloc_log_ptrs(cur, bp, ptr, numrecs - 1); + xfs_alloc_log_keys(cur, bp, ptr, numrecs - 1); } } /* @@ -149,10 +151,10 @@ xfs_alloc_delrec( */ else { lrp = XFS_ALLOC_REC_ADDR(block, 1, cur); - if (ptr < be16_to_cpu(block->bb_numrecs)) { + if (ptr < numrecs) { memmove(&lrp[ptr - 1], &lrp[ptr], - (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lrp)); - xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1); + (numrecs - ptr) * sizeof(*lrp)); + xfs_alloc_log_recs(cur, bp, ptr, numrecs - 1); } /* * If it's the first record in the block, we'll need a key @@ -167,7 +169,8 @@ xfs_alloc_delrec( /* * Decrement and log the number of entries in the block. */ - be16_add(&block->bb_numrecs, -1); + numrecs--; + block->bb_numrecs = cpu_to_be16(numrecs); xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); /* * See if the longest free extent in the allocation group was @@ -181,14 +184,14 @@ xfs_alloc_delrec( if (level == 0 && cur->bc_btnum == XFS_BTNUM_CNT && be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK && - ptr > be16_to_cpu(block->bb_numrecs)) { - ASSERT(ptr == be16_to_cpu(block->bb_numrecs) + 1); + ptr > numrecs) { + ASSERT(ptr == numrecs + 1); /* * There are still records in the block. Grab the size * from the last one. */ - if (be16_to_cpu(block->bb_numrecs)) { - rrp = XFS_ALLOC_REC_ADDR(block, be16_to_cpu(block->bb_numrecs), cur); + if (numrecs) { + rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur); agf->agf_longest = rrp->ar_blockcount; } /* @@ -211,7 +214,7 @@ xfs_alloc_delrec( * and it's NOT the leaf level, * then we can get rid of this level. */ - if (be16_to_cpu(block->bb_numrecs) == 1 && level > 0) { + if (numrecs == 1 && level > 0) { /* * lpp is still set to the first pointer in the block. * Make it the new root of the btree. @@ -267,7 +270,7 @@ xfs_alloc_delrec( * If the number of records remaining in the block is at least * the minimum, we're done. */ - if (be16_to_cpu(block->bb_numrecs) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { + if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i))) return error; *stat = 1; @@ -419,19 +422,21 @@ xfs_alloc_delrec( * See if we can join with the left neighbor block. */ if (lbno != NULLAGBLOCK && - lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { + lrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { /* * Set "right" to be the starting block, * "left" to be the left neighbor. */ rbno = bno; right = block; + rrecs = be16_to_cpu(right->bb_numrecs); rbp = bp; if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, lbno, 0, &lbp, XFS_ALLOC_BTREE_REF))) return error; left = XFS_BUF_TO_ALLOC_BLOCK(lbp); + lrecs = be16_to_cpu(left->bb_numrecs); if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) return error; } @@ -439,20 +444,21 @@ xfs_alloc_delrec( * If that won't work, see if we can join with the right neighbor block. */ else if (rbno != NULLAGBLOCK && - rrecs + be16_to_cpu(block->bb_numrecs) <= - XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { + rrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { /* * Set "left" to be the starting block, * "right" to be the right neighbor. */ lbno = bno; left = block; + lrecs = be16_to_cpu(left->bb_numrecs); lbp = bp; if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, rbno, 0, &rbp, XFS_ALLOC_BTREE_REF))) return error; right = XFS_BUF_TO_ALLOC_BLOCK(rbp); + rrecs = be16_to_cpu(right->bb_numrecs); if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) return error; } @@ -474,34 +480,28 @@ xfs_alloc_delrec( /* * It's a non-leaf. Move keys and pointers. */ - lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur); - lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur); + lkp = XFS_ALLOC_KEY_ADDR(left, lrecs + 1, cur); + lpp = XFS_ALLOC_PTR_ADDR(left, lrecs + 1, cur); rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); #ifdef DEBUG - for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { + for (i = 0; i < rrecs; i++) { if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) return error; } #endif - memcpy(lkp, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*lkp)); - memcpy(lpp, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*lpp)); - xfs_alloc_log_keys(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1, - be16_to_cpu(left->bb_numrecs) + - be16_to_cpu(right->bb_numrecs)); - xfs_alloc_log_ptrs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1, - be16_to_cpu(left->bb_numrecs) + - be16_to_cpu(right->bb_numrecs)); + memcpy(lkp, rkp, rrecs * sizeof(*lkp)); + memcpy(lpp, rpp, rrecs * sizeof(*lpp)); + xfs_alloc_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); + xfs_alloc_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); } else { /* * It's a leaf. Move records. */ - lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur); + lrp = XFS_ALLOC_REC_ADDR(left, lrecs + 1, cur); rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); - memcpy(lrp, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*lrp)); - xfs_alloc_log_recs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1, - be16_to_cpu(left->bb_numrecs) + - be16_to_cpu(right->bb_numrecs)); + memcpy(lrp, rrp, rrecs * sizeof(*lrp)); + xfs_alloc_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); } /* * If we joined with the left neighbor, set the buffer in the @@ -509,7 +509,7 @@ xfs_alloc_delrec( */ if (bp != lbp) { xfs_btree_setbuf(cur, level, lbp); - cur->bc_ptrs[level] += be16_to_cpu(left->bb_numrecs); + cur->bc_ptrs[level] += lrecs; } /* * If we joined with the right neighbor and there's a level above @@ -521,7 +521,8 @@ xfs_alloc_delrec( /* * Fix up the number of records in the surviving block. */ - be16_add(&left->bb_numrecs, be16_to_cpu(right->bb_numrecs)); + lrecs += rrecs; + left->bb_numrecs = cpu_to_be16(lrecs); /* * Fix up the right block pointer in the surviving block, and log it. */ @@ -608,6 +609,7 @@ xfs_alloc_insrec( xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */ xfs_alloc_key_t nkey; /* new key value, from split */ xfs_alloc_rec_t nrec; /* new record value, for caller */ + int numrecs; int optr; /* old ptr value */ xfs_alloc_ptr_t *pp; /* pointer to btree addresses */ int ptr; /* index in btree block for this rec */ @@ -653,13 +655,14 @@ xfs_alloc_insrec( */ bp = cur->bc_bufs[level]; block = XFS_BUF_TO_ALLOC_BLOCK(bp); + numrecs = be16_to_cpu(block->bb_numrecs); #ifdef DEBUG if ((error = xfs_btree_check_sblock(cur, block, level, bp))) return error; /* * Check that the new entry is being inserted in the right place. */ - if (ptr <= be16_to_cpu(block->bb_numrecs)) { + if (ptr <= numrecs) { if (level == 0) { rp = XFS_ALLOC_REC_ADDR(block, ptr, cur); xfs_btree_check_rec(cur->bc_btnum, recp, rp); @@ -670,12 +673,12 @@ xfs_alloc_insrec( } #endif nbno = NULLAGBLOCK; - ncur = (xfs_btree_cur_t *)0; + ncur = NULL; /* * If the block is full, we can't insert the new entry until we * make the block un-full. */ - if (be16_to_cpu(block->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { + if (numrecs == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { /* * First, try shifting an entry to the right neighbor. */ @@ -729,6 +732,7 @@ xfs_alloc_insrec( * At this point we know there's room for our new entry in the block * we're pointing at. */ + numrecs = be16_to_cpu(block->bb_numrecs); if (level > 0) { /* * It's a non-leaf entry. Make a hole for the new data @@ -737,15 +741,15 @@ xfs_alloc_insrec( kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); #ifdef DEBUG - for (i = be16_to_cpu(block->bb_numrecs); i >= ptr; i--) { + for (i = numrecs; i >= ptr; i--) { if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level))) return error; } #endif memmove(&kp[ptr], &kp[ptr - 1], - (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*kp)); + (numrecs - ptr + 1) * sizeof(*kp)); memmove(&pp[ptr], &pp[ptr - 1], - (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*pp)); + (numrecs - ptr + 1) * sizeof(*pp)); #ifdef DEBUG if ((error = xfs_btree_check_sptr(cur, *bnop, level))) return error; @@ -755,11 +759,12 @@ xfs_alloc_insrec( */ kp[ptr - 1] = key; pp[ptr - 1] = cpu_to_be32(*bnop); - be16_add(&block->bb_numrecs, 1); - xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs)); - xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs)); + numrecs++; + block->bb_numrecs = cpu_to_be16(numrecs); + xfs_alloc_log_keys(cur, bp, ptr, numrecs); + xfs_alloc_log_ptrs(cur, bp, ptr, numrecs); #ifdef DEBUG - if (ptr < be16_to_cpu(block->bb_numrecs)) + if (ptr < numrecs) xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1, kp + ptr); #endif @@ -769,16 +774,17 @@ xfs_alloc_insrec( */ rp = XFS_ALLOC_REC_ADDR(block, 1, cur); memmove(&rp[ptr], &rp[ptr - 1], - (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*rp)); + (numrecs - ptr + 1) * sizeof(*rp)); /* * Now stuff the new record in, bump numrecs * and log the new data. */ - rp[ptr - 1] = *recp; /* INT_: struct copy */ - be16_add(&block->bb_numrecs, 1); - xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs)); + rp[ptr - 1] = *recp; + numrecs++; + block->bb_numrecs = cpu_to_be16(numrecs); + xfs_alloc_log_recs(cur, bp, ptr, numrecs); #ifdef DEBUG - if (ptr < be16_to_cpu(block->bb_numrecs)) + if (ptr < numrecs) xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1, rp + ptr); #endif @@ -819,8 +825,8 @@ xfs_alloc_insrec( */ *bnop = nbno; if (nbno != NULLAGBLOCK) { - *recp = nrec; /* INT_: struct copy */ - *curp = ncur; /* INT_: struct copy */ + *recp = nrec; + *curp = ncur; } *stat = 1; return 0; @@ -981,7 +987,7 @@ xfs_alloc_lookup( */ bp = cur->bc_bufs[level]; if (bp && XFS_BUF_ADDR(bp) != d) - bp = (xfs_buf_t *)0; + bp = NULL; if (!bp) { /* * Need to get a new buffer. Read it, then @@ -1229,7 +1235,7 @@ xfs_alloc_lshift( if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level))) return error; #endif - *lpp = *rpp; /* INT_: copy */ + *lpp = *rpp; xfs_alloc_log_ptrs(cur, lbp, nrec, nrec); xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp); } @@ -1406,8 +1412,8 @@ xfs_alloc_newroot( kp = XFS_ALLOC_KEY_ADDR(new, 1, cur); if (be16_to_cpu(left->bb_level) > 0) { - kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */ - kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */ + kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); + kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur); } else { xfs_alloc_rec_t *rp; /* btree record pointer */ @@ -1527,8 +1533,8 @@ xfs_alloc_rshift( if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level))) return error; #endif - *rkp = *lkp; /* INT_: copy */ - *rpp = *lpp; /* INT_: copy */ + *rkp = *lkp; + *rpp = *lpp; xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1); @@ -2044,7 +2050,7 @@ xfs_alloc_insert( nbno = NULLAGBLOCK; nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); - ncur = (xfs_btree_cur_t *)0; + ncur = NULL; pcur = cur; /* * Loop going up the tree, starting at the leaf level. @@ -2076,7 +2082,7 @@ xfs_alloc_insert( */ if (ncur) { pcur = ncur; - ncur = (xfs_btree_cur_t *)0; + ncur = NULL; } } while (nbno != NULLAGBLOCK); *stat = i; diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 1a210104327..9ada7bdbae5 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -91,7 +91,6 @@ STATIC int xfs_attr_refillstate(xfs_da_state_t *state); /* * Routines to manipulate out-of-line attribute values. */ -STATIC int xfs_attr_rmtval_get(xfs_da_args_t *args); STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args); STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args); @@ -180,7 +179,7 @@ xfs_attr_get(bhv_desc_t *bdp, const char *name, char *value, int *valuelenp, return(error); } -STATIC int +int xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen, char *value, int valuelen, int flags) { @@ -440,7 +439,7 @@ xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int f * Generic handler routine to remove a name from an attribute list. * Transitions attribute list from Btree to shortform as necessary. */ -STATIC int +int xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags) { xfs_da_args_t args; @@ -591,6 +590,110 @@ xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred) return xfs_attr_remove_int(dp, name, namelen, flags); } +int /* error */ +xfs_attr_list_int(xfs_attr_list_context_t *context) +{ + int error; + xfs_inode_t *dp = context->dp; + + /* + * Decide on what work routines to call based on the inode size. + */ + if (XFS_IFORK_Q(dp) == 0 || + (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && + dp->i_d.di_anextents == 0)) { + error = 0; + } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { + error = xfs_attr_shortform_list(context); + } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { + error = xfs_attr_leaf_list(context); + } else { + error = xfs_attr_node_list(context); + } + return error; +} + +#define ATTR_ENTBASESIZE /* minimum bytes used by an attr */ \ + (((struct attrlist_ent *) 0)->a_name - (char *) 0) +#define ATTR_ENTSIZE(namelen) /* actual bytes used by an attr */ \ + ((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \ + & ~(sizeof(u_int32_t)-1)) + +/* + * Format an attribute and copy it out to the user's buffer. + * Take care to check values and protect against them changing later, + * we may be reading them directly out of a user buffer. + */ +/*ARGSUSED*/ +STATIC int +xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp, + char *name, int namelen, + int valuelen, char *value) +{ + attrlist_ent_t *aep; + int arraytop; + + ASSERT(!(context->flags & ATTR_KERNOVAL)); + ASSERT(context->count >= 0); + ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); + ASSERT(context->firstu >= sizeof(*context->alist)); + ASSERT(context->firstu <= context->bufsize); + + arraytop = sizeof(*context->alist) + + context->count * sizeof(context->alist->al_offset[0]); + context->firstu -= ATTR_ENTSIZE(namelen); + if (context->firstu < arraytop) { + xfs_attr_trace_l_c("buffer full", context); + context->alist->al_more = 1; + context->seen_enough = 1; + return 1; + } + + aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]); + aep->a_valuelen = valuelen; + memcpy(aep->a_name, name, namelen); + aep->a_name[ namelen ] = 0; + context->alist->al_offset[ context->count++ ] = context->firstu; + context->alist->al_count = context->count; + xfs_attr_trace_l_c("add", context); + return 0; +} + +STATIC int +xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp, + char *name, int namelen, + int valuelen, char *value) +{ + char *offset; + int arraytop; + + ASSERT(context->count >= 0); + + arraytop = context->count + namesp->attr_namelen + namelen + 1; + if (arraytop > context->firstu) { + context->count = -1; /* insufficient space */ + return 1; + } + offset = (char *)context->alist + context->count; + strncpy(offset, namesp->attr_name, namesp->attr_namelen); + offset += namesp->attr_namelen; + strncpy(offset, name, namelen); /* real name */ + offset += namelen; + *offset = '\0'; + context->count += namesp->attr_namelen + namelen + 1; + return 0; +} + +/*ARGSUSED*/ +STATIC int +xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp, + char *name, int namelen, + int valuelen, char *value) +{ + context->count += namesp->attr_namelen + namelen + 1; + return 0; +} + /* * Generate a list of extended attribute names and optionally * also value lengths. Positive return value follows the XFS @@ -615,13 +718,13 @@ xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags, return(XFS_ERROR(EINVAL)); if ((cursor->initted == 0) && (cursor->hashval || cursor->blkno || cursor->offset)) - return(XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); /* * Check for a properly aligned buffer. */ if (((long)buffer) & (sizeof(int)-1)) - return(XFS_ERROR(EFAULT)); + return XFS_ERROR(EFAULT); if (flags & ATTR_KERNOVAL) bufsize = 0; @@ -634,53 +737,47 @@ xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags, context.dupcnt = 0; context.resynch = 1; context.flags = flags; - if (!(flags & ATTR_KERNAMELS)) { + context.seen_enough = 0; + context.alist = (attrlist_t *)buffer; + context.put_value = 0; + + if (flags & ATTR_KERNAMELS) { + context.bufsize = bufsize; + context.firstu = context.bufsize; + if (flags & ATTR_KERNOVAL) + context.put_listent = xfs_attr_kern_list_sizes; + else + context.put_listent = xfs_attr_kern_list; + } else { context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */ context.firstu = context.bufsize; - context.alist = (attrlist_t *)buffer; context.alist->al_count = 0; context.alist->al_more = 0; context.alist->al_offset[0] = context.bufsize; - } - else { - context.bufsize = bufsize; - context.firstu = context.bufsize; - context.alist = (attrlist_t *)buffer; + context.put_listent = xfs_attr_put_listent; } if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return (EIO); + return EIO; xfs_ilock(dp, XFS_ILOCK_SHARED); - /* - * Decide on what work routines to call based on the inode size. - */ xfs_attr_trace_l_c("syscall start", &context); - if (XFS_IFORK_Q(dp) == 0 || - (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - dp->i_d.di_anextents == 0)) { - error = 0; - } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { - error = xfs_attr_shortform_list(&context); - } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { - error = xfs_attr_leaf_list(&context); - } else { - error = xfs_attr_node_list(&context); - } + + error = xfs_attr_list_int(&context); + xfs_iunlock(dp, XFS_ILOCK_SHARED); xfs_attr_trace_l_c("syscall end", &context); - if (!(context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS))) { - ASSERT(error >= 0); - } - else { /* must return negated buffer size or the error */ + if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) { + /* must return negated buffer size or the error */ if (context.count < 0) error = XFS_ERROR(ERANGE); else error = -context.count; - } + } else + ASSERT(error >= 0); - return(error); + return error; } int /* error */ @@ -1122,19 +1219,19 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context) context->cursor->blkno = 0; error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK); if (error) - return(error); + return XFS_ERROR(error); ASSERT(bp != NULL); leaf = bp->data; if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW, context->dp->i_mount, leaf); xfs_da_brelse(NULL, bp); - return(XFS_ERROR(EFSCORRUPTED)); + return XFS_ERROR(EFSCORRUPTED); } - (void)xfs_attr_leaf_list_int(bp, context); + error = xfs_attr_leaf_list_int(bp, context); xfs_da_brelse(NULL, bp); - return(0); + return XFS_ERROR(error); } @@ -1858,8 +1955,12 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) return(XFS_ERROR(EFSCORRUPTED)); } error = xfs_attr_leaf_list_int(bp, context); - if (error || !leaf->hdr.info.forw) - break; /* not really an error, buffer full or EOF */ + if (error) { + xfs_da_brelse(NULL, bp); + return error; + } + if (context->seen_enough || leaf->hdr.info.forw == 0) + break; cursor->blkno = be32_to_cpu(leaf->hdr.info.forw); xfs_da_brelse(NULL, bp); error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1, @@ -1886,7 +1987,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) * Read the value associated with an attribute from the out-of-line buffer * that we stored it in. */ -STATIC int +int xfs_attr_rmtval_get(xfs_da_args_t *args) { xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE]; diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index 981633f6c07..783977d3ea7 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -37,6 +37,7 @@ struct cred; struct bhv_vnode; +struct xfs_attr_list_context; typedef int (*attrset_t)(struct bhv_vnode *, char *, void *, size_t, int); typedef int (*attrget_t)(struct bhv_vnode *, char *, void *, size_t, int); @@ -160,13 +161,16 @@ struct xfs_da_args; */ int xfs_attr_get(bhv_desc_t *, const char *, char *, int *, int, struct cred *); int xfs_attr_set(bhv_desc_t *, const char *, char *, int, int, struct cred *); +int xfs_attr_set_int(struct xfs_inode *, const char *, int, char *, int, int); int xfs_attr_remove(bhv_desc_t *, const char *, int, struct cred *); -int xfs_attr_list(bhv_desc_t *, char *, int, int, - struct attrlist_cursor_kern *, struct cred *); +int xfs_attr_remove_int(struct xfs_inode *, const char *, int, int); +int xfs_attr_list(bhv_desc_t *, char *, int, int, struct attrlist_cursor_kern *, struct cred *); +int xfs_attr_list_int(struct xfs_attr_list_context *); int xfs_attr_inactive(struct xfs_inode *dp); int xfs_attr_shortform_getvalue(struct xfs_da_args *); int xfs_attr_fetch(struct xfs_inode *, const char *, int, char *, int *, int, struct cred *); +int xfs_attr_rmtval_get(struct xfs_da_args *args); #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 9455051f012..9719bbef122 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -89,9 +89,46 @@ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf, int dst_start, int move_count, xfs_mount_t *mp); STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); -STATIC int xfs_attr_put_listent(xfs_attr_list_context_t *context, - attrnames_t *, char *name, int namelen, - int valuelen); + +/*======================================================================== + * Namespace helper routines + *========================================================================*/ + +STATIC inline attrnames_t * +xfs_attr_flags_namesp(int flags) +{ + return ((flags & XFS_ATTR_SECURE) ? &attr_secure: + ((flags & XFS_ATTR_ROOT) ? &attr_trusted : &attr_user)); +} + +/* + * If namespace bits don't match return 0. + * If all match then return 1. + */ +STATIC inline int +xfs_attr_namesp_match(int arg_flags, int ondisk_flags) +{ + return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); +} + +/* + * If namespace bits don't match and we don't have an override for it + * then return 0. + * If all match or are overridable then return 1. + */ +STATIC inline int +xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags) +{ + if (((arg_flags & ATTR_SECURE) == 0) != + ((ondisk_flags & XFS_ATTR_SECURE) == 0) && + !(arg_flags & ATTR_KERNORMALS)) + return 0; + if (((arg_flags & ATTR_ROOT) == 0) != + ((ondisk_flags & XFS_ATTR_ROOT) == 0) && + !(arg_flags & ATTR_KERNROOTLS)) + return 0; + return 1; +} /*======================================================================== @@ -228,11 +265,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) continue; if (memcmp(args->name, sfe->nameval, args->namelen) != 0) continue; - if (((args->flags & ATTR_SECURE) != 0) != - ((sfe->flags & XFS_ATTR_SECURE) != 0)) - continue; - if (((args->flags & ATTR_ROOT) != 0) != - ((sfe->flags & XFS_ATTR_ROOT) != 0)) + if (!xfs_attr_namesp_match(args->flags, sfe->flags)) continue; ASSERT(0); #endif @@ -246,8 +279,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) sfe->namelen = args->namelen; sfe->valuelen = args->valuelen; - sfe->flags = (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE : - ((args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0); + sfe->flags = XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); memcpy(sfe->nameval, args->name, args->namelen); memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); sf->hdr.count++; @@ -282,11 +314,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) continue; if (memcmp(sfe->nameval, args->name, args->namelen) != 0) continue; - if (((args->flags & ATTR_SECURE) != 0) != - ((sfe->flags & XFS_ATTR_SECURE) != 0)) - continue; - if (((args->flags & ATTR_ROOT) != 0) != - ((sfe->flags & XFS_ATTR_ROOT) != 0)) + if (!xfs_attr_namesp_match(args->flags, sfe->flags)) continue; break; } @@ -363,11 +391,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) continue; if (memcmp(args->name, sfe->nameval, args->namelen) != 0) continue; - if (((args->flags & ATTR_SECURE) != 0) != - ((sfe->flags & XFS_ATTR_SECURE) != 0)) - continue; - if (((args->flags & ATTR_ROOT) != 0) != - ((sfe->flags & XFS_ATTR_ROOT) != 0)) + if (!xfs_attr_namesp_match(args->flags, sfe->flags)) continue; return(XFS_ERROR(EEXIST)); } @@ -394,11 +418,7 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) continue; if (memcmp(args->name, sfe->nameval, args->namelen) != 0) continue; - if (((args->flags & ATTR_SECURE) != 0) != - ((sfe->flags & XFS_ATTR_SECURE) != 0)) - continue; - if (((args->flags & ATTR_ROOT) != 0) != - ((sfe->flags & XFS_ATTR_ROOT) != 0)) + if (!xfs_attr_namesp_match(args->flags, sfe->flags)) continue; if (args->flags & ATTR_KERNOVAL) { args->valuelen = sfe->valuelen; @@ -485,8 +505,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) nargs.valuelen = sfe->valuelen; nargs.hashval = xfs_da_hashname((char *)sfe->nameval, sfe->namelen); - nargs.flags = (sfe->flags & XFS_ATTR_SECURE) ? ATTR_SECURE : - ((sfe->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0); + nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */ ASSERT(error == ENOATTR); error = xfs_attr_leaf_add(bp, &nargs); @@ -520,6 +539,10 @@ xfs_attr_shortform_compare(const void *a, const void *b) } } + +#define XFS_ISRESET_CURSOR(cursor) \ + (!((cursor)->initted) && !((cursor)->hashval) && \ + !((cursor)->blkno) && !((cursor)->offset)) /* * Copy out entries of shortform attribute lists for attr_list(). * Shortform attribute lists are not stored in hashval sorted order. @@ -537,6 +560,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) xfs_attr_sf_entry_t *sfe; xfs_inode_t *dp; int sbsize, nsbuf, count, i; + int error; ASSERT(context != NULL); dp = context->dp; @@ -552,46 +576,51 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) xfs_attr_trace_l_c("sf start", context); /* - * If the buffer is large enough, do not bother with sorting. + * If the buffer is large enough and the cursor is at the start, + * do not bother with sorting since we will return everything in + * one buffer and another call using the cursor won't need to be + * made. * Note the generous fudge factor of 16 overhead bytes per entry. + * If bufsize is zero then put_listent must be a search function + * and can just scan through what we have. */ - if ((dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize) { + if (context->bufsize == 0 || + (XFS_ISRESET_CURSOR(cursor) && + (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) { for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { attrnames_t *namesp; - if (((context->flags & ATTR_SECURE) != 0) != - ((sfe->flags & XFS_ATTR_SECURE) != 0) && - !(context->flags & ATTR_KERNORMALS)) { - sfe = XFS_ATTR_SF_NEXTENTRY(sfe); - continue; - } - if (((context->flags & ATTR_ROOT) != 0) != - ((sfe->flags & XFS_ATTR_ROOT) != 0) && - !(context->flags & ATTR_KERNROOTLS)) { + if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) { sfe = XFS_ATTR_SF_NEXTENTRY(sfe); continue; } - namesp = (sfe->flags & XFS_ATTR_SECURE) ? &attr_secure: - ((sfe->flags & XFS_ATTR_ROOT) ? &attr_trusted : - &attr_user); - if (context->flags & ATTR_KERNOVAL) { - ASSERT(context->flags & ATTR_KERNAMELS); - context->count += namesp->attr_namelen + - sfe->namelen + 1; - } - else { - if (xfs_attr_put_listent(context, namesp, - (char *)sfe->nameval, - (int)sfe->namelen, - (int)sfe->valuelen)) - break; - } + namesp = xfs_attr_flags_namesp(sfe->flags); + error = context->put_listent(context, + namesp, + (char *)sfe->nameval, + (int)sfe->namelen, + (int)sfe->valuelen, + (char*)&sfe->nameval[sfe->namelen]); + + /* + * Either search callback finished early or + * didn't fit it all in the buffer after all. + */ + if (context->seen_enough) + break; + + if (error) + return error; sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } xfs_attr_trace_l_c("sf big-gulp", context); return(0); } + /* do no more for a search callback */ + if (context->bufsize == 0) + return 0; + /* * It didn't all fit, so we have to sort everything on hashval. */ @@ -614,15 +643,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) kmem_free(sbuf, sbsize); return XFS_ERROR(EFSCORRUPTED); } - if (((context->flags & ATTR_SECURE) != 0) != - ((sfe->flags & XFS_ATTR_SECURE) != 0) && - !(context->flags & ATTR_KERNORMALS)) { - sfe = XFS_ATTR_SF_NEXTENTRY(sfe); - continue; - } - if (((context->flags & ATTR_ROOT) != 0) != - ((sfe->flags & XFS_ATTR_ROOT) != 0) && - !(context->flags & ATTR_KERNROOTLS)) { + if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) { sfe = XFS_ATTR_SF_NEXTENTRY(sfe); continue; } @@ -671,24 +692,22 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) for ( ; i < nsbuf; i++, sbp++) { attrnames_t *namesp; - namesp = (sbp->flags & XFS_ATTR_SECURE) ? &attr_secure : - ((sbp->flags & XFS_ATTR_ROOT) ? &attr_trusted : - &attr_user); + namesp = xfs_attr_flags_namesp(sbp->flags); if (cursor->hashval != sbp->hash) { cursor->hashval = sbp->hash; cursor->offset = 0; } - if (context->flags & ATTR_KERNOVAL) { - ASSERT(context->flags & ATTR_KERNAMELS); - context->count += namesp->attr_namelen + - sbp->namelen + 1; - } else { - if (xfs_attr_put_listent(context, namesp, - sbp->name, sbp->namelen, - sbp->valuelen)) - break; - } + error = context->put_listent(context, + namesp, + sbp->name, + sbp->namelen, + sbp->valuelen, + &sbp->name[sbp->namelen]); + if (error) + return error; + if (context->seen_enough) + break; cursor->offset++; } @@ -810,8 +829,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) nargs.value = (char *)&name_loc->nameval[nargs.namelen]; nargs.valuelen = be16_to_cpu(name_loc->valuelen); nargs.hashval = be32_to_cpu(entry->hashval); - nargs.flags = (entry->flags & XFS_ATTR_SECURE) ? ATTR_SECURE : - ((entry->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0); + nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags); xfs_attr_shortform_add(&nargs, forkoff); } error = 0; @@ -1098,8 +1116,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) be16_to_cpu(map->size)); entry->hashval = cpu_to_be32(args->hashval); entry->flags = tmp ? XFS_ATTR_LOCAL : 0; - entry->flags |= (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE : - ((args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0); + entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); if (args->rename) { entry->flags |= XFS_ATTR_INCOMPLETE; if ((args->blkno2 == args->blkno) && @@ -1926,7 +1943,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) else break; } - ASSERT((probe >= 0) && + ASSERT((probe >= 0) && (!leaf->hdr.count || (probe < be16_to_cpu(leaf->hdr.count)))); ASSERT((span <= 4) || (be32_to_cpu(entry->hashval) == hashval)); @@ -1971,14 +1988,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, probe); if (name_loc->namelen != args->namelen) continue; - if (memcmp(args->name, (char *)name_loc->nameval, - args->namelen) != 0) + if (memcmp(args->name, (char *)name_loc->nameval, args->namelen) != 0) continue; - if (((args->flags & ATTR_SECURE) != 0) != - ((entry->flags & XFS_ATTR_SECURE) != 0)) - continue; - if (((args->flags & ATTR_ROOT) != 0) != - ((entry->flags & XFS_ATTR_ROOT) != 0)) + if (!xfs_attr_namesp_match(args->flags, entry->flags)) continue; args->index = probe; return(XFS_ERROR(EEXIST)); @@ -1989,11 +2001,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) if (memcmp(args->name, (char *)name_rmt->name, args->namelen) != 0) continue; - if (((args->flags & ATTR_SECURE) != 0) != - ((entry->flags & XFS_ATTR_SECURE) != 0)) - continue; - if (((args->flags & ATTR_ROOT) != 0) != - ((entry->flags & XFS_ATTR_ROOT) != 0)) + if (!xfs_attr_namesp_match(args->flags, entry->flags)) continue; args->index = probe; args->rmtblkno = be32_to_cpu(name_rmt->valueblk); @@ -2312,8 +2320,6 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) attrlist_cursor_kern_t *cursor; xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; - xfs_attr_leaf_name_local_t *name_loc; - xfs_attr_leaf_name_remote_t *name_rmt; int retval, i; ASSERT(bp != NULL); @@ -2355,9 +2361,8 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) * We have found our place, start copying out the new attributes. */ retval = 0; - for ( ; (i < be16_to_cpu(leaf->hdr.count)) - && (retval == 0); entry++, i++) { - attrnames_t *namesp; + for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) { + attrnames_t *namesp; if (be32_to_cpu(entry->hashval) != cursor->hashval) { cursor->hashval = be32_to_cpu(entry->hashval); @@ -2366,115 +2371,69 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) if (entry->flags & XFS_ATTR_INCOMPLETE) continue; /* skip incomplete entries */ - if (((context->flags & ATTR_SECURE) != 0) != - ((entry->flags & XFS_ATTR_SECURE) != 0) && - !(context->flags & ATTR_KERNORMALS)) - continue; /* skip non-matching entries */ - if (((context->flags & ATTR_ROOT) != 0) != - ((entry->flags & XFS_ATTR_ROOT) != 0) && - !(context->flags & ATTR_KERNROOTLS)) - continue; /* skip non-matching entries */ - - namesp = (entry->flags & XFS_ATTR_SECURE) ? &attr_secure : - ((entry->flags & XFS_ATTR_ROOT) ? &attr_trusted : - &attr_user); + if (!xfs_attr_namesp_match_overrides(context->flags, entry->flags)) + continue; + + namesp = xfs_attr_flags_namesp(entry->flags); if (entry->flags & XFS_ATTR_LOCAL) { - name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); - if (context->flags & ATTR_KERNOVAL) { - ASSERT(context->flags & ATTR_KERNAMELS); - context->count += namesp->attr_namelen + - (int)name_loc->namelen + 1; - } else { - retval = xfs_attr_put_listent(context, namesp, - (char *)name_loc->nameval, - (int)name_loc->namelen, - be16_to_cpu(name_loc->valuelen)); - } + xfs_attr_leaf_name_local_t *name_loc = + XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); + + retval = context->put_listent(context, + namesp, + (char *)name_loc->nameval, + (int)name_loc->namelen, + be16_to_cpu(name_loc->valuelen), + (char *)&name_loc->nameval[name_loc->namelen]); + if (retval) + return retval; } else { - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); - if (context->flags & ATTR_KERNOVAL) { - ASSERT(context->flags & ATTR_KERNAMELS); - context->count += namesp->attr_namelen + - (int)name_rmt->namelen + 1; - } else { - retval = xfs_attr_put_listent(context, namesp, - (char *)name_rmt->name, - (int)name_rmt->namelen, - be32_to_cpu(name_rmt->valuelen)); + xfs_attr_leaf_name_remote_t *name_rmt = + XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); + + int valuelen = be32_to_cpu(name_rmt->valuelen); + + if (context->put_value) { + xfs_da_args_t args; + + memset((char *)&args, 0, sizeof(args)); + args.dp = context->dp; + args.whichfork = XFS_ATTR_FORK; + args.valuelen = valuelen; + args.value = kmem_alloc(valuelen, KM_SLEEP); + args.rmtblkno = be32_to_cpu(name_rmt->valueblk); + args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); + retval = xfs_attr_rmtval_get(&args); + if (retval) + return retval; + retval = context->put_listent(context, + namesp, + (char *)name_rmt->name, + (int)name_rmt->namelen, + valuelen, + (char*)args.value); + kmem_free(args.value, valuelen); } + else { + retval = context->put_listent(context, + namesp, + (char *)name_rmt->name, + (int)name_rmt->namelen, + valuelen, + NULL); + } + if (retval) + return retval; } - if (retval == 0) { - cursor->offset++; - } + if (context->seen_enough) + break; + cursor->offset++; } xfs_attr_trace_l_cl("blk end", context, leaf); return(retval); } -#define ATTR_ENTBASESIZE /* minimum bytes used by an attr */ \ - (((struct attrlist_ent *) 0)->a_name - (char *) 0) -#define ATTR_ENTSIZE(namelen) /* actual bytes used by an attr */ \ - ((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \ - & ~(sizeof(u_int32_t)-1)) - -/* - * Format an attribute and copy it out to the user's buffer. - * Take care to check values and protect against them changing later, - * we may be reading them directly out of a user buffer. - */ -/*ARGSUSED*/ -STATIC int -xfs_attr_put_listent(xfs_attr_list_context_t *context, - attrnames_t *namesp, char *name, int namelen, int valuelen) -{ - attrlist_ent_t *aep; - int arraytop; - - ASSERT(!(context->flags & ATTR_KERNOVAL)); - if (context->flags & ATTR_KERNAMELS) { - char *offset; - - ASSERT(context->count >= 0); - - arraytop = context->count + namesp->attr_namelen + namelen + 1; - if (arraytop > context->firstu) { - context->count = -1; /* insufficient space */ - return(1); - } - offset = (char *)context->alist + context->count; - strncpy(offset, namesp->attr_name, namesp->attr_namelen); - offset += namesp->attr_namelen; - strncpy(offset, name, namelen); /* real name */ - offset += namelen; - *offset = '\0'; - context->count += namesp->attr_namelen + namelen + 1; - return(0); - } - - ASSERT(context->count >= 0); - ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); - ASSERT(context->firstu >= sizeof(*context->alist)); - ASSERT(context->firstu <= context->bufsize); - - arraytop = sizeof(*context->alist) + - context->count * sizeof(context->alist->al_offset[0]); - context->firstu -= ATTR_ENTSIZE(namelen); - if (context->firstu < arraytop) { - xfs_attr_trace_l_c("buffer full", context); - context->alist->al_more = 1; - return(1); - } - - aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]); - aep->a_valuelen = valuelen; - memcpy(aep->a_name, name, namelen); - aep->a_name[ namelen ] = 0; - context->alist->al_offset[ context->count++ ] = context->firstu; - context->alist->al_count = context->count; - xfs_attr_trace_l_c("add", context); - return(0); -} /*======================================================================== * Manage the INCOMPLETE flag in a leaf entry diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 51c3ee156b2..040f732ce1e 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -130,6 +130,19 @@ typedef struct xfs_attr_leafblock { #define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT) /* + * Conversion macros for converting namespace bits from argument flags + * to ondisk flags. + */ +#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE) +#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) +#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK) +#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK) +#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\ + ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0)) +#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\ + ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0)) + +/* * Alignment for namelist and valuelist entries (since they are mixed * there can be only one alignment value) */ @@ -196,16 +209,26 @@ static inline int xfs_attr_leaf_entsize_local_max(int bsize) * Structure used to pass context around among the routines. *========================================================================*/ + +struct xfs_attr_list_context; + +typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, struct attrnames *, + char *, int, int, char *); + typedef struct xfs_attr_list_context { - struct xfs_inode *dp; /* inode */ - struct attrlist_cursor_kern *cursor;/* position in list */ - struct attrlist *alist; /* output buffer */ - int count; /* num used entries */ - int dupcnt; /* count dup hashvals seen */ - int bufsize;/* total buffer size */ - int firstu; /* first used byte in buffer */ - int flags; /* from VOP call */ - int resynch;/* T/F: resynch with cursor */ + struct xfs_inode *dp; /* inode */ + struct attrlist_cursor_kern *cursor; /* position in list */ + struct attrlist *alist; /* output buffer */ + int seen_enough; /* T/F: seen enough of list? */ + int count; /* num used entries */ + int dupcnt; /* count dup hashvals seen */ + int bufsize; /* total buffer size */ + int firstu; /* first used byte in buffer */ + int flags; /* from VOP call */ + int resynch; /* T/F: resynch with cursor */ + int put_value; /* T/F: need value for listent */ + put_listent_func_t put_listent; /* list output fmt function */ + int index; /* index into output buffer */ } xfs_attr_list_context_t; /* diff --git a/fs/xfs/xfs_behavior.c b/fs/xfs/xfs_behavior.c index f4fe3715a80..0dc17219d41 100644 --- a/fs/xfs/xfs_behavior.c +++ b/fs/xfs/xfs_behavior.c @@ -110,26 +110,6 @@ bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp) } /* - * Look for a specific ops vector on the specified behavior chain. - * Return the associated behavior descriptor. Or NULL, if not found. - */ -bhv_desc_t * -bhv_lookup(bhv_head_t *bhp, void *ops) -{ - bhv_desc_t *curdesc; - - for (curdesc = bhp->bh_first; - curdesc != NULL; - curdesc = curdesc->bd_next) { - - if (curdesc->bd_ops == ops) - return curdesc; - } - - return NULL; -} - -/* * Looks for the first behavior within a specified range of positions. * Return the associated behavior descriptor. Or NULL, if none found. */ diff --git a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h index 6e6e56fb352..e7ca1fed955 100644 --- a/fs/xfs/xfs_behavior.h +++ b/fs/xfs/xfs_behavior.h @@ -176,12 +176,10 @@ extern void bhv_insert_initial(bhv_head_t *, bhv_desc_t *); * Behavior module prototypes. */ extern void bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp); -extern bhv_desc_t * bhv_lookup(bhv_head_t *bhp, void *ops); extern bhv_desc_t * bhv_lookup_range(bhv_head_t *bhp, int low, int high); extern bhv_desc_t * bhv_base(bhv_head_t *bhp); /* No bhv locking on Linux */ -#define bhv_lookup_unlocked bhv_lookup #define bhv_base_unlocked bhv_base #endif /* __XFS_BEHAVIOR_H__ */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index bf46fae303a..5b050c06795 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -2999,7 +2999,7 @@ xfs_bmap_btree_to_extents( int error; /* error return value */ xfs_ifork_t *ifp; /* inode fork data */ xfs_mount_t *mp; /* mount point structure */ - xfs_bmbt_ptr_t *pp; /* ptr to block address */ + __be64 *pp; /* ptr to block address */ xfs_bmbt_block_t *rblock;/* root btree block */ ifp = XFS_IFORK_PTR(ip, whichfork); @@ -3011,12 +3011,12 @@ xfs_bmap_btree_to_extents( ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1); mp = ip->i_mount; pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes); + cbno = be64_to_cpu(*pp); *logflagsp = 0; #ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), 1))) + if ((error = xfs_btree_check_lptr(cur, cbno, 1))) return error; #endif - cbno = INT_GET(*pp, ARCH_CONVERT); if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF))) return error; @@ -3512,9 +3512,9 @@ xfs_bmap_extents_to_btree( */ kp = XFS_BMAP_KEY_IADDR(block, 1, cur); arp = XFS_BMAP_REC_IADDR(ablock, 1, cur); - INT_SET(kp->br_startoff, ARCH_CONVERT, xfs_bmbt_disk_get_startoff(arp)); + kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); pp = XFS_BMAP_PTR_IADDR(block, 1, cur); - INT_SET(*pp, ARCH_CONVERT, args.fsbno); + *pp = cpu_to_be64(args.fsbno); /* * Do all this logging at the end so that * the root is at the right level. @@ -3705,7 +3705,7 @@ STATIC xfs_bmbt_rec_t * /* pointer to found extent entry */ xfs_bmap_search_extents( xfs_inode_t *ip, /* incore inode pointer */ xfs_fileoff_t bno, /* block number searched for */ - int whichfork, /* data or attr fork */ + int fork, /* data or attr fork */ int *eofp, /* out: end of file found */ xfs_extnum_t *lastxp, /* out: last extent index */ xfs_bmbt_irec_t *gotp, /* out: extent entry found */ @@ -3713,25 +3713,28 @@ xfs_bmap_search_extents( { xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_rec_t *ep; /* extent record pointer */ - int rt; /* realtime flag */ XFS_STATS_INC(xs_look_exlist); - ifp = XFS_IFORK_PTR(ip, whichfork); + ifp = XFS_IFORK_PTR(ip, fork); ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); - rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); - if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) { - cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld " - "start_block : %llx start_off : %llx blkcnt : %llx " - "extent-state : %x \n", - (ip->i_mount)->m_fsname, (long long)ip->i_ino, + if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && + !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { + xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, + "Access to block zero in inode %llu " + "start_block: %llx start_off: %llx " + "blkcnt: %llx extent-state: %x lastx: %x\n", + (unsigned long long)ip->i_ino, (unsigned long long)gotp->br_startblock, (unsigned long long)gotp->br_startoff, (unsigned long long)gotp->br_blockcount, - gotp->br_state); - } - return ep; + gotp->br_state, *lastxp); + *lastxp = NULLEXTNUM; + *eofp = 1; + return NULL; + } + return ep; } @@ -4494,7 +4497,7 @@ xfs_bmap_read_extents( xfs_ifork_t *ifp; /* fork structure */ int level; /* btree level, for checking */ xfs_mount_t *mp; /* file system mount structure */ - xfs_bmbt_ptr_t *pp; /* pointer to block address */ + __be64 *pp; /* pointer to block address */ /* REFERENCED */ xfs_extnum_t room; /* number of entries there's room for */ @@ -4510,10 +4513,10 @@ xfs_bmap_read_extents( level = be16_to_cpu(block->bb_level); ASSERT(level > 0); pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); - ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO); - ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks); - bno = INT_GET(*pp, ARCH_CONVERT); + bno = be64_to_cpu(*pp); + ASSERT(bno != NULLDFSBNO); + ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); /* * Go down the tree until leaf level is reached, following the first * pointer (leftmost) at each level. @@ -4530,10 +4533,8 @@ xfs_bmap_read_extents( break; pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); - XFS_WANT_CORRUPTED_GOTO( - XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)), - error0); - bno = INT_GET(*pp, ARCH_CONVERT); + bno = be64_to_cpu(*pp); + XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); xfs_trans_brelse(tp, bp); } /* @@ -6141,7 +6142,7 @@ xfs_check_block( short sz) { int i, j, dmxr; - xfs_bmbt_ptr_t *pp, *thispa; /* pointer to block address */ + __be64 *pp, *thispa; /* pointer to block address */ xfs_bmbt_key_t *prevp, *keyp; ASSERT(be16_to_cpu(block->bb_level) > 0); @@ -6179,11 +6180,10 @@ xfs_check_block( thispa = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, j, dmxr); } - if (INT_GET(*thispa, ARCH_CONVERT) == - INT_GET(*pp, ARCH_CONVERT)) { + if (*thispa == *pp) { cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", __FUNCTION__, j, i, - INT_GET(*thispa, ARCH_CONVERT)); + (unsigned long long)be64_to_cpu(*thispa)); panic("%s: ptrs are equal in node\n", __FUNCTION__); } @@ -6210,7 +6210,7 @@ xfs_bmap_check_leaf_extents( xfs_ifork_t *ifp; /* fork structure */ int level; /* btree level, for checking */ xfs_mount_t *mp; /* file system mount structure */ - xfs_bmbt_ptr_t *pp; /* pointer to block address */ + __be64 *pp; /* pointer to block address */ xfs_bmbt_rec_t *ep; /* pointer to current extent */ xfs_bmbt_rec_t *lastp; /* pointer to previous extent */ xfs_bmbt_rec_t *nextp; /* pointer to next extent */ @@ -6231,10 +6231,12 @@ xfs_bmap_check_leaf_extents( ASSERT(level > 0); xfs_check_block(block, mp, 1, ifp->if_broot_bytes); pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); - ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO); - ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks); - bno = INT_GET(*pp, ARCH_CONVERT); + bno = be64_to_cpu(*pp); + + ASSERT(bno != NULLDFSBNO); + ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); + /* * Go down the tree until leaf level is reached, following the first * pointer (leftmost) at each level. @@ -6265,8 +6267,8 @@ xfs_bmap_check_leaf_extents( xfs_check_block(block, mp, 0, 0); pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); - XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)), error0); - bno = INT_GET(*pp, ARCH_CONVERT); + bno = be64_to_cpu(*pp); + XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); if (bp_release) { bp_release = 0; xfs_trans_brelse(NULL, bp); @@ -6372,7 +6374,7 @@ xfs_bmap_count_blocks( xfs_ifork_t *ifp; /* fork structure */ int level; /* btree level, for checking */ xfs_mount_t *mp; /* file system mount structure */ - xfs_bmbt_ptr_t *pp; /* pointer to block address */ + __be64 *pp; /* pointer to block address */ bno = NULLFSBLOCK; mp = ip->i_mount; @@ -6395,10 +6397,10 @@ xfs_bmap_count_blocks( level = be16_to_cpu(block->bb_level); ASSERT(level > 0); pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); - ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO); - ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks); - bno = INT_GET(*pp, ARCH_CONVERT); + bno = be64_to_cpu(*pp); + ASSERT(bno != NULLDFSBNO); + ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, @@ -6425,7 +6427,7 @@ xfs_bmap_count_tree( int error; xfs_buf_t *bp, *nbp; int level = levelin; - xfs_bmbt_ptr_t *pp; + __be64 *pp; xfs_fsblock_t bno = blockno; xfs_fsblock_t nextbno; xfs_bmbt_block_t *block, *nextblock; @@ -6452,7 +6454,7 @@ xfs_bmap_count_tree( /* Dive to the next level */ pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); - bno = INT_GET(*pp, ARCH_CONVERT); + bno = be64_to_cpu(*pp); if (unlikely((error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { xfs_trans_brelse(tp, bp); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 18fb7385d71..a7b835bf870 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -58,7 +58,7 @@ STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, - xfs_bmbt_key_t *, xfs_btree_cur_t **, int *); + __uint64_t *, xfs_btree_cur_t **, int *); STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int); @@ -192,16 +192,11 @@ xfs_bmbt_trace_argifk( xfs_btree_cur_t *cur, int i, xfs_fsblock_t f, - xfs_bmbt_key_t *k, + xfs_dfiloff_t o, int line) { - xfs_dfsbno_t d; - xfs_dfiloff_t o; - - d = (xfs_dfsbno_t)f; - o = INT_GET(k->br_startoff, ARCH_CONVERT); xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line, - i, d >> 32, (int)d, o >> 32, + i, (xfs_dfsbno_t)f >> 32, (int)f, o >> 32, (int)o, 0, 0, 0, 0, 0, 0); } @@ -248,7 +243,7 @@ xfs_bmbt_trace_argik( { xfs_dfiloff_t o; - o = INT_GET(k->br_startoff, ARCH_CONVERT); + o = be64_to_cpu(k->br_startoff); xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line, i, o >> 32, (int)o, 0, 0, 0, 0, 0, @@ -286,8 +281,8 @@ xfs_bmbt_trace_cursor( xfs_bmbt_trace_argfffi(fname, c, o, b, i, j, __LINE__) #define XFS_BMBT_TRACE_ARGI(c,i) \ xfs_bmbt_trace_argi(fname, c, i, __LINE__) -#define XFS_BMBT_TRACE_ARGIFK(c,i,f,k) \ - xfs_bmbt_trace_argifk(fname, c, i, f, k, __LINE__) +#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ + xfs_bmbt_trace_argifk(fname, c, i, f, s, __LINE__) #define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ xfs_bmbt_trace_argifr(fname, c, i, f, r, __LINE__) #define XFS_BMBT_TRACE_ARGIK(c,i,k) \ @@ -299,7 +294,7 @@ xfs_bmbt_trace_cursor( #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) #define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) #define XFS_BMBT_TRACE_ARGI(c,i) -#define XFS_BMBT_TRACE_ARGIFK(c,i,f,k) +#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) #define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) #define XFS_BMBT_TRACE_ARGIK(c,i,k) #define XFS_BMBT_TRACE_CURSOR(c,s) @@ -357,7 +352,7 @@ xfs_bmbt_delrec( XFS_BMBT_TRACE_CURSOR(cur, ENTRY); XFS_BMBT_TRACE_ARGI(cur, level); ptr = cur->bc_ptrs[level]; - tcur = (xfs_btree_cur_t *)0; + tcur = NULL; if (ptr == 0) { XFS_BMBT_TRACE_CURSOR(cur, EXIT); *stat = 0; @@ -382,7 +377,7 @@ xfs_bmbt_delrec( pp = XFS_BMAP_PTR_IADDR(block, 1, cur); #ifdef DEBUG for (i = ptr; i < numrecs; i++) { - if ((error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level))) { + if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -404,7 +399,8 @@ xfs_bmbt_delrec( xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1); } if (ptr == 1) { - INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_disk_get_startoff(rp)); + key.br_startoff = + cpu_to_be64(xfs_bmbt_disk_get_startoff(rp)); kp = &key; } } @@ -621,7 +617,7 @@ xfs_bmbt_delrec( rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); #ifdef DEBUG for (i = 0; i < numrrecs; i++) { - if ((error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))) { + if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -748,7 +744,7 @@ xfs_bmbt_insrec( int logflags; /* inode logging flags */ xfs_fsblock_t nbno; /* new block number */ struct xfs_btree_cur *ncur; /* new btree cursor */ - xfs_bmbt_key_t nkey; /* new btree key value */ + __uint64_t startoff; /* new btree key value */ xfs_bmbt_rec_t nrec; /* new record count */ int optr; /* old key/record index */ xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */ @@ -759,9 +755,8 @@ xfs_bmbt_insrec( ASSERT(level < cur->bc_nlevels); XFS_BMBT_TRACE_CURSOR(cur, ENTRY); XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp); - ncur = (xfs_btree_cur_t *)0; - INT_SET(key.br_startoff, ARCH_CONVERT, - xfs_bmbt_disk_get_startoff(recp)); + ncur = NULL; + key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(recp)); optr = ptr = cur->bc_ptrs[level]; if (ptr == 0) { XFS_BMBT_TRACE_CURSOR(cur, EXIT); @@ -820,7 +815,7 @@ xfs_bmbt_insrec( optr = ptr = cur->bc_ptrs[level]; } else { if ((error = xfs_bmbt_split(cur, level, - &nbno, &nkey, &ncur, + &nbno, &startoff, &ncur, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); @@ -840,7 +835,7 @@ xfs_bmbt_insrec( #endif ptr = cur->bc_ptrs[level]; xfs_bmbt_disk_set_allf(&nrec, - nkey.br_startoff, 0, 0, + startoff, 0, 0, XFS_EXT_NORM); } else { XFS_BMBT_TRACE_CURSOR(cur, @@ -858,7 +853,7 @@ xfs_bmbt_insrec( pp = XFS_BMAP_PTR_IADDR(block, 1, cur); #ifdef DEBUG for (i = numrecs; i >= ptr; i--) { - if ((error = xfs_btree_check_lptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), + if ((error = xfs_btree_check_lptr_disk(cur, pp[i - 1], level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; @@ -870,14 +865,13 @@ xfs_bmbt_insrec( memmove(&pp[ptr], &pp[ptr - 1], /* INT_: direct copy */ (numrecs - ptr + 1) * sizeof(*pp)); #ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)*bnop, - level))) { + if ((error = xfs_btree_check_lptr(cur, *bnop, level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } #endif kp[ptr - 1] = key; - INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop); + pp[ptr - 1] = cpu_to_be64(*bnop); numrecs++; block->bb_numrecs = cpu_to_be16(numrecs); xfs_bmbt_log_keys(cur, bp, ptr, numrecs); @@ -988,7 +982,7 @@ xfs_bmbt_killroot( cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur); #ifdef DEBUG for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { - if ((error = xfs_btree_check_lptr(cur, INT_GET(cpp[i], ARCH_CONVERT), level - 1))) { + if ((error = xfs_btree_check_lptr_disk(cur, cpp[i], level - 1))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -1132,7 +1126,7 @@ xfs_bmbt_lookup( d = XFS_FSB_TO_DADDR(mp, fsbno); bp = cur->bc_bufs[level]; if (bp && XFS_BUF_ADDR(bp) != d) - bp = (xfs_buf_t *)0; + bp = NULL; if (!bp) { if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp, XFS_BMAP_BTREE_REF))) { @@ -1170,7 +1164,7 @@ xfs_bmbt_lookup( keyno = (low + high) >> 1; if (level > 0) { kkp = kkbase + keyno - 1; - startoff = INT_GET(kkp->br_startoff, ARCH_CONVERT); + startoff = be64_to_cpu(kkp->br_startoff); } else { krp = krbase + keyno - 1; startoff = xfs_bmbt_disk_get_startoff(krp); @@ -1189,13 +1183,13 @@ xfs_bmbt_lookup( if (diff > 0 && --keyno < 1) keyno = 1; pp = XFS_BMAP_PTR_IADDR(block, keyno, cur); + fsbno = be64_to_cpu(*pp); #ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) { + if ((error = xfs_btree_check_lptr(cur, fsbno, level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } #endif - fsbno = INT_GET(*pp, ARCH_CONVERT); cur->bc_ptrs[level] = keyno; } } @@ -1313,7 +1307,7 @@ xfs_bmbt_lshift( lpp = XFS_BMAP_PTR_IADDR(left, lrecs, cur); rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); #ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, INT_GET(*rpp, ARCH_CONVERT), level))) { + if ((error = xfs_btree_check_lptr_disk(cur, *rpp, level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -1340,7 +1334,7 @@ xfs_bmbt_lshift( if (level > 0) { #ifdef DEBUG for (i = 0; i < rrecs; i++) { - if ((error = xfs_btree_check_lptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT), + if ((error = xfs_btree_check_lptr_disk(cur, rpp[i + 1], level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; @@ -1354,8 +1348,7 @@ xfs_bmbt_lshift( } else { memmove(rrp, rrp + 1, rrecs * sizeof(*rrp)); xfs_bmbt_log_recs(cur, rbp, 1, rrecs); - INT_SET(key.br_startoff, ARCH_CONVERT, - xfs_bmbt_disk_get_startoff(rrp)); + key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp)); rkp = &key; } if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) { @@ -1445,7 +1438,7 @@ xfs_bmbt_rshift( rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); #ifdef DEBUG for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) { - if ((error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))) { + if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -1454,7 +1447,7 @@ xfs_bmbt_rshift( memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); #ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))) { + if ((error = xfs_btree_check_lptr_disk(cur, *lpp, level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -1469,8 +1462,7 @@ xfs_bmbt_rshift( memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); *rrp = *lrp; xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - INT_SET(key.br_startoff, ARCH_CONVERT, - xfs_bmbt_disk_get_startoff(rrp)); + key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp)); rkp = &key; } be16_add(&left->bb_numrecs, -1); @@ -1535,7 +1527,7 @@ xfs_bmbt_split( xfs_btree_cur_t *cur, int level, xfs_fsblock_t *bnop, - xfs_bmbt_key_t *keyp, + __uint64_t *startoff, xfs_btree_cur_t **curp, int *stat) /* success/failure */ { @@ -1560,7 +1552,7 @@ xfs_bmbt_split( xfs_bmbt_rec_t *rrp; /* right record pointer */ XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, keyp); + XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff); args.tp = cur->bc_tp; args.mp = cur->bc_mp; lbp = cur->bc_bufs[level]; @@ -1619,7 +1611,7 @@ xfs_bmbt_split( rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); #ifdef DEBUG for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { - if ((error = xfs_btree_check_lptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))) { + if ((error = xfs_btree_check_lptr_disk(cur, lpp[i], level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -1629,13 +1621,13 @@ xfs_bmbt_split( memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - keyp->br_startoff = INT_GET(rkp->br_startoff, ARCH_CONVERT); + *startoff = be64_to_cpu(rkp->br_startoff); } else { lrp = XFS_BMAP_REC_IADDR(left, i, cur); rrp = XFS_BMAP_REC_IADDR(right, 1, cur); memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - keyp->br_startoff = xfs_bmbt_disk_get_startoff(rrp); + *startoff = xfs_bmbt_disk_get_startoff(rrp); } be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); right->bb_rightsib = left->bb_rightsib; @@ -1728,9 +1720,9 @@ xfs_bmdr_to_bmbt( { int dmxr; xfs_bmbt_key_t *fkp; - xfs_bmbt_ptr_t *fpp; + __be64 *fpp; xfs_bmbt_key_t *tkp; - xfs_bmbt_ptr_t *tpp; + __be64 *tpp; rblock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); rblock->bb_level = dblock->bb_level; @@ -1745,7 +1737,7 @@ xfs_bmdr_to_bmbt( tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); - memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */ + memcpy(tpp, fpp, sizeof(*fpp) * dmxr); } /* @@ -1805,7 +1797,7 @@ xfs_bmbt_decrement( tp = cur->bc_tp; mp = cur->bc_mp; for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) { - fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT); + fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur)); if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp, XFS_BMAP_BTREE_REF))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); @@ -2135,7 +2127,7 @@ xfs_bmbt_increment( tp = cur->bc_tp; mp = cur->bc_mp; for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) { - fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT); + fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur)); if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp, XFS_BMAP_BTREE_REF))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); @@ -2178,7 +2170,7 @@ xfs_bmbt_insert( level = 0; nbno = NULLFSBLOCK; xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); - ncur = (xfs_btree_cur_t *)0; + ncur = NULL; pcur = cur; do { if ((error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur, @@ -2205,7 +2197,7 @@ xfs_bmbt_insert( } if (ncur) { pcur = ncur; - ncur = (xfs_btree_cur_t *)0; + ncur = NULL; } } while (nbno != NULLFSBLOCK); XFS_BMBT_TRACE_CURSOR(cur, EXIT); @@ -2356,12 +2348,12 @@ xfs_bmbt_newroot( args.firstblock = args.fsbno; if (args.fsbno == NULLFSBLOCK) { #ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) { + if ((error = xfs_btree_check_lptr_disk(cur, *pp, level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } #endif - args.fsbno = INT_GET(*pp, ARCH_CONVERT); + args.fsbno = be64_to_cpu(*pp); args.type = XFS_ALLOCTYPE_START_BNO; } else args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -2393,7 +2385,7 @@ xfs_bmbt_newroot( cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur); #ifdef DEBUG for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { - if ((error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level))) { + if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -2401,13 +2393,12 @@ xfs_bmbt_newroot( #endif memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp)); #ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)args.fsbno, - level))) { + if ((error = xfs_btree_check_lptr(cur, args.fsbno, level))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } #endif - INT_SET(*pp, ARCH_CONVERT, args.fsbno); + *pp = cpu_to_be64(args.fsbno); xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs), cur->bc_private.b.whichfork); xfs_btree_setbuf(cur, level, bp); @@ -2681,9 +2672,9 @@ xfs_bmbt_to_bmdr( { int dmxr; xfs_bmbt_key_t *fkp; - xfs_bmbt_ptr_t *fpp; + __be64 *fpp; xfs_bmbt_key_t *tkp; - xfs_bmbt_ptr_t *tpp; + __be64 *tpp; ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC); ASSERT(be64_to_cpu(rblock->bb_leftsib) == NULLDFSBNO); @@ -2698,7 +2689,7 @@ xfs_bmbt_to_bmdr( tpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); - memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */ + memcpy(tpp, fpp, sizeof(*fpp) * dmxr); } /* @@ -2740,7 +2731,7 @@ xfs_bmbt_update( XFS_BMBT_TRACE_CURSOR(cur, EXIT); return 0; } - INT_SET(key.br_startoff, ARCH_CONVERT, off); + key.br_startoff = cpu_to_be64(off); if ((error = xfs_bmbt_updkey(cur, &key, 1))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 6478cfa0e53..49539de9525 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -163,13 +163,14 @@ typedef struct xfs_bmbt_irec /* * Key structure for non-leaf levels of the tree. */ -typedef struct xfs_bmbt_key -{ - xfs_dfiloff_t br_startoff; /* starting file offset */ +typedef struct xfs_bmbt_key { + __be64 br_startoff; /* starting file offset */ } xfs_bmbt_key_t, xfs_bmdr_key_t; -typedef xfs_dfsbno_t xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; /* btree pointer type */ - /* btree block header type */ +/* btree pointer type */ +typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; + +/* btree block header type */ typedef struct xfs_btree_lblock xfs_bmbt_block_t; #define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index ee2255bd656..aeb87ca69fc 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -161,7 +161,7 @@ xfs_btree_check_key( k1 = ak1; k2 = ak2; - ASSERT(INT_GET(k1->br_startoff, ARCH_CONVERT) < INT_GET(k2->br_startoff, ARCH_CONVERT)); + ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff)); break; } case XFS_BTNUM_INO: { @@ -170,7 +170,7 @@ xfs_btree_check_key( k1 = ak1; k2 = ak2; - ASSERT(INT_GET(k1->ir_startino, ARCH_CONVERT) < INT_GET(k2->ir_startino, ARCH_CONVERT)); + ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino)); break; } default: @@ -285,8 +285,8 @@ xfs_btree_check_rec( r1 = ar1; r2 = ar2; - ASSERT(INT_GET(r1->ir_startino, ARCH_CONVERT) + XFS_INODES_PER_CHUNK <= - INT_GET(r2->ir_startino, ARCH_CONVERT)); + ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <= + be32_to_cpu(r2->ir_startino)); break; } default: diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 44f1bd98064..892b06c5426 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -145,7 +145,7 @@ typedef struct xfs_btree_cur union { xfs_alloc_rec_incore_t a; xfs_bmbt_irec_t b; - xfs_inobt_rec_t i; + xfs_inobt_rec_incore_t i; } bc_rec; /* current insert/search record value */ struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */ int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */ @@ -243,6 +243,9 @@ xfs_btree_check_lptr( xfs_dfsbno_t ptr, /* btree block disk address */ int level); /* btree block level */ +#define xfs_btree_check_lptr_disk(cur, ptr, level) \ + xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level) + /* * Checking routine: check that short form block header is ok. */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index a4aa53974f7..7a55c248ea7 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -234,7 +234,6 @@ xfs_buf_item_format( ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || (bip->bli_flags & XFS_BLI_STALE)); bp = bip->bli_buf; - ASSERT(XFS_BUF_BP_ISMAPPED(bp)); vecp = log_vector; /* @@ -628,25 +627,6 @@ xfs_buf_item_committed( } /* - * This is called when the transaction holding the buffer is aborted. - * Just behave as if the transaction had been cancelled. If we're shutting down - * and have aborted this transaction, we'll trap this buffer when it tries to - * get written out. - */ -STATIC void -xfs_buf_item_abort( - xfs_buf_log_item_t *bip) -{ - xfs_buf_t *bp; - - bp = bip->bli_buf; - xfs_buftrace("XFS_ABORT", bp); - XFS_BUF_SUPER_STALE(bp); - xfs_buf_item_unlock(bip); - return; -} - -/* * This is called to asynchronously write the buffer associated with this * buf log item out to disk. The buffer will already have been locked by * a successful call to xfs_buf_item_trylock(). If the buffer still has @@ -693,7 +673,6 @@ STATIC struct xfs_item_ops xfs_buf_item_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_buf_item_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push, - .iop_abort = (void(*)(xfs_log_item_t*))xfs_buf_item_abort, .iop_pushbuf = NULL, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_buf_item_committing @@ -901,7 +880,6 @@ xfs_buf_item_relse( XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && (XFS_BUF_IODONE_FUNC(bp) != NULL)) { - ASSERT((XFS_BUF_ISUNINITIAL(bp)) == 0); XFS_BUF_CLR_IODONE_FUNC(bp); } diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 32ab61d17ac..a68bc1f1a31 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1054,7 +1054,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) xfs_da_node_entry_t *btree; xfs_dablk_t blkno; int probe, span, max, error, retval; - xfs_dahash_t hashval; + xfs_dahash_t hashval, btreehashval; xfs_da_args_t *args; args = state->args; @@ -1079,30 +1079,32 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) return(error); } curr = blk->bp->data; - ASSERT(be16_to_cpu(curr->magic) == XFS_DA_NODE_MAGIC || - be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC || - be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC); + blk->magic = be16_to_cpu(curr->magic); + ASSERT(blk->magic == XFS_DA_NODE_MAGIC || + blk->magic == XFS_DIR2_LEAFN_MAGIC || + blk->magic == XFS_ATTR_LEAF_MAGIC); /* * Search an intermediate node for a match. */ - blk->magic = be16_to_cpu(curr->magic); if (blk->magic == XFS_DA_NODE_MAGIC) { node = blk->bp->data; - blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); + max = be16_to_cpu(node->hdr.count); + btreehashval = node->btree[max-1].hashval; + blk->hashval = be32_to_cpu(btreehashval); /* * Binary search. (note: small blocks will skip loop) */ - max = be16_to_cpu(node->hdr.count); probe = span = max / 2; hashval = args->hashval; for (btree = &node->btree[probe]; span > 4; btree = &node->btree[probe]) { span /= 2; - if (be32_to_cpu(btree->hashval) < hashval) + btreehashval = be32_to_cpu(btree->hashval); + if (btreehashval < hashval) probe += span; - else if (be32_to_cpu(btree->hashval) > hashval) + else if (btreehashval > hashval) probe -= span; else break; @@ -1133,10 +1135,10 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) blk->index = probe; blkno = be32_to_cpu(btree->before); } - } else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) { + } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); break; - } else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) { + } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); break; } @@ -1152,11 +1154,13 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { retval = xfs_dir2_leafn_lookup_int(blk->bp, args, &blk->index, state); - } - else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { + } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { retval = xfs_attr_leaf_lookup_int(blk->bp, args); blk->index = args->index; args->blkno = blk->blkno; + } else { + ASSERT(0); + return XFS_ERROR(EFSCORRUPTED); } if (((retval == ENOENT) || (retval == ENOATTR)) && (blk->hashval == args->hashval)) { @@ -1166,8 +1170,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) return(error); if (retval == 0) { continue; - } - else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { + } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { /* path_shift() gives ENOENT */ retval = XFS_ERROR(ENOATTR); } diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index bc43163456e..0893e16b7d8 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -18,14 +18,6 @@ #ifndef __XFS_ERROR_H__ #define __XFS_ERROR_H__ -#define XFS_ERECOVER 1 /* Failure to recover log */ -#define XFS_ELOGSTAT 2 /* Failure to stat log in user space */ -#define XFS_ENOLOGSPACE 3 /* Reservation too large */ -#define XFS_ENOTSUP 4 /* Operation not supported */ -#define XFS_ENOLSN 5 /* Can't find the lsn you asked for */ -#define XFS_ENOTFOUND 6 -#define XFS_ENOTXFS 7 /* Not XFS filesystem */ - #ifdef DEBUG #define XFS_ERROR_NTRAP 10 extern int xfs_etrap[XFS_ERROR_NTRAP]; @@ -175,6 +167,7 @@ extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud); #define XFS_PTAG_SHUTDOWN_CORRUPT 0x00000010 #define XFS_PTAG_SHUTDOWN_IOERROR 0x00000020 #define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040 +#define XFS_PTAG_FSBLOCK_ZERO 0x00000080 struct xfs_mount; /* PRINTFLIKE4 */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 6cf6d8769b9..6dba78199fa 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -33,9 +33,6 @@ kmem_zone_t *xfs_efi_zone; kmem_zone_t *xfs_efd_zone; STATIC void xfs_efi_item_unlock(xfs_efi_log_item_t *); -STATIC void xfs_efi_item_abort(xfs_efi_log_item_t *); -STATIC void xfs_efd_item_abort(xfs_efd_log_item_t *); - void xfs_efi_item_free(xfs_efi_log_item_t *efip) @@ -184,7 +181,7 @@ STATIC void xfs_efi_item_unlock(xfs_efi_log_item_t *efip) { if (efip->efi_item.li_flags & XFS_LI_ABORTED) - xfs_efi_item_abort(efip); + xfs_efi_item_free(efip); return; } @@ -202,18 +199,6 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) } /* - * This is called when the transaction logging the EFI is aborted. - * Free up the EFI and return. No need to clean up the slot for - * the item in the transaction. That was done by the unpin code - * which is called prior to this routine in the abort/fs-shutdown path. - */ -STATIC void -xfs_efi_item_abort(xfs_efi_log_item_t *efip) -{ - xfs_efi_item_free(efip); -} - -/* * There isn't much you can do to push on an efi item. It is simply * stuck waiting for all of its corresponding efd items to be * committed to disk. @@ -255,7 +240,6 @@ STATIC struct xfs_item_ops xfs_efi_item_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_efi_item_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_efi_item_push, - .iop_abort = (void(*)(xfs_log_item_t*))xfs_efi_item_abort, .iop_pushbuf = NULL, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_efi_item_committing @@ -386,33 +370,6 @@ xfs_efi_release(xfs_efi_log_item_t *efip, } } -/* - * This is called when the transaction that should be committing the - * EFD corresponding to the given EFI is aborted. The committed and - * canceled flags are used to coordinate the freeing of the EFI and - * the references by the transaction that committed it. - */ -STATIC void -xfs_efi_cancel( - xfs_efi_log_item_t *efip) -{ - xfs_mount_t *mp; - SPLDECL(s); - - mp = efip->efi_item.li_mountp; - AIL_LOCK(mp, s); - if (efip->efi_flags & XFS_EFI_COMMITTED) { - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); - xfs_efi_item_free(efip); - } else { - efip->efi_flags |= XFS_EFI_CANCELED; - AIL_UNLOCK(mp, s); - } -} - STATIC void xfs_efd_item_free(xfs_efd_log_item_t *efdp) { @@ -514,7 +471,7 @@ STATIC void xfs_efd_item_unlock(xfs_efd_log_item_t *efdp) { if (efdp->efd_item.li_flags & XFS_LI_ABORTED) - xfs_efd_item_abort(efdp); + xfs_efd_item_free(efdp); return; } @@ -541,27 +498,6 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) } /* - * The transaction of which this EFD is a part has been aborted. - * Inform its companion EFI of this fact and then clean up after - * ourselves. No need to clean up the slot for the item in the - * transaction. That was done by the unpin code which is called - * prior to this routine in the abort/fs-shutdown path. - */ -STATIC void -xfs_efd_item_abort(xfs_efd_log_item_t *efdp) -{ - /* - * If we got a log I/O error, it's always the case that the LR with the - * EFI got unpinned and freed before the EFD got aborted. So don't - * reference the EFI at all in that case. - */ - if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0) - xfs_efi_cancel(efdp->efd_efip); - - xfs_efd_item_free(efdp); -} - -/* * There isn't much you can do to push on an efd item. It is simply * stuck waiting for the log to be flushed to disk. */ @@ -602,7 +538,6 @@ STATIC struct xfs_item_ops xfs_efd_item_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_efd_item_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_efd_item_push, - .iop_abort = (void(*)(xfs_log_item_t*))xfs_efd_item_abort, .iop_pushbuf = NULL, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_efd_item_committing diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 0ea45edaab0..2f049f63e85 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -33,14 +33,16 @@ typedef struct xfs_extent { * conversion routine. */ +#ifndef HAVE_FORMAT32 typedef struct xfs_extent_32 { - xfs_dfsbno_t ext_start; - xfs_extlen_t ext_len; + __uint64_t ext_start; + __uint32_t ext_len; } __attribute__((packed)) xfs_extent_32_t; +#endif typedef struct xfs_extent_64 { - xfs_dfsbno_t ext_start; - xfs_extlen_t ext_len; + __uint64_t ext_start; + __uint32_t ext_len; __uint32_t ext_pad; } xfs_extent_64_t; @@ -50,25 +52,27 @@ typedef struct xfs_extent_64 { * size is given by efi_nextents. */ typedef struct xfs_efi_log_format { - unsigned short efi_type; /* efi log item type */ - unsigned short efi_size; /* size of this item */ - uint efi_nextents; /* # extents to free */ + __uint16_t efi_type; /* efi log item type */ + __uint16_t efi_size; /* size of this item */ + __uint32_t efi_nextents; /* # extents to free */ __uint64_t efi_id; /* efi identifier */ xfs_extent_t efi_extents[1]; /* array of extents to free */ } xfs_efi_log_format_t; +#ifndef HAVE_FORMAT32 typedef struct xfs_efi_log_format_32 { - unsigned short efi_type; /* efi log item type */ - unsigned short efi_size; /* size of this item */ - uint efi_nextents; /* # extents to free */ + __uint16_t efi_type; /* efi log item type */ + __uint16_t efi_size; /* size of this item */ + __uint32_t efi_nextents; /* # extents to free */ __uint64_t efi_id; /* efi identifier */ xfs_extent_32_t efi_extents[1]; /* array of extents to free */ } __attribute__((packed)) xfs_efi_log_format_32_t; +#endif typedef struct xfs_efi_log_format_64 { - unsigned short efi_type; /* efi log item type */ - unsigned short efi_size; /* size of this item */ - uint efi_nextents; /* # extents to free */ + __uint16_t efi_type; /* efi log item type */ + __uint16_t efi_size; /* size of this item */ + __uint32_t efi_nextents; /* # extents to free */ __uint64_t efi_id; /* efi identifier */ xfs_extent_64_t efi_extents[1]; /* array of extents to free */ } xfs_efi_log_format_64_t; @@ -79,25 +83,27 @@ typedef struct xfs_efi_log_format_64 { * size is given by efd_nextents; */ typedef struct xfs_efd_log_format { - unsigned short efd_type; /* efd log item type */ - unsigned short efd_size; /* size of this item */ - uint efd_nextents; /* # of extents freed */ + __uint16_t efd_type; /* efd log item type */ + __uint16_t efd_size; /* size of this item */ + __uint32_t efd_nextents; /* # of extents freed */ __uint64_t efd_efi_id; /* id of corresponding efi */ xfs_extent_t efd_extents[1]; /* array of extents freed */ } xfs_efd_log_format_t; +#ifndef HAVE_FORMAT32 typedef struct xfs_efd_log_format_32 { - unsigned short efd_type; /* efd log item type */ - unsigned short efd_size; /* size of this item */ - uint efd_nextents; /* # of extents freed */ + __uint16_t efd_type; /* efd log item type */ + __uint16_t efd_size; /* size of this item */ + __uint32_t efd_nextents; /* # of extents freed */ __uint64_t efd_efi_id; /* id of corresponding efi */ xfs_extent_32_t efd_extents[1]; /* array of extents freed */ } __attribute__((packed)) xfs_efd_log_format_32_t; +#endif typedef struct xfs_efd_log_format_64 { - unsigned short efd_type; /* efd log item type */ - unsigned short efd_size; /* size of this item */ - uint efd_nextents; /* # of extents freed */ + __uint16_t efd_type; /* efd log item type */ + __uint16_t efd_size; /* size of this item */ + __uint32_t efd_nextents; /* # of extents freed */ __uint64_t efd_efi_id; /* id of corresponding efi */ xfs_extent_64_t efd_extents[1]; /* array of extents freed */ } xfs_efd_log_format_64_t; diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 0f0ad153595..1335449841c 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -22,8 +22,6 @@ * SGI's XFS filesystem's major stuff (constants, structures) */ -#define XFS_NAME "xfs" - /* * Direct I/O attribute record used with XFS_IOC_DIOINFO * d_miniosz is the min xfer size, xfer size multiple and file seek offset @@ -426,11 +424,7 @@ typedef struct xfs_handle { - (char *) &(handle)) \ + (handle).ha_fid.xfs_fid_len) -#define XFS_HANDLE_CMP(h1, h2) memcmp(h1, h2, sizeof(xfs_handle_t)) - -#define FSHSIZE sizeof(fsid_t) - -/* +/* * Flags for going down operation */ #define XFS_FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 33164a85aa9..a446e5a115c 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -458,7 +458,7 @@ nextag: */ if (XFS_FORCED_SHUTDOWN(mp)) { up_read(&mp->m_peraglock); - return (xfs_buf_t *)0; + return NULL; } agno++; if (agno >= agcount) @@ -466,7 +466,7 @@ nextag: if (agno == pagno) { if (flags == 0) { up_read(&mp->m_peraglock); - return (xfs_buf_t *)0; + return NULL; } flags = 0; } @@ -529,10 +529,10 @@ xfs_dialloc( int offset; /* index of inode in chunk */ xfs_agino_t pagino; /* parent's a.g. relative inode # */ xfs_agnumber_t pagno; /* parent's allocation group number */ - xfs_inobt_rec_t rec; /* inode allocation record */ + xfs_inobt_rec_incore_t rec; /* inode allocation record */ xfs_agnumber_t tagno; /* testing allocation group number */ xfs_btree_cur_t *tcur; /* temp cursor */ - xfs_inobt_rec_t trec; /* temp inode allocation record */ + xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ if (*IO_agbp == NULL) { @@ -945,7 +945,7 @@ xfs_difree( int ilen; /* inodes in an inode cluster */ xfs_mount_t *mp; /* mount structure for filesystem */ int off; /* offset of inode in inode chunk */ - xfs_inobt_rec_t rec; /* btree record */ + xfs_inobt_rec_incore_t rec; /* btree record */ mp = tp->t_mountp; @@ -1195,6 +1195,7 @@ xfs_dilocate( "(0x%llx)", ino, XFS_AGINO_TO_INO(mp, agno, agino)); } + xfs_stack_trace(); #endif /* DEBUG */ return XFS_ERROR(EINVAL); } diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 616eeeb6953..8cdeeaf8632 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -568,7 +568,7 @@ xfs_inobt_insrec( /* * Make a key out of the record data to be inserted, and save it. */ - key.ir_startino = recp->ir_startino; /* INT_: direct copy */ + key.ir_startino = recp->ir_startino; optr = ptr = cur->bc_ptrs[level]; /* * If we're off the left edge, return failure. @@ -600,7 +600,7 @@ xfs_inobt_insrec( } #endif nbno = NULLAGBLOCK; - ncur = (xfs_btree_cur_t *)0; + ncur = NULL; /* * If the block is full, we can't insert the new entry until we * make the block un-full. @@ -641,7 +641,7 @@ xfs_inobt_insrec( return error; #endif ptr = cur->bc_ptrs[level]; - nrec.ir_startino = nkey.ir_startino; /* INT_: direct copy */ + nrec.ir_startino = nkey.ir_startino; } else { /* * Otherwise the insert fails. @@ -681,7 +681,7 @@ xfs_inobt_insrec( if ((error = xfs_btree_check_sptr(cur, *bnop, level))) return error; #endif - kp[ptr - 1] = key; /* INT_: struct copy */ + kp[ptr - 1] = key; pp[ptr - 1] = cpu_to_be32(*bnop); numrecs++; block->bb_numrecs = cpu_to_be16(numrecs); @@ -698,7 +698,7 @@ xfs_inobt_insrec( * Now stuff the new record in, bump numrecs * and log the new data. */ - rp[ptr - 1] = *recp; /* INT_: struct copy */ + rp[ptr - 1] = *recp; numrecs++; block->bb_numrecs = cpu_to_be16(numrecs); xfs_inobt_log_recs(cur, bp, ptr, numrecs); @@ -731,7 +731,7 @@ xfs_inobt_insrec( */ *bnop = nbno; if (nbno != NULLAGBLOCK) { - *recp = nrec; /* INT_: struct copy */ + *recp = nrec; *curp = ncur; } *stat = 1; @@ -878,7 +878,7 @@ xfs_inobt_lookup( */ bp = cur->bc_bufs[level]; if (bp && XFS_BUF_ADDR(bp) != d) - bp = (xfs_buf_t *)0; + bp = NULL; if (!bp) { /* * Need to get a new buffer. Read it, then @@ -950,12 +950,12 @@ xfs_inobt_lookup( xfs_inobt_key_t *kkp; kkp = kkbase + keyno - 1; - startino = INT_GET(kkp->ir_startino, ARCH_CONVERT); + startino = be32_to_cpu(kkp->ir_startino); } else { xfs_inobt_rec_t *krp; krp = krbase + keyno - 1; - startino = INT_GET(krp->ir_startino, ARCH_CONVERT); + startino = be32_to_cpu(krp->ir_startino); } /* * Compute difference to get next direction. @@ -1117,7 +1117,7 @@ xfs_inobt_lshift( if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level))) return error; #endif - *lpp = *rpp; /* INT_: no-change copy */ + *lpp = *rpp; xfs_inobt_log_ptrs(cur, lbp, nrec, nrec); } /* @@ -1160,7 +1160,7 @@ xfs_inobt_lshift( } else { memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - key.ir_startino = rrp->ir_startino; /* INT_: direct copy */ + key.ir_startino = rrp->ir_startino; rkp = &key; } /* @@ -1297,13 +1297,13 @@ xfs_inobt_newroot( */ kp = XFS_INOBT_KEY_ADDR(new, 1, cur); if (be16_to_cpu(left->bb_level) > 0) { - kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */ - kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */ + kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); + kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); } else { rp = XFS_INOBT_REC_ADDR(left, 1, cur); - INT_COPY(kp[0].ir_startino, rp->ir_startino, ARCH_CONVERT); + kp[0].ir_startino = rp->ir_startino; rp = XFS_INOBT_REC_ADDR(right, 1, cur); - INT_COPY(kp[1].ir_startino, rp->ir_startino, ARCH_CONVERT); + kp[1].ir_startino = rp->ir_startino; } xfs_inobt_log_keys(cur, nbp, 1, 2); /* @@ -1410,8 +1410,8 @@ xfs_inobt_rshift( if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level))) return error; #endif - *rkp = *lkp; /* INT_: no change copy */ - *rpp = *lpp; /* INT_: no change copy */ + *rkp = *lkp; + *rpp = *lpp; xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); } else { @@ -1420,7 +1420,7 @@ xfs_inobt_rshift( memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); *rrp = *lrp; xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - key.ir_startino = rrp->ir_startino; /* INT_: direct copy */ + key.ir_startino = rrp->ir_startino; rkp = &key; } /* @@ -1559,7 +1559,7 @@ xfs_inobt_split( rrp = XFS_INOBT_REC_ADDR(right, 1, cur); memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */ + keyp->ir_startino = rrp->ir_startino; } /* * Find the left block number by looking in the buffer. @@ -1813,9 +1813,9 @@ xfs_inobt_get_rec( * Point to the record and extract its data. */ rec = XFS_INOBT_REC_ADDR(block, ptr, cur); - *ino = INT_GET(rec->ir_startino, ARCH_CONVERT); - *fcnt = INT_GET(rec->ir_freecount, ARCH_CONVERT); - *free = INT_GET(rec->ir_free, ARCH_CONVERT); + *ino = be32_to_cpu(rec->ir_startino); + *fcnt = be32_to_cpu(rec->ir_freecount); + *free = be64_to_cpu(rec->ir_free); *stat = 1; return 0; } @@ -1930,10 +1930,10 @@ xfs_inobt_insert( level = 0; nbno = NULLAGBLOCK; - INT_SET(nrec.ir_startino, ARCH_CONVERT, cur->bc_rec.i.ir_startino); - INT_SET(nrec.ir_freecount, ARCH_CONVERT, cur->bc_rec.i.ir_freecount); - INT_SET(nrec.ir_free, ARCH_CONVERT, cur->bc_rec.i.ir_free); - ncur = (xfs_btree_cur_t *)0; + nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); + nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount); + nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); + ncur = NULL; pcur = cur; /* * Loop going up the tree, starting at the leaf level. @@ -1965,7 +1965,7 @@ xfs_inobt_insert( */ if (ncur) { pcur = ncur; - ncur = (xfs_btree_cur_t *)0; + ncur = NULL; } } while (nbno != NULLAGBLOCK); *stat = i; @@ -2060,9 +2060,9 @@ xfs_inobt_update( /* * Fill in the new contents and log them. */ - INT_SET(rp->ir_startino, ARCH_CONVERT, ino); - INT_SET(rp->ir_freecount, ARCH_CONVERT, fcnt); - INT_SET(rp->ir_free, ARCH_CONVERT, free); + rp->ir_startino = cpu_to_be32(ino); + rp->ir_freecount = cpu_to_be32(fcnt); + rp->ir_free = cpu_to_be64(free); xfs_inobt_log_recs(cur, bp, ptr, ptr); /* * Updating first record in leaf. Pass new key value up to our parent. @@ -2070,7 +2070,7 @@ xfs_inobt_update( if (ptr == 1) { xfs_inobt_key_t key; /* key containing [ino] */ - INT_SET(key.ir_startino, ARCH_CONVERT, ino); + key.ir_startino = cpu_to_be32(ino); if ((error = xfs_inobt_updkey(cur, &key, 1))) return error; } diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index ae3904cb1ee..2c0e49893ff 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -47,19 +47,24 @@ static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) /* * Data record structure */ -typedef struct xfs_inobt_rec -{ +typedef struct xfs_inobt_rec { + __be32 ir_startino; /* starting inode number */ + __be32 ir_freecount; /* count of free inodes (set bits) */ + __be64 ir_free; /* free inode mask */ +} xfs_inobt_rec_t; + +typedef struct xfs_inobt_rec_incore { xfs_agino_t ir_startino; /* starting inode number */ __int32_t ir_freecount; /* count of free inodes (set bits) */ xfs_inofree_t ir_free; /* free inode mask */ -} xfs_inobt_rec_t; +} xfs_inobt_rec_incore_t; + /* * Key structure */ -typedef struct xfs_inobt_key -{ - xfs_agino_t ir_startino; /* starting inode number */ +typedef struct xfs_inobt_key { + __be32 ir_startino; /* starting inode number */ } xfs_inobt_key_t; /* btree pointer type */ @@ -77,7 +82,7 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; #define XFS_INOBT_IS_FREE(rp,i) \ (((rp)->ir_free & XFS_INOBT_MASK(i)) != 0) #define XFS_INOBT_IS_FREE_DISK(rp,i) \ - ((INT_GET((rp)->ir_free,ARCH_CONVERT) & XFS_INOBT_MASK(i)) != 0) + ((be64_to_cpu((rp)->ir_free) & XFS_INOBT_MASK(i)) != 0) #define XFS_INOBT_SET_FREE(rp,i) ((rp)->ir_free |= XFS_INOBT_MASK(i)) #define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i)) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 0724df7fabb..b73d216ecaf 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -50,7 +50,7 @@ void xfs_ihash_init(xfs_mount_t *mp) { __uint64_t icount; - uint i, flags = KM_SLEEP | KM_MAYFAIL; + uint i; if (!mp->m_ihsize) { icount = mp->m_maxicount ? mp->m_maxicount : @@ -61,14 +61,13 @@ xfs_ihash_init(xfs_mount_t *mp) (64 * NBPP) / sizeof(xfs_ihash_t)); } - while (!(mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize * - sizeof(xfs_ihash_t), flags))) { - if ((mp->m_ihsize >>= 1) <= NBPP) - flags = KM_SLEEP; - } - for (i = 0; i < mp->m_ihsize; i++) { + mp->m_ihash = kmem_zalloc_greedy(&mp->m_ihsize, + NBPC * sizeof(xfs_ihash_t), + mp->m_ihsize * sizeof(xfs_ihash_t), + KM_SLEEP | KM_MAYFAIL | KM_LARGE); + mp->m_ihsize /= sizeof(xfs_ihash_t); + for (i = 0; i < mp->m_ihsize; i++) rwlock_init(&(mp->m_ihash[i].ih_lock)); - } } /* @@ -77,7 +76,7 @@ xfs_ihash_init(xfs_mount_t *mp) void xfs_ihash_free(xfs_mount_t *mp) { - kmem_free(mp->m_ihash, mp->m_ihsize*sizeof(xfs_ihash_t)); + kmem_free(mp->m_ihash, mp->m_ihsize * sizeof(xfs_ihash_t)); mp->m_ihash = NULL; } @@ -95,7 +94,7 @@ xfs_chash_init(xfs_mount_t *mp) mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize); mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize * sizeof(xfs_chash_t), - KM_SLEEP); + KM_SLEEP | KM_LARGE); for (i = 0; i < mp->m_chsize; i++) { spinlock_init(&mp->m_chash[i].ch_lock,"xfshash"); } @@ -244,7 +243,9 @@ again: XFS_STATS_INC(xs_ig_found); + spin_lock(&ip->i_flags_lock); ip->i_flags &= ~XFS_IRECLAIMABLE; + spin_unlock(&ip->i_flags_lock); version = ih->ih_version; read_unlock(&ih->ih_lock); xfs_ihash_promote(ih, ip, version); @@ -290,15 +291,17 @@ again: finish_inode: if (ip->i_d.di_mode == 0) { - if (!(flags & IGET_CREATE)) + if (!(flags & XFS_IGET_CREATE)) return ENOENT; xfs_iocore_inode_reinit(ip); } - + if (lock_flags != 0) xfs_ilock(ip, lock_flags); + spin_lock(&ip->i_flags_lock); ip->i_flags &= ~XFS_ISTALE; + spin_unlock(&ip->i_flags_lock); vn_trace_exit(vp, "xfs_iget.found", (inst_t *)__return_address); @@ -320,21 +323,20 @@ finish_inode: * Read the disk inode attributes into a new inode structure and get * a new vnode for it. This should also initialize i_ino and i_mount. */ - error = xfs_iread(mp, tp, ino, &ip, bno); - if (error) { + error = xfs_iread(mp, tp, ino, &ip, bno, + (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); + if (error) return error; - } vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address); xfs_inode_lock_init(ip, vp); xfs_iocore_inode_init(ip); - if (lock_flags != 0) { + if (lock_flags) xfs_ilock(ip, lock_flags); - } - - if ((ip->i_d.di_mode == 0) && !(flags & IGET_CREATE)) { + + if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { xfs_idestroy(ip); return ENOENT; } @@ -369,7 +371,9 @@ finish_inode: ih->ih_next = ip; ip->i_udquot = ip->i_gdquot = NULL; ih->ih_version++; + spin_lock(&ip->i_flags_lock); ip->i_flags |= XFS_INEW; + spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); @@ -548,7 +552,7 @@ xfs_inode_lock_init( mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", vp->v_number); init_waitqueue_head(&ip->i_ipin_wait); atomic_set(&ip->i_pincount, 0); - init_sema(&ip->i_flock, 1, "xfsfino", vp->v_number); + initnsema(&ip->i_flock, 1, "xfsfino"); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1f8ecff8553..c27d7d495aa 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -854,7 +854,8 @@ xfs_iread( xfs_trans_t *tp, xfs_ino_t ino, xfs_inode_t **ipp, - xfs_daddr_t bno) + xfs_daddr_t bno, + uint imap_flags) { xfs_buf_t *bp; xfs_dinode_t *dip; @@ -866,6 +867,7 @@ xfs_iread( ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); ip->i_ino = ino; ip->i_mount = mp; + spin_lock_init(&ip->i_flags_lock); /* * Get pointer's to the on-disk inode and the buffer containing it. @@ -874,7 +876,7 @@ xfs_iread( * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will * know that this is a new incore inode. */ - error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, 0); + error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags); if (error) { kmem_zone_free(xfs_inode_zone, ip); return error; @@ -1113,7 +1115,7 @@ xfs_ialloc( * to prevent others from looking at until we're done. */ error = xfs_trans_iget(tp->t_mountp, tp, ino, - IGET_CREATE, XFS_ILOCK_EXCL, &ip); + XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); if (error != 0) { return error; } @@ -2213,7 +2215,9 @@ xfs_ifree_cluster( if (ip == free_ip) { if (xfs_iflock_nowait(ip)) { + spin_lock(&ip->i_flags_lock); ip->i_flags |= XFS_ISTALE; + spin_unlock(&ip->i_flags_lock); if (xfs_inode_clean(ip)) { xfs_ifunlock(ip); @@ -2227,7 +2231,9 @@ xfs_ifree_cluster( if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { if (xfs_iflock_nowait(ip)) { + spin_lock(&ip->i_flags_lock); ip->i_flags |= XFS_ISTALE; + spin_unlock(&ip->i_flags_lock); if (xfs_inode_clean(ip)) { xfs_ifunlock(ip); @@ -2257,7 +2263,9 @@ xfs_ifree_cluster( AIL_LOCK(mp,s); iip->ili_flush_lsn = iip->ili_item.li_lsn; AIL_UNLOCK(mp, s); + spin_lock(&iip->ili_inode->i_flags_lock); iip->ili_inode->i_flags |= XFS_ISTALE; + spin_unlock(&iip->ili_inode->i_flags_lock); pre_flushed++; } lip = lip->li_bio_list; @@ -2753,19 +2761,29 @@ xfs_iunpin( * call as the inode reclaim may be blocked waiting for * the inode to become unpinned. */ + struct inode *inode = NULL; + + spin_lock(&ip->i_flags_lock); if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { bhv_vnode_t *vp = XFS_ITOV_NULL(ip); /* make sync come back and flush this inode */ if (vp) { - struct inode *inode = vn_to_inode(vp); + inode = vn_to_inode(vp); if (!(inode->i_state & - (I_NEW|I_FREEING|I_CLEAR))) - mark_inode_dirty_sync(inode); + (I_NEW|I_FREEING|I_CLEAR))) { + inode = igrab(inode); + if (inode) + mark_inode_dirty_sync(inode); + } else + inode = NULL; } } + spin_unlock(&ip->i_flags_lock); wake_up(&ip->i_ipin_wait); + if (inode) + iput(inode); } } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index d10b76ed1e5..e96eb0835fe 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -267,6 +267,7 @@ typedef struct xfs_inode { sema_t i_flock; /* inode flush lock */ atomic_t i_pincount; /* inode pin count */ wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ + spinlock_t i_flags_lock; /* inode i_flags lock */ #ifdef HAVE_REFCACHE struct xfs_inode **i_refcache; /* ptr to entry in ref cache */ struct xfs_inode *i_release; /* inode to unref */ @@ -389,11 +390,14 @@ typedef struct xfs_inode { (((vfsp)->vfs_flag & VFS_GRPID) || ((pip)->i_d.di_mode & S_ISGID)) /* - * xfs_iget.c prototypes. + * Flags for xfs_iget() */ +#define XFS_IGET_CREATE 0x1 +#define XFS_IGET_BULKSTAT 0x2 -#define IGET_CREATE 1 - +/* + * xfs_iget.c prototypes. + */ void xfs_ihash_init(struct xfs_mount *); void xfs_ihash_free(struct xfs_mount *); void xfs_chash_init(struct xfs_mount *); @@ -425,7 +429,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *, xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **, xfs_daddr_t, uint); int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, - xfs_inode_t **, xfs_daddr_t); + xfs_inode_t **, xfs_daddr_t, uint); int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index f8e80d8e723..a7a92251eb5 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -743,21 +743,6 @@ xfs_inode_item_committed( } /* - * The transaction with the inode locked has aborted. The inode - * must not be dirty within the transaction (unless we're forcibly - * shutting down). We simply unlock just as if the transaction - * had been cancelled. - */ -STATIC void -xfs_inode_item_abort( - xfs_inode_log_item_t *iip) -{ - xfs_inode_item_unlock(iip); - return; -} - - -/* * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK * failed to get the inode flush lock but did get the inode locked SHARED. * Here we're trying to see if the inode buffer is incore, and if so whether it's @@ -915,7 +900,6 @@ STATIC struct xfs_item_ops xfs_inode_item_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_inode_item_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_inode_item_push, - .iop_abort = (void(*)(xfs_log_item_t*))xfs_inode_item_abort, .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_inode_item_committing diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 5db6cd1b4cf..bfe92ea1795 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -25,52 +25,54 @@ * must be added on to the end. */ typedef struct xfs_inode_log_format { - unsigned short ilf_type; /* inode log item type */ - unsigned short ilf_size; /* size of this item */ - uint ilf_fields; /* flags for fields logged */ - ushort ilf_asize; /* size of attr d/ext/root */ - ushort ilf_dsize; /* size of data/ext/root */ - xfs_ino_t ilf_ino; /* inode number */ + __uint16_t ilf_type; /* inode log item type */ + __uint16_t ilf_size; /* size of this item */ + __uint32_t ilf_fields; /* flags for fields logged */ + __uint16_t ilf_asize; /* size of attr d/ext/root */ + __uint16_t ilf_dsize; /* size of data/ext/root */ + __uint64_t ilf_ino; /* inode number */ union { - xfs_dev_t ilfu_rdev; /* rdev value for dev inode*/ + __uint32_t ilfu_rdev; /* rdev value for dev inode*/ uuid_t ilfu_uuid; /* mount point value */ } ilf_u; __int64_t ilf_blkno; /* blkno of inode buffer */ - int ilf_len; /* len of inode buffer */ - int ilf_boffset; /* off of inode in buffer */ + __int32_t ilf_len; /* len of inode buffer */ + __int32_t ilf_boffset; /* off of inode in buffer */ } xfs_inode_log_format_t; +#ifndef HAVE_FORMAT32 typedef struct xfs_inode_log_format_32 { - unsigned short ilf_type; /* 16: inode log item type */ - unsigned short ilf_size; /* 16: size of this item */ - uint ilf_fields; /* 32: flags for fields logged */ - ushort ilf_asize; /* 32: size of attr d/ext/root */ - ushort ilf_dsize; /* 32: size of data/ext/root */ - xfs_ino_t ilf_ino; /* 64: inode number */ + __uint16_t ilf_type; /* inode log item type */ + __uint16_t ilf_size; /* size of this item */ + __uint32_t ilf_fields; /* flags for fields logged */ + __uint16_t ilf_asize; /* size of attr d/ext/root */ + __uint16_t ilf_dsize; /* size of data/ext/root */ + __uint64_t ilf_ino; /* inode number */ union { - xfs_dev_t ilfu_rdev; /* 32: rdev value for dev inode*/ - uuid_t ilfu_uuid; /* 128: mount point value */ + __uint32_t ilfu_rdev; /* rdev value for dev inode*/ + uuid_t ilfu_uuid; /* mount point value */ } ilf_u; - __int64_t ilf_blkno; /* 64: blkno of inode buffer */ - int ilf_len; /* 32: len of inode buffer */ - int ilf_boffset; /* 32: off of inode in buffer */ + __int64_t ilf_blkno; /* blkno of inode buffer */ + __int32_t ilf_len; /* len of inode buffer */ + __int32_t ilf_boffset; /* off of inode in buffer */ } __attribute__((packed)) xfs_inode_log_format_32_t; +#endif typedef struct xfs_inode_log_format_64 { - unsigned short ilf_type; /* 16: inode log item type */ - unsigned short ilf_size; /* 16: size of this item */ - uint ilf_fields; /* 32: flags for fields logged */ - ushort ilf_asize; /* 32: size of attr d/ext/root */ - ushort ilf_dsize; /* 32: size of data/ext/root */ - __uint32_t ilf_pad; /* 32: pad for 64 bit boundary */ - xfs_ino_t ilf_ino; /* 64: inode number */ + __uint16_t ilf_type; /* inode log item type */ + __uint16_t ilf_size; /* size of this item */ + __uint32_t ilf_fields; /* flags for fields logged */ + __uint16_t ilf_asize; /* size of attr d/ext/root */ + __uint16_t ilf_dsize; /* size of data/ext/root */ + __uint32_t ilf_pad; /* pad for 64 bit boundary */ + __uint64_t ilf_ino; /* inode number */ union { - xfs_dev_t ilfu_rdev; /* 32: rdev value for dev inode*/ - uuid_t ilfu_uuid; /* 128: mount point value */ + __uint32_t ilfu_rdev; /* rdev value for dev inode*/ + uuid_t ilfu_uuid; /* mount point value */ } ilf_u; - __int64_t ilf_blkno; /* 64: blkno of inode buffer */ - int ilf_len; /* 32: len of inode buffer */ - int ilf_boffset; /* 32: off of inode in buffer */ + __int64_t ilf_blkno; /* blkno of inode buffer */ + __int32_t ilf_len; /* len of inode buffer */ + __int32_t ilf_boffset; /* off of inode in buffer */ } xfs_inode_log_format_64_t; /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index f1949c16df1..19655124da7 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -398,6 +398,23 @@ xfs_flush_space( return 1; } +STATIC int +xfs_cmn_err_fsblock_zero( + xfs_inode_t *ip, + xfs_bmbt_irec_t *imap) +{ + xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, + "Access to block zero in inode %llu " + "start_block: %llx start_off: %llx " + "blkcnt: %llx extent-state: %x\n", + (unsigned long long)ip->i_ino, + (unsigned long long)imap->br_startblock, + (unsigned long long)imap->br_startoff, + (unsigned long long)imap->br_blockcount, + imap->br_state); + return EFSCORRUPTED; +} + int xfs_iomap_write_direct( xfs_inode_t *ip, @@ -536,23 +553,17 @@ xfs_iomap_write_direct( * Copy any maps to caller's array and return any error. */ if (nimaps == 0) { - error = (ENOSPC); + error = ENOSPC; + goto error_out; + } + + if (unlikely(!imap.br_startblock && !(io->io_flags & XFS_IOCORE_RT))) { + error = xfs_cmn_err_fsblock_zero(ip, &imap); goto error_out; } *ret_imap = imap; *nmaps = 1; - if ( !(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) { - cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld " - "start_block : %llx start_off : %llx blkcnt : %llx " - "extent-state : %x \n", - (ip->i_mount)->m_fsname, - (long long)ip->i_ino, - (unsigned long long)ret_imap->br_startblock, - (unsigned long long)ret_imap->br_startoff, - (unsigned long long)ret_imap->br_blockcount, - ret_imap->br_state); - } return 0; error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ @@ -715,17 +726,8 @@ retry: goto retry; } - if (!(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) { - cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld " - "start_block : %llx start_off : %llx blkcnt : %llx " - "extent-state : %x \n", - (ip->i_mount)->m_fsname, - (long long)ip->i_ino, - (unsigned long long)ret_imap->br_startblock, - (unsigned long long)ret_imap->br_startoff, - (unsigned long long)ret_imap->br_blockcount, - ret_imap->br_state); - } + if (unlikely(!imap[0].br_startblock && !(io->io_flags & XFS_IOCORE_RT))) + return xfs_cmn_err_fsblock_zero(ip, &imap[0]); *ret_imap = imap[0]; *nmaps = 1; @@ -853,24 +855,10 @@ xfs_iomap_write_allocate( * See if we were able to allocate an extent that * covers at least part of the callers request */ - for (i = 0; i < nimaps; i++) { - if (!(io->io_flags & XFS_IOCORE_RT) && - !imap[i].br_startblock) { - cmn_err(CE_PANIC,"Access to block zero: " - "fs <%s> inode: %lld " - "start_block : %llx start_off : %llx " - "blkcnt : %llx extent-state : %x \n", - (ip->i_mount)->m_fsname, - (long long)ip->i_ino, - (unsigned long long) - imap[i].br_startblock, - (unsigned long long) - imap[i].br_startoff, - (unsigned long long) - imap[i].br_blockcount, - imap[i].br_state); - } + if (unlikely(!imap[i].br_startblock && + !(io->io_flags & XFS_IOCORE_RT))) + return xfs_cmn_err_fsblock_zero(ip, &imap[i]); if ((offset_fsb >= imap[i].br_startoff) && (offset_fsb < (imap[i].br_startoff + imap[i].br_blockcount))) { @@ -941,7 +929,7 @@ xfs_iomap_write_unwritten( XFS_WRITE_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); - goto error0; + return XFS_ERROR(error); } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -967,19 +955,11 @@ xfs_iomap_write_unwritten( error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) - goto error0; - - if ( !(io->io_flags & XFS_IOCORE_RT) && !imap.br_startblock) { - cmn_err(CE_PANIC,"Access to block zero: fs <%s> " - "inode: %lld start_block : %llx start_off : " - "%llx blkcnt : %llx extent-state : %x \n", - (ip->i_mount)->m_fsname, - (long long)ip->i_ino, - (unsigned long long)imap.br_startblock, - (unsigned long long)imap.br_startoff, - (unsigned long long)imap.br_blockcount, - imap.br_state); - } + return XFS_ERROR(error); + + if (unlikely(!imap.br_startblock && + !(io->io_flags & XFS_IOCORE_RT))) + return xfs_cmn_err_fsblock_zero(ip, &imap); if ((numblks_fsb = imap.br_blockcount) == 0) { /* @@ -999,6 +979,5 @@ error_on_bmapi_transaction: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); xfs_iunlock(ip, XFS_ILOCK_EXCL); -error0: return XFS_ERROR(error); } diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 46249e4d1fe..7775ddc0b3c 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -39,6 +39,16 @@ #include "xfs_error.h" #include "xfs_btree.h" +int +xfs_internal_inum( + xfs_mount_t *mp, + xfs_ino_t ino) +{ + return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || + (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) && + (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); +} + STATIC int xfs_bulkstat_one_iget( xfs_mount_t *mp, /* mount point for filesystem */ @@ -52,7 +62,8 @@ xfs_bulkstat_one_iget( bhv_vnode_t *vp; int error; - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno); + error = xfs_iget(mp, NULL, ino, + XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); if (error) { *stat = BULKSTAT_RV_NOTHING; return error; @@ -212,17 +223,12 @@ xfs_bulkstat_one( xfs_dinode_t *dip; /* dinode inode pointer */ dip = (xfs_dinode_t *)dibuff; + *stat = BULKSTAT_RV_NOTHING; - if (!buffer || ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || - (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) && - (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))) { - *stat = BULKSTAT_RV_NOTHING; + if (!buffer || xfs_internal_inum(mp, ino)) return XFS_ERROR(EINVAL); - } - if (ubsize < sizeof(*buf)) { - *stat = BULKSTAT_RV_NOTHING; + if (ubsize < sizeof(*buf)) return XFS_ERROR(ENOMEM); - } buf = kmem_alloc(sizeof(*buf), KM_SLEEP); @@ -238,8 +244,7 @@ xfs_bulkstat_one( } if (copy_to_user(buffer, buf, sizeof(*buf))) { - *stat = BULKSTAT_RV_NOTHING; - error = EFAULT; + error = EFAULT; goto out_free; } @@ -253,6 +258,46 @@ xfs_bulkstat_one( } /* + * Test to see whether we can use the ondisk inode directly, based + * on the given bulkstat flags, filling in dipp accordingly. + * Returns zero if the inode is dodgey. + */ +STATIC int +xfs_bulkstat_use_dinode( + xfs_mount_t *mp, + int flags, + xfs_buf_t *bp, + int clustidx, + xfs_dinode_t **dipp) +{ + xfs_dinode_t *dip; + unsigned int aformat; + + *dipp = NULL; + if (!bp || (flags & BULKSTAT_FG_IGET)) + return 1; + dip = (xfs_dinode_t *) + xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog); + if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC || + !XFS_DINODE_GOOD_VERSION( + INT_GET(dip->di_core.di_version, ARCH_CONVERT))) + return 0; + if (flags & BULKSTAT_FG_QUICK) { + *dipp = dip; + return 1; + } + /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ + aformat = INT_GET(dip->di_core.di_aformat, ARCH_CONVERT); + if ((XFS_CFORK_Q(&dip->di_core) == 0) || + (aformat == XFS_DINODE_FMT_LOCAL) || + (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) { + *dipp = dip; + return 1; + } + return 1; +} + +/* * Return stat information in bulk (by-inode) for the filesystem. */ int /* error status */ @@ -284,10 +329,11 @@ xfs_bulkstat( xfs_agino_t gino; /* current btree rec's start inode */ int i; /* loop index */ int icount; /* count of inodes good in irbuf */ + size_t irbsize; /* size of irec buffer in bytes */ xfs_ino_t ino; /* inode number (filesystem) */ - xfs_inobt_rec_t *irbp; /* current irec buffer pointer */ - xfs_inobt_rec_t *irbuf; /* start of irec buffer */ - xfs_inobt_rec_t *irbufend; /* end of good irec buffer entries */ + xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */ + xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ + xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ xfs_ino_t lastino=0; /* last inode number returned */ int nbcluster; /* # of blocks in a cluster */ int nicluster; /* # of inodes in a cluster */ @@ -328,13 +374,10 @@ xfs_bulkstat( (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); nimask = ~(nicluster - 1); nbcluster = nicluster >> mp->m_sb.sb_inopblog; - /* - * Allocate a page-sized buffer for inode btree records. - * We could try allocating something smaller, but for normal - * calls we'll always (potentially) need the whole page. - */ - irbuf = kmem_alloc(NBPC, KM_SLEEP); - nirbuf = NBPC / sizeof(*irbuf); + irbuf = kmem_zalloc_greedy(&irbsize, NBPC, NBPC * 4, + KM_SLEEP | KM_MAYFAIL | KM_LARGE); + nirbuf = irbsize / sizeof(*irbuf); + /* * Loop over the allocation groups, starting from the last * inode returned; 0 means start of the allocation group. @@ -358,7 +401,7 @@ xfs_bulkstat( * Allocate and initialize a btree cursor for ialloc btree. */ cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO, - (xfs_inode_t *)0, 0); + (xfs_inode_t *)0, 0); irbp = irbuf; irbufend = irbuf + nirbuf; end_of_ag = 0; @@ -395,9 +438,9 @@ xfs_bulkstat( gcnt++; } gfree |= XFS_INOBT_MASKN(0, chunkidx); - INT_SET(irbp->ir_startino, ARCH_CONVERT, gino); - INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt); - INT_SET(irbp->ir_free, ARCH_CONVERT, gfree); + irbp->ir_startino = gino; + irbp->ir_freecount = gcnt; + irbp->ir_free = gfree; irbp++; agino = gino + XFS_INODES_PER_CHUNK; icount = XFS_INODES_PER_CHUNK - gcnt; @@ -451,11 +494,27 @@ xfs_bulkstat( } /* * If this chunk has any allocated inodes, save it. + * Also start read-ahead now for this chunk. */ if (gcnt < XFS_INODES_PER_CHUNK) { - INT_SET(irbp->ir_startino, ARCH_CONVERT, gino); - INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt); - INT_SET(irbp->ir_free, ARCH_CONVERT, gfree); + /* + * Loop over all clusters in the next chunk. + * Do a readahead if there are any allocated + * inodes in that cluster. + */ + for (agbno = XFS_AGINO_TO_AGBNO(mp, gino), + chunkidx = 0; + chunkidx < XFS_INODES_PER_CHUNK; + chunkidx += nicluster, + agbno += nbcluster) { + if (XFS_INOBT_MASKN(chunkidx, + nicluster) & ~gfree) + xfs_btree_reada_bufs(mp, agno, + agbno, nbcluster); + } + irbp->ir_startino = gino; + irbp->ir_freecount = gcnt; + irbp->ir_free = gfree; irbp++; icount += XFS_INODES_PER_CHUNK - gcnt; } @@ -479,33 +538,11 @@ xfs_bulkstat( for (irbp = irbuf; irbp < irbufend && ubleft >= statstruct_size; irbp++) { /* - * Read-ahead the next chunk's worth of inodes. - */ - if (&irbp[1] < irbufend) { - /* - * Loop over all clusters in the next chunk. - * Do a readahead if there are any allocated - * inodes in that cluster. - */ - for (agbno = XFS_AGINO_TO_AGBNO(mp, - INT_GET(irbp[1].ir_startino, ARCH_CONVERT)), - chunkidx = 0; - chunkidx < XFS_INODES_PER_CHUNK; - chunkidx += nicluster, - agbno += nbcluster) { - if (XFS_INOBT_MASKN(chunkidx, - nicluster) & - ~(INT_GET(irbp[1].ir_free, ARCH_CONVERT))) - xfs_btree_reada_bufs(mp, agno, - agbno, nbcluster); - } - } - /* * Now process this chunk of inodes. */ - for (agino = INT_GET(irbp->ir_startino, ARCH_CONVERT), chunkidx = 0, clustidx = 0; + for (agino = irbp->ir_startino, chunkidx = clustidx = 0; ubleft > 0 && - INT_GET(irbp->ir_freecount, ARCH_CONVERT) < XFS_INODES_PER_CHUNK; + irbp->ir_freecount < XFS_INODES_PER_CHUNK; chunkidx++, clustidx++, agino++) { ASSERT(chunkidx < XFS_INODES_PER_CHUNK); /* @@ -525,11 +562,12 @@ xfs_bulkstat( */ if ((chunkidx & (nicluster - 1)) == 0) { agbno = XFS_AGINO_TO_AGBNO(mp, - INT_GET(irbp->ir_startino, ARCH_CONVERT)) + + irbp->ir_startino) + ((chunkidx & nimask) >> mp->m_sb.sb_inopblog); - if (flags & BULKSTAT_FG_QUICK) { + if (flags & (BULKSTAT_FG_QUICK | + BULKSTAT_FG_INLINE)) { ino = XFS_AGINO_TO_INO(mp, agno, agino); bno = XFS_AGB_TO_DADDR(mp, agno, @@ -543,6 +581,7 @@ xfs_bulkstat( KM_SLEEP); ip->i_ino = ino; ip->i_mount = mp; + spin_lock_init(&ip->i_flags_lock); if (bp) xfs_buf_relse(bp); error = xfs_itobp(mp, NULL, ip, @@ -564,30 +603,34 @@ xfs_bulkstat( /* * Skip if this inode is free. */ - if (XFS_INOBT_MASK(chunkidx) & INT_GET(irbp->ir_free, ARCH_CONVERT)) + if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) continue; /* * Count used inodes as free so we can tell * when the chunk is used up. */ - INT_MOD(irbp->ir_freecount, ARCH_CONVERT, +1); + irbp->ir_freecount++; ino = XFS_AGINO_TO_INO(mp, agno, agino); bno = XFS_AGB_TO_DADDR(mp, agno, agbno); - if (flags & BULKSTAT_FG_QUICK) { - dip = (xfs_dinode_t *)xfs_buf_offset(bp, - (clustidx << mp->m_sb.sb_inodelog)); - - if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) - != XFS_DINODE_MAGIC - || !XFS_DINODE_GOOD_VERSION( - INT_GET(dip->di_core.di_version, ARCH_CONVERT))) - continue; + if (!xfs_bulkstat_use_dinode(mp, flags, bp, + clustidx, &dip)) + continue; + /* + * If we need to do an iget, cannot hold bp. + * Drop it, until starting the next cluster. + */ + if ((flags & BULKSTAT_FG_INLINE) && !dip) { + if (bp) + xfs_buf_relse(bp); + bp = NULL; } /* * Get the inode and fill in a single buffer. * BULKSTAT_FG_QUICK uses dip to fill it in. * BULKSTAT_FG_IGET uses igets. + * BULKSTAT_FG_INLINE uses dip if we have an + * inline attr fork, else igets. * See: xfs_bulkstat_one & xfs_dm_bulkstat_one. * This is also used to count inodes/blks, etc * in xfs_qm_quotacheck. @@ -597,8 +640,15 @@ xfs_bulkstat( ubleft, private_data, bno, &ubused, dip, &fmterror); if (fmterror == BULKSTAT_RV_NOTHING) { - if (error == ENOMEM) + if (error == EFAULT) { + ubleft = 0; + rval = error; + break; + } + else if (error == ENOMEM) ubleft = 0; + else + lastino = ino; continue; } if (fmterror == BULKSTAT_RV_GIVEUP) { @@ -633,7 +683,7 @@ xfs_bulkstat( /* * Done, we're either out of filesystem or space to put the data. */ - kmem_free(irbuf, NBPC); + kmem_free(irbuf, irbsize); *ubcountp = ubelem; if (agno >= mp->m_sb.sb_agcount) { /* diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index be5f12e07d2..f25a28862a1 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -36,15 +36,16 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, /* * Values for stat return value. */ -#define BULKSTAT_RV_NOTHING 0 -#define BULKSTAT_RV_DIDONE 1 -#define BULKSTAT_RV_GIVEUP 2 +#define BULKSTAT_RV_NOTHING 0 +#define BULKSTAT_RV_DIDONE 1 +#define BULKSTAT_RV_GIVEUP 2 /* * Values for bulkstat flag argument. */ -#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */ -#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */ +#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */ +#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */ +#define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */ /* * Return stat information in bulk (by-inode) for the filesystem. @@ -80,6 +81,11 @@ xfs_bulkstat_one( void *dibuff, int *stat); +int +xfs_internal_inum( + xfs_mount_t *mp, + xfs_ino_t ino); + int /* error status */ xfs_inumbers( xfs_mount_t *mp, /* mount point for filesystem */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 21ac1a67e3e..c48bf61f17b 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -617,7 +617,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) reg[0].i_len = sizeof(magic); XLOG_VEC_SET_TYPE(®[0], XLOG_REG_TYPE_UNMOUNT); - error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, 0); + error = xfs_log_reserve(mp, 600, 1, &tic, + XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); if (!error) { /* remove inited flag */ ((xlog_ticket_t *)tic)->t_flags = 0; @@ -655,8 +656,11 @@ xfs_log_unmount_write(xfs_mount_t *mp) } else { LOG_UNLOCK(log, s); } - if (tic) + if (tic) { + xlog_trace_loggrant(log, tic, "unmount rec"); + xlog_ungrant_log_space(log, tic); xlog_state_put_ticket(log, tic); + } } else { /* * We're already in forced_shutdown mode, couldn't @@ -1196,7 +1200,7 @@ xlog_alloc_log(xfs_mount_t *mp, kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); iclog = *iclogp; iclog->hic_data = (xlog_in_core_2_t *) - kmem_zalloc(iclogsize, KM_SLEEP); + kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE); iclog->ic_prev = prev_iclog; prev_iclog = iclog; @@ -2212,9 +2216,13 @@ xlog_state_do_callback( iclog = iclog->ic_next; } while (first_iclog != iclog); - if (repeats && (repeats % 10) == 0) { + + if (repeats > 5000) { + flushcnt += repeats; + repeats = 0; xfs_fs_cmn_err(CE_WARN, log->l_mp, - "xlog_state_do_callback: looping %d", repeats); + "%s: possible infinite loop (%d iterations)", + __FUNCTION__, flushcnt); } } while (!ioerrors && loopdidcallbacks); @@ -2246,6 +2254,7 @@ xlog_state_do_callback( } #endif + flushcnt = 0; if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) { flushcnt = log->l_flushcnt; log->l_flushcnt = 0; diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index eacb3d4987f..ebbe93f4f97 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -48,16 +48,10 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) */ /* - * Flags to xfs_log_mount - */ -#define XFS_LOG_RECOVER 0x1 - -/* * Flags to xfs_log_done() */ #define XFS_LOG_REL_PERM_RESERV 0x1 - /* * Flags to xfs_log_reserve() * @@ -70,8 +64,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) #define XFS_LOG_SLEEP 0x0 #define XFS_LOG_NOSLEEP 0x1 #define XFS_LOG_PERM_RESERV 0x2 -#define XFS_LOG_RESV_ALL (XFS_LOG_NOSLEEP|XFS_LOG_PERM_RESERV) - /* * Flags to xfs_log_force() diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 34bcbf50789..9bd3cdf11a8 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -32,7 +32,6 @@ struct xfs_mount; #define XLOG_MIN_ICLOGS 2 #define XLOG_MED_ICLOGS 4 #define XLOG_MAX_ICLOGS 8 -#define XLOG_CALLBACK_SIZE 10 #define XLOG_HEADER_MAGIC_NUM 0xFEEDbabe /* Invalid cycle number */ #define XLOG_VERSION_1 1 #define XLOG_VERSION_2 2 /* Large IClogs, Log sunit */ @@ -149,9 +148,6 @@ struct xfs_mount; #define XLOG_WAS_CONT_TRANS 0x08 /* Cont this trans into new region */ #define XLOG_END_TRANS 0x10 /* End a continued transaction */ #define XLOG_UNMOUNT_TRANS 0x20 /* Unmount a filesystem transaction */ -#define XLOG_SKIP_TRANS (XLOG_COMMIT_TRANS | XLOG_CONTINUE_TRANS | \ - XLOG_WAS_CONT_TRANS | XLOG_END_TRANS | \ - XLOG_UNMOUNT_TRANS) #ifdef __KERNEL__ /* @@ -506,6 +502,12 @@ extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); #define XLOG_TRACE_SLEEP_FLUSH 3 #define XLOG_TRACE_WAKE_FLUSH 4 +/* + * Unmount record type is used as a pseudo transaction type for the ticket. + * It's value must be outside the range of XFS_TRANS_* values. + */ +#define XLOG_UNMOUNT_REC_TYPE (-1U) + #endif /* __KERNEL__ */ #endif /* __XFS_LOG_PRIV_H__ */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b2bd4be4200..e5f396ff9a3 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -331,7 +331,7 @@ typedef struct xfs_mount { xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ lock_t m_agirotor_lock;/* .. and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ - uint m_ihsize; /* size of next field */ + size_t m_ihsize; /* size of next field */ struct xfs_ihash *m_ihash; /* fs private inode hash table*/ struct xfs_inode *m_inodes; /* active inode list */ struct list_head m_del_inodes; /* inodes to reclaim */ @@ -541,7 +541,8 @@ static inline xfs_mount_t *xfs_bhvtom(bhv_desc_t *bdp) #define XFS_VFSTOM(vfs) xfs_vfstom(vfs) static inline xfs_mount_t *xfs_vfstom(bhv_vfs_t *vfs) { - return XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfs), &xfs_vfsops)); + return XFS_BHVTOM(bhv_lookup_range(VFS_BHVHEAD(vfs), + VFS_POSITION_XFS, VFS_POSITION_XFS)); } #define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d) diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index acb853b33eb..9dcb32aa4e2 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -281,8 +281,6 @@ typedef struct xfs_qoff_logformat { XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\ XFS_GQUOTA_ACCT) -#define XFS_MOUNT_QUOTA_MASK (XFS_MOUNT_QUOTA_ALL | XFS_UQUOTA_ACTIVE | \ - XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE) /* diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 5a0b678956e..880c73271c0 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1948,7 +1948,7 @@ xfs_growfs_rt( */ nrextents = nrblocks; do_div(nrextents, in->extsize); - nrbmblocks = roundup_64(nrextents, NBBY * sbp->sb_blocksize); + nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize); nrextslog = xfs_highbit32(nrextents); nrsumlevels = nrextslog + 1; nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks; @@ -1976,7 +1976,10 @@ xfs_growfs_rt( if ((error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_sb.sb_rsumino))) return error; - nmp = NULL; + /* + * Allocate a new (fake) mount/sb. + */ + nmp = kmem_alloc(sizeof(*nmp), KM_SLEEP); /* * Loop over the bitmap blocks. * We will do everything one bitmap block at a time. @@ -1987,10 +1990,6 @@ xfs_growfs_rt( ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); bmbno < nrbmblocks; bmbno++) { - /* - * Allocate a new (fake) mount/sb. - */ - nmp = kmem_alloc(sizeof(*nmp), KM_SLEEP); *nmp = *mp; nsbp = &nmp->m_sb; /* @@ -2018,13 +2017,13 @@ xfs_growfs_rt( cancelflags = 0; if ((error = xfs_trans_reserve(tp, 0, XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0))) - goto error_exit; + break; /* * Lock out other callers by grabbing the bitmap inode lock. */ if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, XFS_ILOCK_EXCL, &ip))) - goto error_exit; + break; ASSERT(ip == mp->m_rbmip); /* * Update the bitmap inode's size. @@ -2038,7 +2037,7 @@ xfs_growfs_rt( */ if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, XFS_ILOCK_EXCL, &ip))) - goto error_exit; + break; ASSERT(ip == mp->m_rsumip); /* * Update the summary inode's size. @@ -2053,7 +2052,7 @@ xfs_growfs_rt( mp->m_rsumlevels != nmp->m_rsumlevels) { error = xfs_rtcopy_summary(mp, nmp, tp); if (error) - goto error_exit; + break; } /* * Update superblock fields. @@ -2080,18 +2079,13 @@ xfs_growfs_rt( error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents, nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno); if (error) - goto error_exit; + break; /* * Mark more blocks free in the superblock. */ xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, nsbp->sb_rextents - sbp->sb_rextents); /* - * Free the fake mp structure. - */ - kmem_free(nmp, sizeof(*nmp)); - nmp = NULL; - /* * Update mp values into the real mp structure. */ mp->m_rsumlevels = nrsumlevels; @@ -2101,15 +2095,15 @@ xfs_growfs_rt( */ xfs_trans_commit(tp, 0, NULL); } - return 0; + + if (error) + xfs_trans_cancel(tp, cancelflags); /* - * Error paths come here. + * Free the fake mp structure. */ -error_exit: - if (nmp) - kmem_free(nmp, sizeof(*nmp)); - xfs_trans_cancel(tp, cancelflags); + kmem_free(nmp, sizeof(*nmp)); + return error; } diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index bf168a91ddb..467854b45c8 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -60,10 +60,6 @@ struct xfs_mount; XFS_SB_VERSION_LOGV2BIT | \ XFS_SB_VERSION_SECTORBIT | \ XFS_SB_VERSION_MOREBITSBIT) -#define XFS_SB_VERSION_OKSASHBITS \ - (XFS_SB_VERSION_NUMBITS | \ - XFS_SB_VERSION_REALFBITS | \ - XFS_SB_VERSION_OKSASHFBITS) #define XFS_SB_VERSION_OKREALBITS \ (XFS_SB_VERSION_NUMBITS | \ XFS_SB_VERSION_OKREALFBITS | \ @@ -81,9 +77,6 @@ struct xfs_mount; #define XFS_SB_VERSION2_RESERVED2BIT 0x00000002 #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ -#define XFS_SB_VERSION2_SASHFBITS 0xff000000 /* Mask: features that - require changing - PROM and SASH */ #define XFS_SB_VERSION2_OKREALFBITS \ (XFS_SB_VERSION2_ATTR2BIT) @@ -238,12 +231,6 @@ static inline int xfs_sb_good_version(xfs_sb_t *sbp) } #endif /* __KERNEL__ */ -#define XFS_SB_GOOD_SASH_VERSION(sbp) \ - ((((sbp)->sb_versionnum >= XFS_SB_VERSION_1) && \ - ((sbp)->sb_versionnum <= XFS_SB_VERSION_3)) || \ - ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - !((sbp)->sb_versionnum & ~XFS_SB_VERSION_OKSASHBITS))) - #define XFS_SB_VERSION_TONEW(v) xfs_sb_version_tonew(v) static inline unsigned xfs_sb_version_tonew(unsigned v) { @@ -461,15 +448,6 @@ static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) * File system sector to basic block conversions. */ #define XFS_FSS_TO_BB(mp,sec) ((sec) << (mp)->m_sectbb_log) -#define XFS_BB_TO_FSS(mp,bb) \ - (((bb) + (XFS_FSS_TO_BB(mp,1) - 1)) >> (mp)->m_sectbb_log) -#define XFS_BB_TO_FSST(mp,bb) ((bb) >> (mp)->m_sectbb_log) - -/* - * File system sector to byte conversions. - */ -#define XFS_FSS_TO_B(mp,sectno) ((xfs_fsize_t)(sectno) << (mp)->m_sb.sb_sectlog) -#define XFS_B_TO_FSST(mp,b) (((__uint64_t)(b)) >> (mp)->m_sb.sb_sectlog) /* * File system block to basic block conversions. diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 9dc88b38060..c68e00105d2 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -149,7 +149,6 @@ typedef struct xfs_item_ops { void (*iop_unlock)(xfs_log_item_t *); xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); void (*iop_push)(xfs_log_item_t *); - void (*iop_abort)(xfs_log_item_t *); void (*iop_pushbuf)(xfs_log_item_t *); void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); } xfs_item_ops_t; @@ -163,7 +162,6 @@ typedef struct xfs_item_ops { #define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) #define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) #define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip) -#define IOP_ABORT(ip) (*(ip)->li_ops->iop_abort)(ip) #define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip) #define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 558c87ff0c4..fc39b166d40 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -276,7 +276,7 @@ xfs_trans_update_ail( xfs_mount_t *mp, xfs_log_item_t *lip, xfs_lsn_t lsn, - unsigned long s) + unsigned long s) __releases(mp->m_ail_lock) { xfs_ail_entry_t *ailp; xfs_log_item_t *dlip=NULL; @@ -328,7 +328,7 @@ void xfs_trans_delete_ail( xfs_mount_t *mp, xfs_log_item_t *lip, - unsigned long s) + unsigned long s) __releases(mp->m_ail_lock) { xfs_ail_entry_t *ailp; xfs_log_item_t *dlip; diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 13edab8a9e9..447ac4308c9 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -46,11 +46,13 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, /* * From xfs_trans_ail.c */ -void xfs_trans_update_ail(struct xfs_mount *, - struct xfs_log_item *, xfs_lsn_t, - unsigned long); -void xfs_trans_delete_ail(struct xfs_mount *, - struct xfs_log_item *, unsigned long); +void xfs_trans_update_ail(struct xfs_mount *mp, + struct xfs_log_item *lip, xfs_lsn_t lsn, + unsigned long s) + __releases(mp->m_ail_lock); +void xfs_trans_delete_ail(struct xfs_mount *mp, + struct xfs_log_item *lip, unsigned long s) + __releases(mp->m_ail_lock); struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *); struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *, struct xfs_log_item *, int *, int *); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index a34796e57af..62336a4cc5a 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -1922,7 +1922,7 @@ xfs_showargs( } if (mp->m_flags & XFS_MOUNT_IHASHSIZE) - seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", mp->m_ihsize); + seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", (int)mp->m_ihsize); if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 23cfa583772..061e2ffdd1d 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2366,10 +2366,15 @@ xfs_remove( namelen = VNAMELEN(dentry); + if (!xfs_get_dir_entry(dentry, &ip)) { + dm_di_mode = ip->i_d.di_mode; + IRELE(ip); + } + if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, - name, NULL, 0, 0, 0); + name, NULL, dm_di_mode, 0, 0); if (error) return error; } @@ -2995,7 +3000,7 @@ xfs_rmdir( int cancel_flags; int committed; bhv_vnode_t *dir_vp; - int dm_di_mode = 0; + int dm_di_mode = S_IFDIR; int last_cdp_link; int namelen; uint resblks; @@ -3010,11 +3015,16 @@ xfs_rmdir( return XFS_ERROR(EIO); namelen = VNAMELEN(dentry); + if (!xfs_get_dir_entry(dentry, &cdp)) { + dm_di_mode = cdp->i_d.di_mode; + IRELE(cdp); + } + if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, - name, NULL, 0, 0, 0); + name, NULL, dm_di_mode, 0, 0); if (error) return XFS_ERROR(error); } @@ -3834,7 +3844,9 @@ xfs_reclaim( XFS_MOUNT_ILOCK(mp); vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); + spin_lock(&ip->i_flags_lock); ip->i_flags |= XFS_IRECLAIMABLE; + spin_unlock(&ip->i_flags_lock); XFS_MOUNT_IUNLOCK(mp); } return 0; @@ -3859,8 +3871,10 @@ xfs_finish_reclaim( * us. */ write_lock(&ih->ih_lock); + spin_lock(&ip->i_flags_lock); if ((ip->i_flags & XFS_IRECLAIM) || (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) { + spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); if (locked) { xfs_ifunlock(ip); @@ -3869,6 +3883,7 @@ xfs_finish_reclaim( return 1; } ip->i_flags |= XFS_IRECLAIM; + spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); /* @@ -4272,7 +4287,7 @@ xfs_free_file_space( xfs_mount_t *mp; int nimap; uint resblks; - int rounding; + uint rounding; int rt; xfs_fileoff_t startoffset_fsb; xfs_trans_t *tp; @@ -4313,8 +4328,7 @@ xfs_free_file_space( vn_iowait(vp); /* wait for the completion of any pending DIOs */ } - rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog), - (__uint8_t)NBPP); + rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP); ilen = len + (offset & (rounding - 1)); ioffset = offset & ~(rounding - 1); if (ilen & (rounding - 1)) |