diff options
Diffstat (limited to 'fs')
33 files changed, 475 insertions, 279 deletions
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 63039ed9576..2bc5dc644b4 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1864,6 +1864,7 @@ cleanup: kfree(psinfo); kfree(notes); kfree(fpu); + kfree(shdr4extnum); #ifdef ELF_CORE_COPY_XFPREGS kfree(xfpu); #endif diff --git a/fs/block_dev.c b/fs/block_dev.c index 1a2421f908f..610e8e0b04b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -762,7 +762,19 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, if (!disk) return ERR_PTR(-ENXIO); - whole = bdget_disk(disk, 0); + /* + * Normally, @bdev should equal what's returned from bdget_disk() + * if partno is 0; however, some drivers (floppy) use multiple + * bdev's for the same physical device and @bdev may be one of the + * aliases. Keep @bdev if partno is 0. This means claimer + * tracking is broken for those devices but it has always been that + * way. + */ + if (partno) + whole = bdget_disk(disk, 0); + else + whole = bdgrab(bdev); + module_put(disk->fops->owner); put_disk(disk); if (!whole) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 300628795fd..3b859a3e6a0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -19,7 +19,6 @@ #ifndef __BTRFS_CTREE__ #define __BTRFS_CTREE__ -#include <linux/version.h> #include <linux/mm.h> #include <linux/highmem.h> #include <linux/fs.h> @@ -1336,6 +1335,11 @@ struct btrfs_ioctl_defrag_range_args { */ #define BTRFS_STRING_ITEM_KEY 253 +/* + * Flags for mount options. + * + * Note: don't forget to add new options to btrfs_show_options() + */ #define BTRFS_MOUNT_NODATASUM (1 << 0) #define BTRFS_MOUNT_NODATACOW (1 << 1) #define BTRFS_MOUNT_NOBARRIER (1 << 2) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index f1cbd028f7b..98c68e658a9 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -82,19 +82,16 @@ static inline struct btrfs_delayed_root *btrfs_get_delayed_root( return root->fs_info->delayed_root; } -static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( - struct inode *inode) +static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode) { - struct btrfs_delayed_node *node; struct btrfs_inode *btrfs_inode = BTRFS_I(inode); struct btrfs_root *root = btrfs_inode->root; u64 ino = btrfs_ino(inode); - int ret; + struct btrfs_delayed_node *node; -again: node = ACCESS_ONCE(btrfs_inode->delayed_node); if (node) { - atomic_inc(&node->refs); /* can be accessed */ + atomic_inc(&node->refs); return node; } @@ -102,8 +99,10 @@ again: node = radix_tree_lookup(&root->delayed_nodes_tree, ino); if (node) { if (btrfs_inode->delayed_node) { + atomic_inc(&node->refs); /* can be accessed */ + BUG_ON(btrfs_inode->delayed_node != node); spin_unlock(&root->inode_lock); - goto again; + return node; } btrfs_inode->delayed_node = node; atomic_inc(&node->refs); /* can be accessed */ @@ -113,6 +112,23 @@ again: } spin_unlock(&root->inode_lock); + return NULL; +} + +static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( + struct inode *inode) +{ + struct btrfs_delayed_node *node; + struct btrfs_inode *btrfs_inode = BTRFS_I(inode); + struct btrfs_root *root = btrfs_inode->root; + u64 ino = btrfs_ino(inode); + int ret; + +again: + node = btrfs_get_delayed_node(inode); + if (node) + return node; + node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS); if (!node) return ERR_PTR(-ENOMEM); @@ -548,19 +564,6 @@ struct btrfs_delayed_item *__btrfs_next_delayed_item( return next; } -static inline struct btrfs_delayed_node *btrfs_get_delayed_node( - struct inode *inode) -{ - struct btrfs_inode *btrfs_inode = BTRFS_I(inode); - struct btrfs_delayed_node *delayed_node; - - delayed_node = btrfs_inode->delayed_node; - if (delayed_node) - atomic_inc(&delayed_node->refs); - - return delayed_node; -} - static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root, u64 root_id) { @@ -1404,8 +1407,7 @@ end: int btrfs_inode_delayed_dir_index_count(struct inode *inode) { - struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node; - int ret = 0; + struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); if (!delayed_node) return -ENOENT; @@ -1415,11 +1417,14 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode) * a new directory index is added into the delayed node and index_cnt * is updated now. So we needn't lock the delayed node. */ - if (!delayed_node->index_cnt) + if (!delayed_node->index_cnt) { + btrfs_release_delayed_node(delayed_node); return -EINVAL; + } BTRFS_I(inode)->index_cnt = delayed_node->index_cnt; - return ret; + btrfs_release_delayed_node(delayed_node); + return 0; } void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, @@ -1613,6 +1618,57 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, inode->i_ctime.tv_nsec); } +int btrfs_fill_inode(struct inode *inode, u32 *rdev) +{ + struct btrfs_delayed_node *delayed_node; + struct btrfs_inode_item *inode_item; + struct btrfs_timespec *tspec; + + delayed_node = btrfs_get_delayed_node(inode); + if (!delayed_node) + return -ENOENT; + + mutex_lock(&delayed_node->mutex); + if (!delayed_node->inode_dirty) { + mutex_unlock(&delayed_node->mutex); + btrfs_release_delayed_node(delayed_node); + return -ENOENT; + } + + inode_item = &delayed_node->inode_item; + + inode->i_uid = btrfs_stack_inode_uid(inode_item); + inode->i_gid = btrfs_stack_inode_gid(inode_item); + btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); + inode->i_mode = btrfs_stack_inode_mode(inode_item); + inode->i_nlink = btrfs_stack_inode_nlink(inode_item); + inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); + BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); + BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); + inode->i_rdev = 0; + *rdev = btrfs_stack_inode_rdev(inode_item); + BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); + + tspec = btrfs_inode_atime(inode_item); + inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec); + inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec); + + tspec = btrfs_inode_mtime(inode_item); + inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec); + inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec); + + tspec = btrfs_inode_ctime(inode_item); + inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec); + inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec); + + inode->i_generation = BTRFS_I(inode)->generation; + BTRFS_I(inode)->index_cnt = (u64)-1; + + mutex_unlock(&delayed_node->mutex); + btrfs_release_delayed_node(delayed_node); + return 0; +} + int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index d1a6a2915c6..8d27af4bd8b 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -119,6 +119,7 @@ void btrfs_kill_delayed_inode_items(struct inode *inode); int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); +int btrfs_fill_inode(struct inode *inode, u32 *rdev); /* Used for drop dead root */ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1f61bf5b496..71cd456fdb6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4842,7 +4842,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, u64 num_bytes, u64 empty_size, u64 search_start, u64 search_end, u64 hint_byte, struct btrfs_key *ins, - int data) + u64 data) { int ret = 0; struct btrfs_root *root = orig_root->fs_info->extent_root; @@ -4869,7 +4869,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, space_info = __find_space_info(root->fs_info, data); if (!space_info) { - printk(KERN_ERR "No space info for %d\n", data); + printk(KERN_ERR "No space info for %llu\n", data); return -ENOSPC; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9f985a42987..bf0d61567f3 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1893,9 +1893,12 @@ void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) while ((node = rb_last(&ctl->free_space_offset)) != NULL) { info = rb_entry(node, struct btrfs_free_space, offset_index); - unlink_free_space(ctl, info); - kfree(info->bitmap); - kmem_cache_free(btrfs_free_space_cachep, info); + if (!info->bitmap) { + unlink_free_space(ctl, info); + kmem_cache_free(btrfs_free_space_cachep, info); + } else { + free_bitmap(ctl, info); + } if (need_resched()) { spin_unlock(&ctl->tree_lock); cond_resched(); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0a9b10c5b0a..3601f0aebdd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2509,6 +2509,11 @@ static void btrfs_read_locked_inode(struct inode *inode) int maybe_acls; u32 rdev; int ret; + bool filled = false; + + ret = btrfs_fill_inode(inode, &rdev); + if (!ret) + filled = true; path = btrfs_alloc_path(); BUG_ON(!path); @@ -2520,6 +2525,10 @@ static void btrfs_read_locked_inode(struct inode *inode) goto make_bad; leaf = path->nodes[0]; + + if (filled) + goto cache_acl; + inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); if (!leaf->map_token) @@ -2556,7 +2565,7 @@ static void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->index_cnt = (u64)-1; BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); - +cache_acl: /* * try to precache a NULL acl entry for files that don't have * any xattrs or acls @@ -2572,7 +2581,6 @@ static void btrfs_read_locked_inode(struct inode *inode) } btrfs_free_path(path); - inode_item = NULL; switch (inode->i_mode & S_IFMT) { case S_IFREG: @@ -2670,12 +2678,14 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, int ret; /* - * If root is tree root, it means this inode is used to - * store free space information. And these inodes are updated - * when committing the transaction, so they needn't delaye to - * be updated, or deadlock will occured. + * If the inode is a free space inode, we can deadlock during commit + * if we put it into the delayed code. + * + * The data relocation inode should also be directly updated + * without delay */ - if (!is_free_space_inode(root, inode)) { + if (!is_free_space_inode(root, inode) + && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_delayed_update_inode(trans, root, inode); if (!ret) btrfs_set_inode_last_trans(trans, inode); @@ -4520,6 +4530,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode_tree_add(inode); trace_btrfs_inode_new(inode); + btrfs_set_inode_last_trans(trans, inode); return inode; fail: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0bb4ebbb71b..15634d4648d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -723,6 +723,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",clear_cache"); if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) seq_puts(seq, ",user_subvol_rm_allowed"); + if (btrfs_test_opt(root, ENOSPC_DEBUG)) + seq_puts(seq, ",enospc_debug"); + if (btrfs_test_opt(root, AUTO_DEFRAG)) + seq_puts(seq, ",autodefrag"); + if (btrfs_test_opt(root, INODE_MAP_CACHE)) + seq_puts(seq, ",inode_cache"); return 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1efa56e18f9..19450bc5363 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2098,7 +2098,8 @@ int btrfs_balance(struct btrfs_root *dev_root) chunk_root->root_key.objectid, found_key.objectid, found_key.offset); - BUG_ON(ret && ret != -ENOSPC); + if (ret && ret != -ENOSPC) + goto error; key.offset = found_key.offset - 1; } ret = 0; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 9542f07d0b9..4698a5c553d 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -290,7 +290,6 @@ static int striped_read(struct inode *inode, struct ceph_inode_info *ci = ceph_inode(inode); u64 pos, this_len; int io_align, page_align; - int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ int left, pages_left; int read; struct page **page_pos; @@ -326,12 +325,11 @@ more: ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); if (ret > 0) { - int didpages = - ((pos & ~PAGE_CACHE_MASK) + ret) >> PAGE_CACHE_SHIFT; + int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; if (read < pos - off) { dout(" zero gap %llu to %llu\n", off + read, pos); - ceph_zero_page_vector_range(page_off + read, + ceph_zero_page_vector_range(page_align + read, pos - off - read, pages); } pos += ret; @@ -356,7 +354,7 @@ more: left = inode->i_size - pos; dout("zero tail %d\n", left); - ceph_zero_page_vector_range(page_off + read, left, + ceph_zero_page_vector_range(page_align + read, left, pages); read += left; } @@ -478,9 +476,6 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, else pos = *offset; - io_align = pos & ~PAGE_MASK; - buf_align = (unsigned long)data & ~PAGE_MASK; - ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); if (ret < 0) return ret; @@ -504,6 +499,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, * boundary. this isn't atomic, unfortunately. :( */ more: + io_align = pos & ~PAGE_MASK; + buf_align = (unsigned long)data & ~PAGE_MASK; len = left; if (file->f_flags & O_DIRECT) { /* write from beginning of first page, regardless of @@ -593,6 +590,7 @@ out: pos += len; written += len; left -= len; + data += written; if (left) goto more; diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 53ed1ad2c11..f66cc162515 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -156,6 +156,6 @@ config CIFS_ACL config CIFS_NFSD_EXPORT bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" - depends on CIFS && EXPERIMENTAL + depends on CIFS && EXPERIMENTAL && BROKEN help Allows NFS server to export a CIFS mounted share (nfsd over cifs) diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index ffb1459dc6e..7260e11e21f 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -42,6 +42,7 @@ #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ #define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ #define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */ +#define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */ struct cifs_sb_info { struct rb_root tlink_tree; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 2f0c58646c1..3e298997629 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -104,8 +104,7 @@ cifs_sb_deactive(struct super_block *sb) } static int -cifs_read_super(struct super_block *sb, struct smb_vol *volume_info, - const char *devname, int silent) +cifs_read_super(struct super_block *sb) { struct inode *inode; struct cifs_sb_info *cifs_sb; @@ -113,22 +112,16 @@ cifs_read_super(struct super_block *sb, struct smb_vol *volume_info, cifs_sb = CIFS_SB(sb); - spin_lock_init(&cifs_sb->tlink_tree_lock); - cifs_sb->tlink_tree = RB_ROOT; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) + sb->s_flags |= MS_POSIXACL; - rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); - if (rc) - return rc; - - cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; + if (cifs_sb_master_tcon(cifs_sb)->ses->capabilities & CAP_LARGE_FILES) + sb->s_maxbytes = MAX_LFS_FILESIZE; + else + sb->s_maxbytes = MAX_NON_LFS; - rc = cifs_mount(sb, cifs_sb, volume_info, devname); - - if (rc) { - if (!silent) - cERROR(1, "cifs_mount failed w/return code = %d", rc); - goto out_mount_failed; - } + /* BB FIXME fix time_gran to be larger for LANMAN sessions */ + sb->s_time_gran = 100; sb->s_magic = CIFS_MAGIC_NUMBER; sb->s_op = &cifs_super_ops; @@ -170,37 +163,14 @@ out_no_root: if (inode) iput(inode); - cifs_umount(sb, cifs_sb); - -out_mount_failed: - bdi_destroy(&cifs_sb->bdi); return rc; } -static void -cifs_put_super(struct super_block *sb) +static void cifs_kill_sb(struct super_block *sb) { - int rc = 0; - struct cifs_sb_info *cifs_sb; - - cFYI(1, "In cifs_put_super"); - cifs_sb = CIFS_SB(sb); - if (cifs_sb == NULL) { - cFYI(1, "Empty cifs superblock info passed to unmount"); - return; - } - - rc = cifs_umount(sb, cifs_sb); - if (rc) - cERROR(1, "cifs_umount failed with return code %d", rc); - if (cifs_sb->mountdata) { - kfree(cifs_sb->mountdata); - cifs_sb->mountdata = NULL; - } - - unload_nls(cifs_sb->local_nls); - bdi_destroy(&cifs_sb->bdi); - kfree(cifs_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + kill_anon_super(sb); + cifs_umount(cifs_sb); } static int @@ -548,7 +518,6 @@ static int cifs_drop_inode(struct inode *inode) } static const struct super_operations cifs_super_ops = { - .put_super = cifs_put_super, .statfs = cifs_statfs, .alloc_inode = cifs_alloc_inode, .destroy_inode = cifs_destroy_inode, @@ -585,7 +554,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) full_path = cifs_build_path_to_root(vol, cifs_sb, cifs_sb_master_tcon(cifs_sb)); if (full_path == NULL) - return NULL; + return ERR_PTR(-ENOMEM); cFYI(1, "Get root dentry for %s", full_path); @@ -614,7 +583,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) dchild = d_alloc(dparent, &name); if (dchild == NULL) { dput(dparent); - dparent = NULL; + dparent = ERR_PTR(-ENOMEM); goto out; } } @@ -632,7 +601,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) if (rc) { dput(dchild); dput(dparent); - dparent = NULL; + dparent = ERR_PTR(rc); goto out; } alias = d_materialise_unique(dchild, inode); @@ -640,7 +609,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) dput(dchild); if (IS_ERR(alias)) { dput(dparent); - dparent = NULL; + dparent = ERR_PTR(-EINVAL); /* XXX */ goto out; } dchild = alias; @@ -660,6 +629,13 @@ out: return dparent; } +static int cifs_set_super(struct super_block *sb, void *data) +{ + struct cifs_mnt_data *mnt_data = data; + sb->s_fs_info = mnt_data->cifs_sb; + return set_anon_super(sb, NULL); +} + static struct dentry * cifs_do_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -673,82 +649,80 @@ cifs_do_mount(struct file_system_type *fs_type, cFYI(1, "Devname: %s flags: %d ", dev_name, flags); - rc = cifs_setup_volume_info(&volume_info, (char *)data, dev_name); - if (rc) - return ERR_PTR(rc); + volume_info = cifs_get_volume_info((char *)data, dev_name); + if (IS_ERR(volume_info)) + return ERR_CAST(volume_info); cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL); if (cifs_sb == NULL) { root = ERR_PTR(-ENOMEM); - goto out; + goto out_nls; + } + + cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); + if (cifs_sb->mountdata == NULL) { + root = ERR_PTR(-ENOMEM); + goto out_cifs_sb; } cifs_setup_cifs_sb(volume_info, cifs_sb); + rc = cifs_mount(cifs_sb, volume_info); + if (rc) { + if (!(flags & MS_SILENT)) + cERROR(1, "cifs_mount failed w/return code = %d", rc); + root = ERR_PTR(rc); + goto out_mountdata; + } + mnt_data.vol = volume_info; mnt_data.cifs_sb = cifs_sb; mnt_data.flags = flags; - sb = sget(fs_type, cifs_match_super, set_anon_super, &mnt_data); + sb = sget(fs_type, cifs_match_super, cifs_set_super, &mnt_data); if (IS_ERR(sb)) { root = ERR_CAST(sb); - goto out_cifs_sb; + cifs_umount(cifs_sb); + goto out; } - if (sb->s_fs_info) { + if (sb->s_root) { cFYI(1, "Use existing superblock"); - goto out_shared; - } - - /* - * Copy mount params for use in submounts. Better to do - * the copy here and deal with the error before cleanup gets - * complicated post-mount. - */ - cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); - if (cifs_sb->mountdata == NULL) { - root = ERR_PTR(-ENOMEM); - goto out_super; - } - - sb->s_flags = flags; - /* BB should we make this contingent on mount parm? */ - sb->s_flags |= MS_NODIRATIME | MS_NOATIME; - sb->s_fs_info = cifs_sb; + cifs_umount(cifs_sb); + } else { + sb->s_flags = flags; + /* BB should we make this contingent on mount parm? */ + sb->s_flags |= MS_NODIRATIME | MS_NOATIME; + + rc = cifs_read_super(sb); + if (rc) { + root = ERR_PTR(rc); + goto out_super; + } - rc = cifs_read_super(sb, volume_info, dev_name, - flags & MS_SILENT ? 1 : 0); - if (rc) { - root = ERR_PTR(rc); - goto out_super; + sb->s_flags |= MS_ACTIVE; } - sb->s_flags |= MS_ACTIVE; - root = cifs_get_root(volume_info, sb); - if (root == NULL) + if (IS_ERR(root)) goto out_super; cFYI(1, "dentry root is: %p", root); goto out; -out_shared: - root = cifs_get_root(volume_info, sb); - if (root) - cFYI(1, "dentry root is: %p", root); - goto out; - out_super: - kfree(cifs_sb->mountdata); deactivate_locked_super(sb); +out: + cifs_cleanup_volume_info(volume_info); + return root; +out_mountdata: + kfree(cifs_sb->mountdata); out_cifs_sb: - unload_nls(cifs_sb->local_nls); kfree(cifs_sb); - -out: - cifs_cleanup_volume_info(&volume_info); - return root; +out_nls: + unload_nls(volume_info->local_nls); + goto out; } static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, @@ -837,7 +811,7 @@ struct file_system_type cifs_fs_type = { .owner = THIS_MODULE, .name = "cifs", .mount = cifs_do_mount, - .kill_sb = kill_anon_super, + .kill_sb = cifs_kill_sb, /* .fs_flags */ }; const struct inode_operations cifs_dir_inode_ops = { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 953f84413c7..8df28e925e5 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -154,12 +154,11 @@ extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, struct cifs_sb_info *cifs_sb); extern int cifs_match_super(struct super_block *, void *); -extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info); -extern int cifs_setup_volume_info(struct smb_vol **pvolume_info, - char *mount_data, const char *devname); -extern int cifs_mount(struct super_block *, struct cifs_sb_info *, - struct smb_vol *, const char *); -extern int cifs_umount(struct super_block *, struct cifs_sb_info *); +extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info); +extern struct smb_vol *cifs_get_volume_info(char *mount_data, + const char *devname); +extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); +extern void cifs_umount(struct cifs_sb_info *); extern void cifs_dfs_release_automount_timer(void); void cifs_proc_init(void); void cifs_proc_clean(void); @@ -218,7 +217,8 @@ extern int get_dfs_path(int xid, struct cifs_ses *pSesInfo, struct dfs_info3_param **preferrals, int remap); extern void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, - struct super_block *sb, struct smb_vol *vol); + struct cifs_sb_info *cifs_sb, + struct smb_vol *vol); extern int CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon, struct kstatfs *FSData); extern int SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 12cf72dd0c4..dbd669cc5bc 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -65,6 +65,8 @@ static int ip_connect(struct TCP_Server_Info *server); static int generic_ip_connect(struct TCP_Server_Info *server); static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink); static void cifs_prune_tlinks(struct work_struct *work); +static int cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, + const char *devname); /* * cifs tcp session reconnection @@ -2240,8 +2242,8 @@ cifs_match_super(struct super_block *sb, void *data) rc = compare_mount_options(sb, mnt_data); out: - cifs_put_tlink(tlink); spin_unlock(&cifs_tcp_ses_lock); + cifs_put_tlink(tlink); return rc; } @@ -2474,14 +2476,6 @@ generic_ip_connect(struct TCP_Server_Info *server) if (rc < 0) return rc; - rc = socket->ops->connect(socket, saddr, slen, 0); - if (rc < 0) { - cFYI(1, "Error %d connecting to server", rc); - sock_release(socket); - server->ssocket = NULL; - return rc; - } - /* * Eventually check for other socket options to change from * the default. sock_setsockopt not used because it expects @@ -2510,6 +2504,14 @@ generic_ip_connect(struct TCP_Server_Info *server) socket->sk->sk_sndbuf, socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo); + rc = socket->ops->connect(socket, saddr, slen, 0); + if (rc < 0) { + cFYI(1, "Error %d connecting to server", rc); + sock_release(socket); + server->ssocket = NULL; + return rc; + } + if (sport == htons(RFC1001_PORT)) rc = ip_rfc1001_connect(server); @@ -2546,7 +2548,7 @@ ip_connect(struct TCP_Server_Info *server) } void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, - struct super_block *sb, struct smb_vol *vol_info) + struct cifs_sb_info *cifs_sb, struct smb_vol *vol_info) { /* if we are reconnecting then should we check to see if * any requested capabilities changed locally e.g. via @@ -2600,22 +2602,23 @@ void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, cap &= ~CIFS_UNIX_POSIX_ACL_CAP; else if (CIFS_UNIX_POSIX_ACL_CAP & cap) { cFYI(1, "negotiated posix acl support"); - if (sb) - sb->s_flags |= MS_POSIXACL; + if (cifs_sb) + cifs_sb->mnt_cifs_flags |= + CIFS_MOUNT_POSIXACL; } if (vol_info && vol_info->posix_paths == 0) cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { cFYI(1, "negotiate posix pathnames"); - if (sb) - CIFS_SB(sb)->mnt_cifs_flags |= + if (cifs_sb) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; } - if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) { + if (cifs_sb && (cifs_sb->rsize > 127 * 1024)) { if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { - CIFS_SB(sb)->rsize = 127 * 1024; + cifs_sb->rsize = 127 * 1024; cFYI(DBG2, "larger reads not supported by srv"); } } @@ -2662,6 +2665,9 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, { INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); + spin_lock_init(&cifs_sb->tlink_tree_lock); + cifs_sb->tlink_tree = RB_ROOT; + if (pvolume_info->rsize > CIFSMaxBufSize) { cERROR(1, "rsize %d too large, using MaxBufSize", pvolume_info->rsize); @@ -2750,21 +2756,21 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, /* * When the server supports very large writes via POSIX extensions, we can - * allow up to 2^24 - PAGE_CACHE_SIZE. + * allow up to 2^24-1, minus the size of a WRITE_AND_X header, not including + * the RFC1001 length. * * Note that this might make for "interesting" allocation problems during - * writeback however (as we have to allocate an array of pointers for the - * pages). A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. + * writeback however as we have to allocate an array of pointers for the + * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. */ -#define CIFS_MAX_WSIZE ((1<<24) - PAGE_CACHE_SIZE) +#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) /* - * When the server doesn't allow large posix writes, default to a wsize of - * 128k - PAGE_CACHE_SIZE -- one page less than the largest frame size - * described in RFC1001. This allows space for the header without going over - * that by default. + * When the server doesn't allow large posix writes, only allow a wsize of + * 128k minus the size of the WRITE_AND_X header. That allows for a write up + * to the maximum size described by RFC1002. */ -#define CIFS_MAX_RFC1001_WSIZE (128 * 1024 - PAGE_CACHE_SIZE) +#define CIFS_MAX_RFC1002_WSIZE (128 * 1024 - sizeof(WRITE_REQ) + 4) /* * The default wsize is 1M. find_get_pages seems to return a maximum of 256 @@ -2783,11 +2789,18 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) /* can server support 24-bit write sizes? (via UNIX extensions) */ if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) - wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1001_WSIZE); + wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE); - /* no CAP_LARGE_WRITE_X? Limit it to 16 bits */ - if (!(server->capabilities & CAP_LARGE_WRITE_X)) - wsize = min_t(unsigned int, wsize, USHRT_MAX); + /* + * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set? + * Limit it to max buffer offered by the server, minus the size of the + * WRITEX header, not including the 4 byte RFC1001 length. + */ + if (!(server->capabilities & CAP_LARGE_WRITE_X) || + (!(server->capabilities & CAP_UNIX) && + (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) + wsize = min_t(unsigned int, wsize, + server->maxBuf - sizeof(WRITE_REQ) + 4); /* hard limit of CIFS_MAX_WSIZE */ wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); @@ -2819,15 +2832,9 @@ is_path_accessible(int xid, struct cifs_tcon *tcon, return rc; } -void -cifs_cleanup_volume_info(struct smb_vol **pvolume_info) +static void +cleanup_volume_info_contents(struct smb_vol *volume_info) { - struct smb_vol *volume_info; - - if (!pvolume_info || !*pvolume_info) - return; - - volume_info = *pvolume_info; kfree(volume_info->username); kzfree(volume_info->password); kfree(volume_info->UNC); @@ -2835,28 +2842,44 @@ cifs_cleanup_volume_info(struct smb_vol **pvolume_info) kfree(volume_info->domainname); kfree(volume_info->iocharset); kfree(volume_info->prepath); +} + +void +cifs_cleanup_volume_info(struct smb_vol *volume_info) +{ + if (!volume_info) + return; + cleanup_volume_info_contents(volume_info); kfree(volume_info); - *pvolume_info = NULL; - return; } + #ifdef CONFIG_CIFS_DFS_UPCALL /* build_path_to_root returns full path to root when * we do not have an exiting connection (tcon) */ static char * -build_unc_path_to_root(const struct smb_vol *volume_info, +build_unc_path_to_root(const struct smb_vol *vol, const struct cifs_sb_info *cifs_sb) { - char *full_path; + char *full_path, *pos; + unsigned int pplen = vol->prepath ? strlen(vol->prepath) : 0; + unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1); - int unc_len = strnlen(volume_info->UNC, MAX_TREE_SIZE + 1); - full_path = kmalloc(unc_len + 1, GFP_KERNEL); + full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL); if (full_path == NULL) return ERR_PTR(-ENOMEM); - strncpy(full_path, volume_info->UNC, unc_len); - full_path[unc_len] = 0; /* add trailing null */ + strncpy(full_path, vol->UNC, unc_len); + pos = full_path + unc_len; + + if (pplen) { + strncpy(pos, vol->prepath, pplen); + pos += pplen; + } + + *pos = '\0'; /* add trailing null */ convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); + cFYI(1, "%s: full_path=%s", __func__, full_path); return full_path; } @@ -2899,15 +2922,18 @@ expand_dfs_referral(int xid, struct cifs_ses *pSesInfo, &fake_devname); free_dfs_info_array(referrals, num_referrals); - kfree(fake_devname); - - if (cifs_sb->mountdata != NULL) - kfree(cifs_sb->mountdata); if (IS_ERR(mdata)) { rc = PTR_ERR(mdata); mdata = NULL; + } else { + cleanup_volume_info_contents(volume_info); + memset(volume_info, '\0', sizeof(*volume_info)); + rc = cifs_setup_volume_info(volume_info, mdata, + fake_devname); } + kfree(fake_devname); + kfree(cifs_sb->mountdata); cifs_sb->mountdata = mdata; } kfree(full_path); @@ -2915,29 +2941,20 @@ expand_dfs_referral(int xid, struct cifs_ses *pSesInfo, } #endif -int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, - const char *devname) +static int +cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, + const char *devname) { - struct smb_vol *volume_info; int rc = 0; - *pvolume_info = NULL; - - volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL); - if (!volume_info) { - rc = -ENOMEM; - goto out; - } - - if (cifs_parse_mount_options(mount_data, devname, - volume_info)) { - rc = -EINVAL; - goto out; - } + if (cifs_parse_mount_options(mount_data, devname, volume_info)) + return -EINVAL; if (volume_info->nullauth) { cFYI(1, "null user"); - volume_info->username = ""; + volume_info->username = kzalloc(1, GFP_KERNEL); + if (volume_info->username == NULL) + return -ENOMEM; } else if (volume_info->username) { /* BB fixme parse for domain name here */ cFYI(1, "Username: %s", volume_info->username); @@ -2945,8 +2962,7 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, cifserror("No username specified"); /* In userspace mount helper we can get user name from alternate locations such as env variables and files on disk */ - rc = -EINVAL; - goto out; + return -EINVAL; } /* this is needed for ASCII cp to Unicode converts */ @@ -2958,21 +2974,34 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, if (volume_info->local_nls == NULL) { cERROR(1, "CIFS mount error: iocharset %s not found", volume_info->iocharset); - rc = -ELIBACC; - goto out; + return -ELIBACC; } } - *pvolume_info = volume_info; - return rc; -out: - cifs_cleanup_volume_info(&volume_info); return rc; } +struct smb_vol * +cifs_get_volume_info(char *mount_data, const char *devname) +{ + int rc; + struct smb_vol *volume_info; + + volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL); + if (!volume_info) + return ERR_PTR(-ENOMEM); + + rc = cifs_setup_volume_info(volume_info, mount_data, devname); + if (rc) { + cifs_cleanup_volume_info(volume_info); + volume_info = ERR_PTR(rc); + } + + return volume_info; +} + int -cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, - struct smb_vol *volume_info, const char *devname) +cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) { int rc = 0; int xid; @@ -2983,6 +3012,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, struct tcon_link *tlink; #ifdef CONFIG_CIFS_DFS_UPCALL int referral_walks_count = 0; +#endif + + rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); + if (rc) + return rc; + + cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; + +#ifdef CONFIG_CIFS_DFS_UPCALL try_mount_again: /* cleanup activities if we're chasing a referral */ if (referral_walks_count) { @@ -2991,7 +3029,6 @@ try_mount_again: else if (pSesInfo) cifs_put_smb_ses(pSesInfo); - cifs_cleanup_volume_info(&volume_info); FreeXid(xid); } #endif @@ -3007,6 +3044,7 @@ try_mount_again: srvTcp = cifs_get_tcp_session(volume_info); if (IS_ERR(srvTcp)) { rc = PTR_ERR(srvTcp); + bdi_destroy(&cifs_sb->bdi); goto out; } @@ -3018,14 +3056,6 @@ try_mount_again: goto mount_fail_check; } - if (pSesInfo->capabilities & CAP_LARGE_FILES) - sb->s_maxbytes = MAX_LFS_FILESIZE; - else - sb->s_maxbytes = MAX_NON_LFS; - - /* BB FIXME fix time_gran to be larger for LANMAN sessions */ - sb->s_time_gran = 100; - /* search for existing tcon to this server share */ tcon = cifs_get_tcon(pSesInfo, volume_info); if (IS_ERR(tcon)) { @@ -3038,7 +3068,7 @@ try_mount_again: if (tcon->ses->capabilities & CAP_UNIX) { /* reset of caps checks mount to see if unix extensions disabled for just this mount */ - reset_cifs_unix_caps(xid, tcon, sb, volume_info); + reset_cifs_unix_caps(xid, tcon, cifs_sb, volume_info); if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && (le64_to_cpu(tcon->fsUnixInfo.Capability) & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) { @@ -3161,6 +3191,7 @@ mount_fail_check: cifs_put_smb_ses(pSesInfo); else cifs_put_tcp_session(srvTcp); + bdi_destroy(&cifs_sb->bdi); goto out; } @@ -3335,8 +3366,8 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses, return rc; } -int -cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) +void +cifs_umount(struct cifs_sb_info *cifs_sb) { struct rb_root *root = &cifs_sb->tlink_tree; struct rb_node *node; @@ -3357,7 +3388,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) } spin_unlock(&cifs_sb->tlink_tree_lock); - return 0; + bdi_destroy(&cifs_sb->bdi); + kfree(cifs_sb->mountdata); + unload_nls(cifs_sb->local_nls); + kfree(cifs_sb); } int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses) diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 816696621ec..42e5363b410 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -92,6 +92,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode) if (cifsi->fscache) { cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache); + fscache_uncache_all_inode_pages(cifsi->fscache, inode); fscache_relinquish_cookie(cifsi->fscache, 1); cifsi->fscache = NULL; } diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 1525d5e662b..1c5b770c314 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -90,12 +90,10 @@ smbhash(unsigned char *out, const unsigned char *in, unsigned char *key) sg_init_one(&sgout, out, 8); rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8); - if (rc) { + if (rc) cERROR(1, "could not encrypt crypt key rc: %d\n", rc); - crypto_free_blkcipher(tfm_des); - goto smbhash_err; - } + crypto_free_blkcipher(tfm_des); smbhash_err: return rc; } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index a2a5d19ece6..2f343b4d7a7 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -954,3 +954,47 @@ void fscache_mark_pages_cached(struct fscache_retrieval *op, pagevec_reinit(pagevec); } EXPORT_SYMBOL(fscache_mark_pages_cached); + +/* + * Uncache all the pages in an inode that are marked PG_fscache, assuming them + * to be associated with the given cookie. + */ +void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, + struct inode *inode) +{ + struct address_space *mapping = inode->i_mapping; + struct pagevec pvec; + pgoff_t next; + int i; + + _enter("%p,%p", cookie, inode); + + if (!mapping || mapping->nrpages == 0) { + _leave(" [no pages]"); + return; + } + + pagevec_init(&pvec, 0); + next = 0; + while (next <= (loff_t)-1 && + pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE) + ) { + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + pgoff_t page_index = page->index; + + ASSERTCMP(page_index, >=, next); + next = page_index + 1; + + if (PageFsCache(page)) { + __fscache_wait_on_page_write(cookie, page); + __fscache_uncache_page(cookie, page); + } + } + pagevec_release(&pvec); + cond_resched(); + } + + _leave(""); +} +EXPORT_SYMBOL(__fscache_uncache_all_inode_pages); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index b49b55584c8..84a47b709f5 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -500,7 +500,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) out_put_hidden_dir: iput(sbi->hidden_dir); out_put_root: - iput(sbi->alloc_file); + iput(root); out_put_alloc_file: iput(sbi->alloc_file); out_close_cat_tree: diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 3031d81f5f0..4ac88ff79aa 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -36,6 +36,7 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, { DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; + int ret = 0; bio = bio_alloc(GFP_NOIO, 1); bio->bi_sector = sector; @@ -54,8 +55,10 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, wait_for_completion(&wait); if (!bio_flagged(bio, BIO_UPTODATE)) - return -EIO; - return 0; + ret = -EIO; + + bio_put(bio); + return ret; } static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) diff --git a/fs/inode.c b/fs/inode.c index 0f7e88a7803..43566d17d1b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -423,7 +423,14 @@ EXPORT_SYMBOL(remove_inode_hash); void end_writeback(struct inode *inode) { might_sleep(); + /* + * We have to cycle tree_lock here because reclaim can be still in the + * process of removing the last page (in __delete_from_page_cache()) + * and we must not free mapping under it. + */ + spin_lock_irq(&inode->i_data.tree_lock); BUG_ON(inode->i_data.nrpages); + spin_unlock_irq(&inode->i_data.tree_lock); BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); diff --git a/fs/locks.c b/fs/locks.c index 0a4f50dfadf..b286539d547 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -160,10 +160,28 @@ EXPORT_SYMBOL_GPL(unlock_flocks); static struct kmem_cache *filelock_cache __read_mostly; +static void locks_init_lock_always(struct file_lock *fl) +{ + fl->fl_next = NULL; + fl->fl_fasync = NULL; + fl->fl_owner = NULL; + fl->fl_pid = 0; + fl->fl_nspid = NULL; + fl->fl_file = NULL; + fl->fl_flags = 0; + fl->fl_type = 0; + fl->fl_start = fl->fl_end = 0; +} + /* Allocate an empty lock structure. */ struct file_lock *locks_alloc_lock(void) { - return kmem_cache_alloc(filelock_cache, GFP_KERNEL); + struct file_lock *fl = kmem_cache_alloc(filelock_cache, GFP_KERNEL); + + if (fl) + locks_init_lock_always(fl); + + return fl; } EXPORT_SYMBOL_GPL(locks_alloc_lock); @@ -200,17 +218,9 @@ void locks_init_lock(struct file_lock *fl) INIT_LIST_HEAD(&fl->fl_link); INIT_LIST_HEAD(&fl->fl_block); init_waitqueue_head(&fl->fl_wait); - fl->fl_next = NULL; - fl->fl_fasync = NULL; - fl->fl_owner = NULL; - fl->fl_pid = 0; - fl->fl_nspid = NULL; - fl->fl_file = NULL; - fl->fl_flags = 0; - fl->fl_type = 0; - fl->fl_start = fl->fl_end = 0; fl->fl_ops = NULL; fl->fl_lmops = NULL; + locks_init_lock_always(fl); } EXPORT_SYMBOL(locks_init_lock); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index ce153a6b3ae..419119c371b 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -259,12 +259,10 @@ static void nfs_fscache_disable_inode_cookie(struct inode *inode) dfprintk(FSCACHE, "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); - /* Need to invalidate any mapped pages that were read in before - * turning off the cache. + /* Need to uncache any pages attached to this inode that + * fscache knows about before turning off the cache. */ - if (inode->i_mapping && inode->i_mapping->nrpages) - invalidate_inode_pages2(inode->i_mapping); - + fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode); nfs_fscache_zap_inode_cookie(inode); } } diff --git a/fs/omfs/file.c b/fs/omfs/file.c index d738a7e493d..2c6d95257a4 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -4,7 +4,6 @@ * Released under GPL v2. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/buffer_head.h> diff --git a/fs/proc/base.c b/fs/proc/base.c index 8a84210ca08..fc5bc276769 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2708,6 +2708,9 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) struct task_io_accounting acct = task->ioac; unsigned long flags; + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + return -EACCES; + if (whole && lock_task_sighand(task, &flags)) { struct task_struct *t = task; @@ -2839,7 +2842,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, proc_tgid_io_accounting), + INF("io", S_IRUSR, proc_tgid_io_accounting), #endif #ifdef CONFIG_HARDWALL INF("hardwall", S_IRUGO, proc_pid_hardwall), @@ -3181,7 +3184,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, proc_tid_io_accounting), + INF("io", S_IRUSR, proc_tid_io_accounting), #endif #ifdef CONFIG_HARDWALL INF("hardwall", S_IRUGO, proc_pid_hardwall), diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c index f0511e81696..eed99428f10 100644 --- a/fs/romfs/mmap-nommu.c +++ b/fs/romfs/mmap-nommu.c @@ -27,14 +27,18 @@ static unsigned long romfs_get_unmapped_area(struct file *file, { struct inode *inode = file->f_mapping->host; struct mtd_info *mtd = inode->i_sb->s_mtd; - unsigned long isize, offset; + unsigned long isize, offset, maxpages, lpages; if (!mtd) goto cant_map_directly; + /* the mapping mustn't extend beyond the EOF */ + lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; isize = i_size_read(inode); offset = pgoff << PAGE_SHIFT; - if (offset > isize || len > isize || offset > isize - len) + + maxpages = (isize + PAGE_SIZE - 1) >> PAGE_SHIFT; + if ((pgoff >= maxpages) || (maxpages - pgoff < lpages)) return (unsigned long) -EINVAL; /* we need to call down to the MTD layer to do the actual mapping */ diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index c8637537881..01d2072fb6d 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -490,6 +490,13 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) args.whichfork = XFS_ATTR_FORK; /* + * we have no control over the attribute names that userspace passes us + * to remove, so we have to allow the name lookup prior to attribute + * removal to fail. + */ + args.op_flags = XFS_DA_OP_OKNOENT; + + /* * Attach the dquots to the inode. */ error = xfs_qm_dqattach(dp, 0); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index cb9b6d1469f..3631783b2b5 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -253,16 +253,21 @@ xfs_iget_cache_hit( rcu_read_lock(); spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~XFS_INEW; - ip->i_flags |= XFS_IRECLAIMABLE; - __xfs_inode_set_reclaim_tag(pag, ip); + ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); + ASSERT(ip->i_flags & XFS_IRECLAIMABLE); trace_xfs_iget_reclaim_fail(ip); goto out_error; } spin_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); + + /* + * Clear the per-lifetime state in the inode as we are now + * effectively a new inode and need to return to the initial + * state before reuse occurs. + */ + ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; ip->i_flags |= XFS_INEW; __xfs_inode_clear_reclaim_tag(mp, pag, ip); inode->i_state = I_NEW; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3ae6d58e547..964cfea7768 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -384,6 +384,16 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) #define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ /* + * Per-lifetime flags need to be reset when re-using a reclaimable inode during + * inode lookup. Thi prevents unintended behaviour on the new inode from + * ocurring. + */ +#define XFS_IRECLAIM_RESET_FLAGS \ + (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ + XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ + XFS_IFILESTREAM); + +/* * Flags for inode locking. * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) * 1<<16 - 1<<32-1 -- lockdep annotation (integers) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 09983a3344a..b1e88d56069 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -681,15 +681,15 @@ xfs_inode_item_unlock( * where the cluster buffer may be unpinned before the inode is inserted into * the AIL during transaction committed processing. If the buffer is unpinned * before the inode item has been committed and inserted, then it is possible - * for the buffer to be written and IO completions before the inode is inserted + * for the buffer to be written and IO completes before the inode is inserted * into the AIL. In that case, we'd be inserting a clean, stale inode into the * AIL which will never get removed. It will, however, get reclaimed which * triggers an assert in xfs_inode_free() complaining about freein an inode * still in the AIL. * - * To avoid this, return a lower LSN than the one passed in so that the - * transaction committed code will not move the inode forward in the AIL but - * will still unpin it properly. + * To avoid this, just unpin the inode directly and return a LSN of -1 so the + * transaction committed code knows that it does not need to do any further + * processing on the item. */ STATIC xfs_lsn_t xfs_inode_item_committed( @@ -699,8 +699,10 @@ xfs_inode_item_committed( struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - if (xfs_iflags_test(ip, XFS_ISTALE)) - return lsn - 1; + if (xfs_iflags_test(ip, XFS_ISTALE)) { + xfs_inode_item_unpin(lip, 0); + return -1; + } return lsn; } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 7c7bc2b786b..c83f63b33aa 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1361,7 +1361,7 @@ xfs_trans_item_committed( lip->li_flags |= XFS_LI_ABORTED; item_lsn = IOP_COMMITTED(lip, commit_lsn); - /* If the committed routine returns -1, item has been freed. */ + /* item_lsn of -1 means the item needs no further processing */ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) return; @@ -1474,7 +1474,7 @@ xfs_trans_committed_bulk( lip->li_flags |= XFS_LI_ABORTED; item_lsn = IOP_COMMITTED(lip, commit_lsn); - /* item_lsn of -1 means the item was freed */ + /* item_lsn of -1 means the item needs no further processing */ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) continue; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index b7a5fe7c52c..619720705bc 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -960,8 +960,11 @@ xfs_release( * be exposed to that problem. */ truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); - if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) - xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); + if (truncated) { + xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); + if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) + xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); + } } if (ip->i_d.di_nlink == 0) |