diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/extent-tree.c | 27 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 2 | ||||
-rw-r--r-- | fs/ext3/namei.c | 2 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 4 | ||||
-rw-r--r-- | fs/ext4/extents.c | 23 | ||||
-rw-r--r-- | fs/ext4/extents_status.c | 73 | ||||
-rw-r--r-- | fs/ext4/inode.c | 19 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 11 | ||||
-rw-r--r-- | fs/ext4/namei.c | 2 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 35 | ||||
-rw-r--r-- | fs/ext4/super.c | 14 | ||||
-rw-r--r-- | fs/fuse/dir.c | 51 | ||||
-rw-r--r-- | fs/lockd/svclock.c | 4 | ||||
-rw-r--r-- | fs/nfs/nfs4xdr.c | 21 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 2 | ||||
-rw-r--r-- | fs/nfsd/nfsd.h | 1 | ||||
-rw-r--r-- | fs/nfsd/nfssvc.c | 13 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 5 | ||||
-rw-r--r-- | fs/open.c | 2 | ||||
-rw-r--r-- | fs/proc/vmcore.c | 2 | ||||
-rw-r--r-- | fs/super.c | 25 | ||||
-rw-r--r-- | fs/sysfs/group.c | 70 |
22 files changed, 261 insertions, 147 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0236de71198..1204c8ef6f3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7466,6 +7466,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int err = 0; int ret; int level; + bool root_dropped = false; path = btrfs_alloc_path(); if (!path) { @@ -7523,6 +7524,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, while (1) { btrfs_tree_lock(path->nodes[level]); btrfs_set_lock_blocking(path->nodes[level]); + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; ret = btrfs_lookup_extent_info(trans, root, path->nodes[level]->start, @@ -7538,6 +7540,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, break; btrfs_tree_unlock(path->nodes[level]); + path->locks[level] = 0; WARN_ON(wc->refs[level] != 1); level--; } @@ -7552,11 +7555,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root, wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); while (1) { - if (!for_reloc && btrfs_need_cleaner_sleep(root)) { - pr_debug("btrfs: drop snapshot early exit\n"); - err = -EAGAIN; - goto out_end_trans; - } ret = walk_down_tree(trans, root, path, wc); if (ret < 0) { @@ -7584,7 +7582,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } BUG_ON(wc->level == 0); - if (btrfs_should_end_transaction(trans, tree_root)) { + if (btrfs_should_end_transaction(trans, tree_root) || + (!for_reloc && btrfs_need_cleaner_sleep(root))) { ret = btrfs_update_root(trans, tree_root, &root->root_key, root_item); @@ -7595,6 +7594,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } btrfs_end_transaction_throttle(trans, tree_root); + if (!for_reloc && btrfs_need_cleaner_sleep(root)) { + pr_debug("btrfs: drop snapshot early exit\n"); + err = -EAGAIN; + goto out_free; + } + trans = btrfs_start_transaction(tree_root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -7639,12 +7644,22 @@ int btrfs_drop_snapshot(struct btrfs_root *root, free_extent_buffer(root->commit_root); btrfs_put_fs_root(root); } + root_dropped = true; out_end_trans: btrfs_end_transaction_throttle(trans, tree_root); out_free: kfree(wc); btrfs_free_path(path); out: + /* + * So if we need to stop dropping the snapshot for whatever reason we + * need to make sure to add it back to the dead root list so that we + * keep trying to do the work later. This also cleans up roots if we + * don't have it in the radix (like when we recover after a power fail + * or unmount) so we don't leak memory. + */ + if (root_dropped == false) + btrfs_add_dead_root(root); if (err) btrfs_std_error(root->fs_info, err); return err; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 4ba2a69a60a..64a157becbe 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2495,7 +2495,7 @@ again: ret = scrub_extent(sctx, extent_logical, extent_len, extent_physical, extent_dev, flags, generation, extent_mirror_num, - extent_physical); + extent_logical - logical + physical); if (ret) goto out; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 998ea111e53..1194b1f0f83 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1780,11 +1780,11 @@ retry: inode->i_op = &ext3_file_inode_operations; inode->i_fop = &ext3_file_operations; ext3_set_aops(inode); + d_tmpfile(dentry, inode); err = ext3_orphan_add(handle, inode); if (err) goto err_drop_inode; mark_inode_dirty(inode); - d_tmpfile(dentry, inode); unlock_new_inode(inode); } ext3_journal_stop(handle); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 58339393fa6..ddd715e42a5 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -38,8 +38,8 @@ ext4_group_t ext4_get_group_number(struct super_block *sb, ext4_group_t group; if (test_opt2(sb, STD_GROUP_SIZE)) - group = (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + - block) >> + group = (block - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >> (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3); else ext4_get_group_no_and_offset(sb, block, &group, NULL); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7097b0f680e..a61873808f7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2835,6 +2835,9 @@ again: err = -EIO; break; } + /* Yield here to deal with large extent trees. + * Should be a no-op if we did IO above. */ + cond_resched(); if (WARN_ON(i + 1 > depth)) { err = -EIO; break; @@ -4261,8 +4264,8 @@ got_allocated_blocks: /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), fb_flags); + ext4_free_blocks(handle, inode, NULL, newblock, + EXT4_C2B(sbi, allocated_clusters), fb_flags); goto out2; } @@ -4382,8 +4385,9 @@ out2: } out3: - trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); - + trace_ext4_ext_map_blocks_exit(inode, flags, map, + err ? err : allocated); + ext4_es_lru_add(inode); return err ? err : allocated; } @@ -4405,9 +4409,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); +retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); + if (err == ENOMEM) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + if (err) { + ext4_std_error(inode->i_sb, err); + return; + } err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + ext4_std_error(inode->i_sb, err); } static void ext4_falloc_update_inode(struct inode *inode, diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index ee018d5f397..91cb110da1b 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -148,6 +148,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t end); static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, int nr_to_scan); +static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, + struct ext4_inode_info *locked_ei); int __init ext4_init_es(void) { @@ -439,7 +441,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, */ if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { if (in_range(es->es_lblk, ee_block, ee_len)) { - pr_warn("ES insert assertation failed for " + pr_warn("ES insert assertion failed for " "inode: %lu we can find an extent " "at block [%d/%d/%llu/%c], but we " "want to add an delayed/hole extent " @@ -458,7 +460,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, */ if (es->es_lblk < ee_block || ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "ex_status [%d/%d/%llu/%c] != " "es_status [%d/%d/%llu/%c]\n", inode->i_ino, ee_block, ee_len, ee_start, @@ -468,7 +470,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, } if (ee_status ^ es_status) { - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "ex_status [%d/%d/%llu/%c] != " "es_status [%d/%d/%llu/%c]\n", inode->i_ino, ee_block, ee_len, ee_start, @@ -481,7 +483,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, * that we don't want to add an written/unwritten extent. */ if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "can't find an extent at block %d but we want " "to add an written/unwritten extent " "[%d/%d/%llu/%llx]\n", inode->i_ino, @@ -519,7 +521,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, * We want to add a delayed/hole extent but this * block has been allocated. */ - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "We can find blocks but we want to add a " "delayed/hole extent [%d/%d/%llu/%llx]\n", inode->i_ino, es->es_lblk, es->es_len, @@ -527,13 +529,13 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, return; } else if (ext4_es_is_written(es)) { if (retval != es->es_len) { - pr_warn("ES insert assertation failed for " + pr_warn("ES insert assertion failed for " "inode: %lu retval %d != es_len %d\n", inode->i_ino, retval, es->es_len); return; } if (map.m_pblk != ext4_es_pblock(es)) { - pr_warn("ES insert assertation failed for " + pr_warn("ES insert assertion failed for " "inode: %lu m_pblk %llu != " "es_pblk %llu\n", inode->i_ino, map.m_pblk, @@ -549,7 +551,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, } } else if (retval == 0) { if (ext4_es_is_written(es)) { - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "We can't find the block but we want to add " "an written extent [%d/%d/%llu/%llx]\n", inode->i_ino, es->es_lblk, es->es_len, @@ -632,10 +634,8 @@ out: } /* - * ext4_es_insert_extent() adds a space to a extent status tree. - * - * ext4_es_insert_extent is called by ext4_da_write_begin and - * ext4_es_remove_extent. + * ext4_es_insert_extent() adds information to an inode's extent + * status tree. * * Return 0 on success, error code on failure. */ @@ -667,7 +667,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, err = __es_remove_extent(inode, lblk, end); if (err != 0) goto error; +retry: err = __es_insert_extent(inode, &newes); + if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, + EXT4_I(inode))) + goto retry; + if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) + err = 0; error: write_unlock(&EXT4_I(inode)->i_es_lock); @@ -746,8 +752,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status orig_es; ext4_lblk_t len1, len2; ext4_fsblk_t block; - int err = 0; + int err; +retry: + err = 0; es = __es_tree_search(&tree->root, lblk); if (!es) goto out; @@ -782,6 +790,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, if (err) { es->es_lblk = orig_es.es_lblk; es->es_len = orig_es.es_len; + if ((err == -ENOMEM) && + __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, + EXT4_I(inode))) + goto retry; goto out; } } else { @@ -891,22 +903,14 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, return -1; } -static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) +static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, + struct ext4_inode_info *locked_ei) { - struct ext4_sb_info *sbi = container_of(shrink, - struct ext4_sb_info, s_es_shrinker); struct ext4_inode_info *ei; struct list_head *cur, *tmp; LIST_HEAD(skiped); - int nr_to_scan = sc->nr_to_scan; int ret, nr_shrunk = 0; - ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); - trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); - - if (!nr_to_scan) - return ret; - spin_lock(&sbi->s_es_lru_lock); /* @@ -935,7 +939,7 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) continue; } - if (ei->i_es_lru_nr == 0) + if (ei->i_es_lru_nr == 0 || ei == locked_ei) continue; write_lock(&ei->i_es_lock); @@ -954,6 +958,27 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) list_splice_tail(&skiped, &sbi->s_es_lru); spin_unlock(&sbi->s_es_lru_lock); + if (locked_ei && nr_shrunk == 0) + nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); + + return nr_shrunk; +} + +static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) +{ + struct ext4_sb_info *sbi = container_of(shrink, + struct ext4_sb_info, s_es_shrinker); + int nr_to_scan = sc->nr_to_scan; + int ret, nr_shrunk; + + ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); + trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); + + if (!nr_to_scan) + return ret; + + nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); + ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); return ret; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0188e65e1f5..ba33c67d6e4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -465,7 +465,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, if (es_map->m_lblk != map->m_lblk || es_map->m_flags != map->m_flags || es_map->m_pblk != map->m_pblk) { - printk("ES cache assertation failed for inode: %lu " + printk("ES cache assertion failed for inode: %lu " "es_cached ex [%d/%d/%llu/%x] != " "found ex [%d/%d/%llu/%x] retval %d flags %x\n", inode->i_ino, es_map->m_lblk, es_map->m_len, @@ -514,10 +514,9 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, "logical block %lu\n", inode->i_ino, flags, map->m_len, (unsigned long) map->m_lblk); - ext4_es_lru_add(inode); - /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { + ext4_es_lru_add(inode); if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { map->m_pblk = ext4_es_pblock(&es) + map->m_lblk - es.es_lblk; @@ -558,7 +557,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, #ifdef ES_AGGRESSIVE_TEST if (retval != map->m_len) { - printk("ES len assertation failed for inode: %lu " + printk("ES len assertion failed for inode: %lu " "retval %d != map->m_len %d " "in %s (lookup)\n", inode->i_ino, retval, map->m_len, __func__); @@ -659,7 +658,7 @@ found: #ifdef ES_AGGRESSIVE_TEST if (retval != map->m_len) { - printk("ES len assertation failed for inode: %lu " + printk("ES len assertion failed for inode: %lu " "retval %d != map->m_len %d " "in %s (allocation)\n", inode->i_ino, retval, map->m_len, __func__); @@ -1529,11 +1528,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, "logical block %lu\n", inode->i_ino, map->m_len, (unsigned long) map->m_lblk); - ext4_es_lru_add(inode); - /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, &es)) { - + ext4_es_lru_add(inode); if (ext4_es_is_hole(&es)) { retval = 0; down_read((&EXT4_I(inode)->i_data_sem)); @@ -1642,7 +1639,7 @@ add_delayed: #ifdef ES_AGGRESSIVE_TEST if (retval != map->m_len) { - printk("ES len assertation failed for inode: %lu " + printk("ES len assertion failed for inode: %lu " "retval %d != map->m_len %d " "in %s (lookup)\n", inode->i_ino, retval, map->m_len, __func__); @@ -2163,7 +2160,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, mpd->io_submit.io_end->offset = ((loff_t)map->m_lblk) << inode->i_blkbits; - while (map->m_len) { + do { err = mpage_map_one_extent(handle, mpd); if (err < 0) { struct super_block *sb = inode->i_sb; @@ -2201,7 +2198,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, err = mpage_map_and_submit_buffers(mpd); if (err < 0) return err; - } + } while (map->m_len); /* Update on-disk size after IO is submitted */ disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a9ff5e5137c..4bbbf13bd74 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4740,11 +4740,16 @@ do_more: * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed */ + retry: new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); if (!new_entry) { - ext4_mb_unload_buddy(&e4b); - err = -ENOMEM; - goto error_return; + /* + * We use a retry loop because + * ext4_free_blocks() is not allowed to fail. + */ + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; } new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 234b834d5a9..35f55a0dbc4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2316,11 +2316,11 @@ retry: inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); + d_tmpfile(dentry, inode); err = ext4_orphan_add(handle, inode); if (err) goto err_drop_inode; mark_inode_dirty(inode); - d_tmpfile(dentry, inode); unlock_new_inode(inode); } if (handle) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 48786cdb5e6..6625d210fb4 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -25,6 +25,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mm.h> +#include <linux/ratelimit.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -55,7 +56,7 @@ void ext4_exit_pageio(void) static void buffer_io_error(struct buffer_head *bh) { char b[BDEVNAME_SIZE]; - printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", + printk_ratelimited(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); } @@ -308,6 +309,7 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) return io_end; } +/* BIO completion function for page writeback */ static void ext4_end_bio(struct bio *bio, int error) { ext4_io_end_t *io_end = bio->bi_private; @@ -318,18 +320,6 @@ static void ext4_end_bio(struct bio *bio, int error) if (test_bit(BIO_UPTODATE, &bio->bi_flags)) error = 0; - if (io_end->flag & EXT4_IO_END_UNWRITTEN) { - /* - * Link bio into list hanging from io_end. We have to do it - * atomically as bio completions can be racing against each - * other. - */ - bio->bi_private = xchg(&io_end->bio, bio); - } else { - ext4_finish_bio(bio); - bio_put(bio); - } - if (error) { struct inode *inode = io_end->inode; @@ -341,7 +331,24 @@ static void ext4_end_bio(struct bio *bio, int error) (unsigned long long) bi_sector >> (inode->i_blkbits - 9)); } - ext4_put_io_end_defer(io_end); + + if (io_end->flag & EXT4_IO_END_UNWRITTEN) { + /* + * Link bio into list hanging from io_end. We have to do it + * atomically as bio completions can be racing against each + * other. + */ + bio->bi_private = xchg(&io_end->bio, bio); + ext4_put_io_end_defer(io_end); + } else { + /* + * Drop io_end reference early. Inode can get freed once + * we finish the bio. + */ + ext4_put_io_end_defer(io_end); + ext4_finish_bio(bio); + bio_put(bio); + } } void ext4_io_submit(struct ext4_io_submit *io) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 85b3dd60169..bca26f34edf 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1702,12 +1702,6 @@ static inline void ext4_show_quota_options(struct seq_file *seq, if (sbi->s_qf_names[GRPQUOTA]) seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - - if (test_opt(sb, USRQUOTA)) - seq_puts(seq, ",usrquota"); - - if (test_opt(sb, GRPQUOTA)) - seq_puts(seq, ",grpquota"); #endif } @@ -3624,10 +3618,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); - /* Do we have standard group size of blocksize * 8 blocks ? */ - if (sbi->s_blocks_per_group == blocksize << 3) - set_opt2(sb, STD_GROUP_SIZE); - for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; @@ -3697,6 +3687,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } + /* Do we have standard group size of clustersize * 8 blocks ? */ + if (sbi->s_blocks_per_group == clustersize << 3) + set_opt2(sb, STD_GROUP_SIZE); + /* * Test whether we have more sectors than will fit in sector_t, * and whether the max offset is addressable by the page cache. diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0eda52738ec..72a5d5b0449 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1223,30 +1223,46 @@ static int fuse_direntplus_link(struct file *file, if (name.name[1] == '.' && name.len == 2) return 0; } + + if (invalid_nodeid(o->nodeid)) + return -EIO; + if (!fuse_valid_type(o->attr.mode)) + return -EIO; + fc = get_fuse_conn(dir); name.hash = full_name_hash(name.name, name.len); dentry = d_lookup(parent, &name); - if (dentry && dentry->d_inode) { + if (dentry) { inode = dentry->d_inode; - if (get_node_id(inode) == o->nodeid) { + if (!inode) { + d_drop(dentry); + } else if (get_node_id(inode) != o->nodeid || + ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { + err = d_invalidate(dentry); + if (err) + goto out; + } else if (is_bad_inode(inode)) { + err = -EIO; + goto out; + } else { struct fuse_inode *fi; fi = get_fuse_inode(inode); spin_lock(&fc->lock); fi->nlookup++; spin_unlock(&fc->lock); + fuse_change_attributes(inode, &o->attr, + entry_attr_timeout(o), + attr_version); + /* * The other branch to 'found' comes via fuse_iget() * which bumps nlookup inside */ goto found; } - err = d_invalidate(dentry); - if (err) - goto out; dput(dentry); - dentry = NULL; } dentry = d_alloc(parent, &name); @@ -1259,25 +1275,30 @@ static int fuse_direntplus_link(struct file *file, if (!inode) goto out; - alias = d_materialise_unique(dentry, inode); - err = PTR_ERR(alias); - if (IS_ERR(alias)) - goto out; + if (S_ISDIR(inode->i_mode)) { + mutex_lock(&fc->inst_mutex); + alias = fuse_d_add_directory(dentry, inode); + mutex_unlock(&fc->inst_mutex); + err = PTR_ERR(alias); + if (IS_ERR(alias)) { + iput(inode); + goto out; + } + } else { + alias = d_splice_alias(inode, dentry); + } + if (alias) { dput(dentry); dentry = alias; } found: - fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o), - attr_version); - fuse_change_entry_timeout(dentry, o); err = 0; out: - if (dentry) - dput(dentry); + dput(dentry); return err; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 067778b0ccc..e066a390297 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -951,6 +951,7 @@ nlmsvc_retry_blocked(void) unsigned long timeout = MAX_SCHEDULE_TIMEOUT; struct nlm_block *block; + spin_lock(&nlm_blocked_lock); while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { block = list_entry(nlm_blocked.next, struct nlm_block, b_list); @@ -960,6 +961,7 @@ nlmsvc_retry_blocked(void) timeout = block->b_when - jiffies; break; } + spin_unlock(&nlm_blocked_lock); dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", block, block->b_when); @@ -969,7 +971,9 @@ nlmsvc_retry_blocked(void) retry_deferred_block(block); } else nlmsvc_grant_blocked(block); + spin_lock(&nlm_blocked_lock); } + spin_unlock(&nlm_blocked_lock); return timeout; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0abfb8466e7..3850b018815 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -999,6 +999,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, __be32 *p; __be32 *q; int len; + uint32_t bmval_len = 2; uint32_t bmval0 = 0; uint32_t bmval1 = 0; uint32_t bmval2 = 0; @@ -1010,7 +1011,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, * = 40 bytes, plus any contribution from variable-length fields * such as owner/group. */ - len = 20; + len = 8; /* Sigh */ if (iap->ia_valid & ATTR_SIZE) @@ -1040,8 +1041,6 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, } len += 4 + (XDR_QUADLEN(owner_grouplen) << 2); } - if (label) - len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2); if (iap->ia_valid & ATTR_ATIME_SET) len += 16; else if (iap->ia_valid & ATTR_ATIME) @@ -1050,15 +1049,22 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, len += 16; else if (iap->ia_valid & ATTR_MTIME) len += 4; + if (label) { + len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2); + bmval_len = 3; + } + + len += bmval_len << 2; p = reserve_space(xdr, len); /* * We write the bitmap length now, but leave the bitmap and the attribute * buffer length to be backfilled at the end of this routine. */ - *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(bmval_len); q = p; - p += 4; + /* Skip bitmap entries + attrlen */ + p += bmval_len + 1; if (iap->ia_valid & ATTR_SIZE) { bmval0 |= FATTR4_WORD0_SIZE; @@ -1112,10 +1118,11 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, len, ((char *)p - (char *)q) + 4); BUG(); } - len = (char *)p - (char *)q - 16; *q++ = htonl(bmval0); *q++ = htonl(bmval1); - *q++ = htonl(bmval2); + if (bmval_len == 3) + *q++ = htonl(bmval2); + len = (char *)p - (char *)(q + 1); *q = htonl(len); /* out: */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a7cee864e7b..0d4c410e458 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1293,7 +1293,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * According to RFC3010, this takes precedence over all other errors. */ status = nfserr_minor_vers_mismatch; - if (args->minorversion > nfsd_supported_minorversion) + if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0) goto out; status = nfs41_check_op_ordering(args); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 2bbd94e51ef..30f34ab0213 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -53,7 +53,6 @@ struct readdir_cd { extern struct svc_program nfsd_program; extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; -extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern spinlock_t nfsd_drc_lock; extern unsigned long nfsd_drc_max_mem; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 6b9f48ca4c2..760c85a6f53 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -116,7 +116,10 @@ struct svc_program nfsd_program = { }; -u32 nfsd_supported_minorversion = 1; +static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { + [0] = 1, + [1] = 1, +}; int nfsd_vers(int vers, enum vers_op change) { @@ -151,15 +154,13 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change) return -1; switch(change) { case NFSD_SET: - nfsd_supported_minorversion = minorversion; + nfsd_supported_minorversions[minorversion] = true; break; case NFSD_CLEAR: - if (minorversion == 0) - return -1; - nfsd_supported_minorversion = minorversion - 1; + nfsd_supported_minorversions[minorversion] = false; break; case NFSD_TEST: - return minorversion <= nfsd_supported_minorversion; + return nfsd_supported_minorversions[minorversion]; case NFSD_AVAIL: return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8ff6a0019b0..c827acb0e94 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -830,9 +830,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, flags = O_WRONLY|O_LARGEFILE; } *filp = dentry_open(&path, flags, current_cred()); - if (IS_ERR(*filp)) + if (IS_ERR(*filp)) { host_err = PTR_ERR(*filp); - else { + *filp = NULL; + } else { host_err = ima_file_check(*filp, may_flags); if (may_flags & NFSD_MAY_64BIT_COOKIE) diff --git a/fs/open.c b/fs/open.c index 9156cb050d0..d53e2989508 100644 --- a/fs/open.c +++ b/fs/open.c @@ -844,6 +844,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o if ((flags & O_TMPFILE_MASK) != O_TMPFILE) return -EINVAL; acc_mode = MAY_OPEN | ACC_MODE(flags); + if (!(acc_mode & MAY_WRITE)) + return -EINVAL; } else if (flags & O_PATH) { /* * If we have O_PATH in the open flag. Then we diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 28503172f2e..a1a16eb97c7 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -223,7 +223,7 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz) * regions in the 1st kernel pointed to by PT_LOAD entries) into * virtually contiguous user-space in ELF layout. */ -#ifdef CONFIG_MMU +#if defined(CONFIG_MMU) && !defined(CONFIG_S390) static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; diff --git a/fs/super.c b/fs/super.c index 7465d436420..68307c02922 100644 --- a/fs/super.c +++ b/fs/super.c @@ -336,19 +336,19 @@ EXPORT_SYMBOL(deactivate_super); * and want to turn it into a full-blown active reference. grab_super() * is called with sb_lock held and drops it. Returns 1 in case of * success, 0 if we had failed (superblock contents was already dead or - * dying when grab_super() had been called). + * dying when grab_super() had been called). Note that this is only + * called for superblocks not in rundown mode (== ones still on ->fs_supers + * of their type), so increment of ->s_count is OK here. */ static int grab_super(struct super_block *s) __releases(sb_lock) { - if (atomic_inc_not_zero(&s->s_active)) { - spin_unlock(&sb_lock); - return 1; - } - /* it's going away */ s->s_count++; spin_unlock(&sb_lock); - /* wait for it to die */ down_write(&s->s_umount); + if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { + put_super(s); + return 1; + } up_write(&s->s_umount); put_super(s); return 0; @@ -463,11 +463,6 @@ retry: destroy_super(s); s = NULL; } - down_write(&old->s_umount); - if (unlikely(!(old->s_flags & MS_BORN))) { - deactivate_locked_super(old); - goto retry; - } return old; } } @@ -660,10 +655,10 @@ restart: if (hlist_unhashed(&sb->s_instances)) continue; if (sb->s_bdev == bdev) { - if (grab_super(sb)) /* drops sb_lock */ - return sb; - else + if (!grab_super(sb)) goto restart; + up_write(&sb->s_umount); + return sb; } } spin_unlock(&sb_lock); diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index aec3d5c98c9..09a1a25cd14 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -20,38 +20,64 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, const struct attribute_group *grp) { struct attribute *const* attr; - int i; + struct bin_attribute *const* bin_attr; - for (i = 0, attr = grp->attrs; *attr; i++, attr++) - sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); + if (grp->attrs) + for (attr = grp->attrs; *attr; attr++) + sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); + if (grp->bin_attrs) + for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) + sysfs_remove_bin_file(kobj, *bin_attr); } static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, const struct attribute_group *grp, int update) { struct attribute *const* attr; + struct bin_attribute *const* bin_attr; int error = 0, i; - for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { - umode_t mode = 0; + if (grp->attrs) { + for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { + umode_t mode = 0; + + /* + * In update mode, we're changing the permissions or + * visibility. Do this by first removing then + * re-adding (if required) the file. + */ + if (update) + sysfs_hash_and_remove(dir_sd, NULL, + (*attr)->name); + if (grp->is_visible) { + mode = grp->is_visible(kobj, *attr, i); + if (!mode) + continue; + } + error = sysfs_add_file_mode(dir_sd, *attr, + SYSFS_KOBJ_ATTR, + (*attr)->mode | mode); + if (unlikely(error)) + break; + } + if (error) { + remove_files(dir_sd, kobj, grp); + goto exit; + } + } - /* in update mode, we're changing the permissions or - * visibility. Do this by first removing then - * re-adding (if required) the file */ - if (update) - sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); - if (grp->is_visible) { - mode = grp->is_visible(kobj, *attr, i); - if (!mode) - continue; + if (grp->bin_attrs) { + for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { + if (update) + sysfs_remove_bin_file(kobj, *bin_attr); + error = sysfs_create_bin_file(kobj, *bin_attr); + if (error) + break; } - error = sysfs_add_file_mode(dir_sd, *attr, SYSFS_KOBJ_ATTR, - (*attr)->mode | mode); - if (unlikely(error)) - break; + if (error) + remove_files(dir_sd, kobj, grp); } - if (error) - remove_files(dir_sd, kobj, grp); +exit: return error; } @@ -67,8 +93,8 @@ static int internal_create_group(struct kobject *kobj, int update, /* Updates may happen before the object has been instantiated */ if (unlikely(update && !kobj->sd)) return -EINVAL; - if (!grp->attrs) { - WARN(1, "sysfs: attrs not set by subsystem for group: %s/%s\n", + if (!grp->attrs && !grp->bin_attrs) { + WARN(1, "sysfs: (bin_)attrs not set by subsystem for group: %s/%s\n", kobj->name, grp->name ? "" : grp->name); return -EINVAL; } |