summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@tilera.com>2011-03-17 14:14:12 -0400
committerChris Metcalf <cmetcalf@tilera.com>2011-03-17 14:14:12 -0400
commit325d1605542960903c88409b199734a3d8fc6612 (patch)
tree9d8eeed393a3b5dcadd1ddb6b76634b464fd6bc7 /fs
parent3c5ead52ed68406c0ee789024c4ae581be8bcee4 (diff)
parent521cb40b0c44418a4fd36dc633f575813d59a43d (diff)
Merge tag 'v2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c52
-rw-r--r--fs/block_dev.c19
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/extent-tree.c44
-rw-r--r--fs/btrfs/extent_io.c165
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c114
-rw-r--r--fs/btrfs/inode.c132
-rw-r--r--fs/btrfs/ioctl.c7
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/relocation.c13
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/ceph/dir.c30
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/compat.c8
-rw-r--r--fs/dcache.c26
-rw-r--r--fs/eventpoll.c95
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/ext2/namei.c9
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fuse/dir.c9
-rw-r--r--fs/fuse/file.c52
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/main.c9
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/inode.c31
-rw-r--r--fs/internal.h2
-rw-r--r--fs/jfs/namei.c2
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c14
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/inode.c7
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4filelayoutdev.c4
-rw-r--r--fs/nfs/nfs4proc.c131
-rw-r--r--fs/nfs/nfs4state.c29
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/nilfs2/btnode.c5
-rw-r--r--fs/nilfs2/btnode.h1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/page.c13
-rw-r--r--fs/nilfs2/page.h1
-rw-r--r--fs/nilfs2/segment.c3
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/refcounttree.c7
-rw-r--r--fs/ocfs2/super.c28
-rw-r--r--fs/open.c8
-rw-r--r--fs/partitions/ldm.c5
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_sysctl.c7
-rw-r--r--fs/reiserfs/namei.c2
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/udf/namei.c11
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c11
-rw-r--r--fs/xfs/xfs_fsops.c3
74 files changed, 967 insertions, 442 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b..789b3afb342 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
candidate->first = candidate->last = index;
candidate->offset_first = from;
candidate->to_last = to;
+ INIT_LIST_HEAD(&candidate->link);
candidate->usage = 1;
candidate->state = AFS_WBACK_PENDING;
init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index fc557a3be0a..26869cde395 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -239,15 +239,23 @@ static void __put_ioctx(struct kioctx *ctx)
call_rcu(&ctx->rcu_head, ctx_rcu_free);
}
-#define get_ioctx(kioctx) do { \
- BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
- atomic_inc(&(kioctx)->users); \
-} while (0)
-#define put_ioctx(kioctx) do { \
- BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
- if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \
- __put_ioctx(kioctx); \
-} while (0)
+static inline void get_ioctx(struct kioctx *kioctx)
+{
+ BUG_ON(atomic_read(&kioctx->users) <= 0);
+ atomic_inc(&kioctx->users);
+}
+
+static inline int try_get_ioctx(struct kioctx *kioctx)
+{
+ return atomic_inc_not_zero(&kioctx->users);
+}
+
+static inline void put_ioctx(struct kioctx *kioctx)
+{
+ BUG_ON(atomic_read(&kioctx->users) <= 0);
+ if (unlikely(atomic_dec_and_test(&kioctx->users)))
+ __put_ioctx(kioctx);
+}
/* ioctx_alloc
* Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
@@ -601,8 +609,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
rcu_read_lock();
hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
- if (ctx->user_id == ctx_id && !ctx->dead) {
- get_ioctx(ctx);
+ /*
+ * RCU protects us against accessing freed memory but
+ * we have to be careful not to get a reference when the
+ * reference count already dropped to 0 (ctx->dead test
+ * is unreliable because of races).
+ */
+ if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
ret = ctx;
break;
}
@@ -1629,6 +1642,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
spin_lock_irq(&ctx->ctx_lock);
+ /*
+ * We could have raced with io_destroy() and are currently holding a
+ * reference to ctx which should be destroyed. We cannot submit IO
+ * since ctx gets freed as soon as io_submit() puts its reference. The
+ * check here is reliable: io_destroy() sets ctx->dead before waiting
+ * for outstanding IO and the barrier between these two is realized by
+ * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
+ * increment ctx->reqs_active before checking for ctx->dead and the
+ * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
+ * don't see ctx->dead set here, io_destroy() waits for our IO to
+ * finish.
+ */
+ if (ctx->dead) {
+ spin_unlock_irq(&ctx->ctx_lock);
+ ret = -EINVAL;
+ goto out_put_req;
+ }
aio_run_iocb(req);
if (!list_empty(&ctx->run_list)) {
/* drain the run list */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4fb8a343153..88928701959 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
if (ret)
goto out_del;
+ /*
+ * bdev could be deleted beneath us which would implicitly destroy
+ * the holder directory. Hold on to it.
+ */
+ kobject_get(bdev->bd_part->holder_dir);
list_add(&holder->list, &bdev->bd_holder_disks);
goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
del_symlink(bdev->bd_part->holder_dir,
&disk_to_dev(disk)->kobj);
+ kobject_put(bdev->bd_part->holder_dir);
list_del_init(&holder->list);
kfree(holder);
}
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
* flush_disk - invalidates all buffer-cache entries on a disk
*
* @bdev: struct block device to be flushed
+ * @kill_dirty: flag to guide handling of dirty inodes
*
* Invalidates all buffer-cache entries on a disk. It should be called
* when a disk has been changed -- either by a media change or online
* resize.
*/
-static void flush_disk(struct block_device *bdev)
+static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
- if (__invalidate_device(bdev)) {
+ if (__invalidate_device(bdev, kill_dirty)) {
char name[BDEVNAME_SIZE] = "";
if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
"%s: detected capacity change from %lld to %lld\n",
name, bdev_size, disk_size);
i_size_write(bdev->bd_inode, disk_size);
- flush_disk(bdev);
+ flush_disk(bdev, false);
}
}
EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
if (!(events & DISK_EVENT_MEDIA_CHANGE))
return 0;
- flush_disk(bdev);
+ flush_disk(bdev, true);
if (bdops->revalidate_disk)
bdops->revalidate_disk(bdev->bd_disk);
return 1;
@@ -1600,7 +1607,7 @@ fail:
}
EXPORT_SYMBOL(lookup_bdev);
-int __invalidate_device(struct block_device *bdev)
+int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
struct super_block *sb = get_super(bdev);
int res = 0;
@@ -1613,7 +1620,7 @@ int __invalidate_device(struct block_device *bdev)
* hold).
*/
shrink_dcache_sb(sb);
- res = invalidate_inodes(sb);
+ res = invalidate_inodes(sb, kill_dirty);
drop_super(sb);
}
invalidate_bdev(bdev);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af605..7f78cc78fdd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
+ /*
+ * we bump reservation progress every time we decrement
+ * bytes_reserved. This way people waiting for reservations
+ * know something good has happened and they can check
+ * for progress. The number here isn't to be trusted, it
+ * just shows reclaim activity
+ */
+ unsigned long reservation_progress;
+
int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
@@ -1254,6 +1263,7 @@ struct btrfs_root {
#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
+#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2228,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
u64 start, u64 end);
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
u64 num_bytes);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f3c96fc0143..7b3089b5c2d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
u64 max_reclaim;
u64 reclaimed = 0;
long time_left;
- int pause = 1;
int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
+ unsigned long progress;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
smp_mb();
reserved = space_info->bytes_reserved;
+ progress = space_info->reservation_progress;
if (reserved == 0)
return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
spin_lock(&space_info->lock);
- if (reserved > space_info->bytes_reserved) {
- loops = 0;
+ if (reserved > space_info->bytes_reserved)
reclaimed += reserved - space_info->bytes_reserved;
- } else {
- loops++;
- }
reserved = space_info->bytes_reserved;
spin_unlock(&space_info->lock);
+ loops++;
+
if (reserved == 0 || reclaimed >= max_reclaim)
break;
if (trans && trans->transaction->blocked)
return -EAGAIN;
- __set_current_state(TASK_INTERRUPTIBLE);
- time_left = schedule_timeout(pause);
+ time_left = schedule_timeout_interruptible(1);
/* We were interrupted, exit */
if (time_left)
break;
- pause <<= 1;
- if (pause > HZ / 10)
- pause = HZ / 10;
+ /* we've kicked the IO a few times, if anything has been freed,
+ * exit. There is no sense in looping here for a long time
+ * when we really need to commit the transaction, or there are
+ * just too many writers without enough free space
+ */
+
+ if (loops > 3) {
+ smp_mb();
+ if (progress != space_info->reservation_progress)
+ break;
+ }
}
return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
if (num_bytes) {
spin_lock(&space_info->lock);
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
}
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_reserved -= num_bytes;
+ sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
to_reserve = 0;
}
spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
btrfs_set_block_group_used(&cache->item, old_val);
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor;
spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (ret) {
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_reserved -= buf->len;
+ cache->space_info->reservation_progress++;
spin_unlock(&cache->space_info->lock);
}
goto out;
@@ -5376,7 +5387,7 @@ again:
num_bytes, data, 1);
goto again;
}
- if (ret == -ENOSPC) {
+ if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
struct btrfs_space_info *sinfo;
sinfo = __find_space_info(root->fs_info, data);
@@ -8065,6 +8076,13 @@ out:
return ret;
}
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type)
+{
+ u64 alloc_flags = get_alloc_profile(root, type);
+ return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+}
+
/*
* helper to account the unused space of all the readonly block group in the
* list. takes mirrors into account.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 92ac5192c51..714adc4ac4c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
*/
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes,
- unsigned long bits)
+ unsigned long bits, int contig)
{
struct rb_node *node;
struct extent_state *state;
u64 cur_start = *start;
u64 total_bytes = 0;
+ u64 last = 0;
int found = 0;
if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
state = rb_entry(node, struct extent_state, rb_node);
if (state->start > search_end)
break;
- if (state->end >= cur_start && (state->state & bits)) {
+ if (contig && found && state->start > last + 1)
+ break;
+ if (state->end >= cur_start && (state->state & bits) == bits) {
total_bytes += min(search_end, state->end) + 1 -
max(cur_start, state->start);
if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
*start = state->start;
found = 1;
}
+ last = state->end;
+ } else if (contig && found) {
+ break;
}
node = rb_next(node);
if (!node)
@@ -2912,6 +2918,46 @@ out:
return sector;
}
+/*
+ * helper function for fiemap, which doesn't want to see any holes.
+ * This maps until we find something past 'last'
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+ u64 offset,
+ u64 last,
+ get_extent_t *get_extent)
+{
+ u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+ struct extent_map *em;
+ u64 len;
+
+ if (offset >= last)
+ return NULL;
+
+ while(1) {
+ len = last - offset;
+ if (len == 0)
+ break;
+ len = (len + sectorsize - 1) & ~(sectorsize - 1);
+ em = get_extent(inode, NULL, 0, offset, len, 0);
+ if (!em || IS_ERR(em))
+ return em;
+
+ /* if this isn't a hole return it */
+ if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+ em->block_start != EXTENT_MAP_HOLE) {
+ return em;
+ }
+
+ /* this is a hole, advance to the next extent */
+ offset = extent_map_end(em);
+ free_extent_map(em);
+ if (offset >= last)
+ break;
+ }
+ return NULL;
+}
+
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u32 flags = 0;
u32 found_type;
u64 last;
+ u64 last_for_get_extent = 0;
u64 disko = 0;
+ u64 isize = i_size_read(inode);
struct btrfs_key found_key;
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
struct btrfs_file_extent_item *item;
int end = 0;
- u64 em_start = 0, em_len = 0;
+ u64 em_start = 0;
+ u64 em_len = 0;
+ u64 em_end = 0;
unsigned long emflags;
- int hole = 0;
if (len == 0)
return -EINVAL;
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return -ENOMEM;
path->leave_spinning = 1;
+ /*
+ * lookup the last file extent. We're not using i_size here
+ * because there might be preallocation past i_size
+ */
ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
path, inode->i_ino, -1, 0);
if (ret < 0) {
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
found_type = btrfs_key_type(&found_key);
- /* No extents, just return */
+ /* No extents, but there might be delalloc bits */
if (found_key.objectid != inode->i_ino ||
found_type != BTRFS_EXTENT_DATA_KEY) {
- btrfs_free_path(path);
- return 0;
+ /* have to trust i_size as the end */
+ last = (u64)-1;
+ last_for_get_extent = isize;
+ } else {
+ /*
+ * remember the start of the last extent. There are a
+ * bunch of different factors that go into the length of the
+ * extent, so its much less complex to remember where it started
+ */
+ last = found_key.offset;
+ last_for_get_extent = last + 1;
}
- last = found_key.offset;
btrfs_free_path(path);
+ /*
+ * we might have some extents allocated but more delalloc past those
+ * extents. so, we trust isize unless the start of the last extent is
+ * beyond isize
+ */
+ if (last < isize) {
+ last = (u64)-1;
+ last_for_get_extent = isize;
+ }
+
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
&cached_state, GFP_NOFS);
- em = get_extent(inode, NULL, 0, off, max - off, 0);
+
+ em = get_extent_skip_holes(inode, off, last_for_get_extent,
+ get_extent);
if (!em)
goto out;
if (IS_ERR(em)) {
@@ -2973,22 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
while (!end) {
- hole = 0;
- off = em->start + em->len;
- if (off >= max)
- end = 1;
+ u64 offset_in_extent;
- if (em->block_start == EXTENT_MAP_HOLE) {
- hole = 1;
- goto next;
- }
+ /* break if the extent we found is outside the range */
+ if (em->start >= max || extent_map_end(em) < off)
+ break;
- em_start = em->start;
- em_len = em->len;
+ /*
+ * get_extent may return an extent that starts before our
+ * requested range. We have to make sure the ranges
+ * we return to fiemap always move forward and don't
+ * overlap, so adjust the offsets here
+ */
+ em_start = max(em->start, off);
+ /*
+ * record the offset from the start of the extent
+ * for adjusting the disk offset below
+ */
+ offset_in_extent = em_start - em->start;
+ em_end = extent_map_end(em);
+ em_len = em_end - em_start;
+ emflags = em->flags;
disko = 0;
flags = 0;
+ /*
+ * bump off for our next call to get_extent
+ */
+ off = extent_map_end(em);
+ if (off >= max)
+ end = 1;
+
if (em->block_start == EXTENT_MAP_LAST_BYTE) {
end = 1;
flags |= FIEMAP_EXTENT_LAST;
@@ -2999,42 +3088,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
} else {
- disko = em->block_start;
+ disko = em->block_start + offset_in_extent;
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
-next:
- emflags = em->flags;
free_extent_map(em);
em = NULL;
- if (!end) {
- em = get_extent(inode, NULL, 0, off, max - off, 0);
- if (!em)
- goto out;
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out;
- }
- emflags = em->flags;
- }
-
- if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+ if ((em_start >= last) || em_len == (u64)-1 ||
+ (last == (u64)-1 && isize <= em_end)) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
- if (em_start == last) {
+ /* now scan forward to see if this is really the last extent. */
+ em = get_extent_skip_holes(inode, off, last_for_get_extent,
+ get_extent);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+ if (!em) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
-
- if (!hole) {
- ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
- em_len, flags);
- if (ret)
- goto out_free;
- }
+ ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+ em_len, flags);
+ if (ret)
+ goto out_free;
}
out_free:
free_extent_map(em);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd06..9318dfefd59 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
- u64 max_bytes, unsigned long bits);
+ u64 max_bytes, unsigned long bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7084140d594..f447b783bb8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
/* Flush processor's dcache for this page */
flush_dcache_page(page);
+
+ /*
+ * if we get a partial write, we can end up with
+ * partially up to date pages. These add
+ * a lot of complexity, so make sure they don't
+ * happen by forcing this copy to be retried.
+ *
+ * The rest of the btrfs_file_write code will fall
+ * back to page at a time copies after we return 0.
+ */
+ if (!PageUptodate(page) && copied < count)
+ copied = 0;
+
iov_iter_advance(i, copied);
write_bytes -= copied;
total_copied += copied;
@@ -763,6 +776,27 @@ out:
}
/*
+ * on error we return an unlocked page and the error value
+ * on success we return a locked page and 0
+ */
+static int prepare_uptodate_page(struct page *page, u64 pos)
+{
+ int ret = 0;
+
+ if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
+ ret = btrfs_readpage(NULL, page);
+ if (ret)
+ return ret;
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+/*
* this gets pages into the page cache and locks them down, it also properly
* waits for data=ordered extents to finish before allowing the pages to be
* modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
unsigned long index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = fdentry(file)->d_inode;
int err = 0;
+ int faili = 0;
u64 start_pos;
u64 last_pos;
@@ -794,15 +829,24 @@ again:
for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i);
if (!pages[i]) {
- int c;
- for (c = i - 1; c >= 0; c--) {
- unlock_page(pages[c]);
- page_cache_release(pages[c]);
- }
- return -ENOMEM;
+ faili = i - 1;
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ if (i == 0)
+ err = prepare_uptodate_page(pages[i], pos);
+ if (i == num_pages - 1)
+ err = prepare_uptodate_page(pages[i],
+ pos + write_bytes);
+ if (err) {
+ page_cache_release(pages[i]);
+ faili = i - 1;
+ goto fail;
}
wait_on_page_writeback(pages[i]);
}
+ err = 0;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
WARN_ON(!PageLocked(pages[i]));
}
return 0;
+fail:
+ while (faili >= 0) {
+ unlock_page(pages[faili]);
+ page_cache_release(pages[faili]);
+ faili--;
+ }
+ return err;
+
}
static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct page *pinned[2];
struct page **pages = NULL;
struct iov_iter i;
loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
- pinned[0] = NULL;
- pinned[1] = NULL;
-
start_pos = pos;
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
first_index = pos >> PAGE_CACHE_SHIFT;
last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
- /*
- * there are lots of better ways to do this, but this code
- * makes sure the first and last page in the file range are
- * up to date and ready for cow
- */
- if ((pos & (PAGE_CACHE_SIZE - 1))) {
- pinned[0] = grab_cache_page(inode->i_mapping, first_index);
- if (!PageUptodate(pinned[0])) {
- ret = btrfs_readpage(NULL, pinned[0]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[0]);
- } else {
- unlock_page(pinned[0]);
- }
- }
- if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
- pinned[1] = grab_cache_page(inode->i_mapping, last_index);
- if (!PageUptodate(pinned[1])) {
- ret = btrfs_readpage(NULL, pinned[1]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[1]);
- } else {
- unlock_page(pinned[1]);
- }
- }
-
while (iov_iter_count(&i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i);
- dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+
+ /*
+ * if we have trouble faulting in the pages, fall
+ * back to one page at a time
+ */
+ if (copied < write_bytes)
+ nrptrs = 1;
+
+ if (copied == 0)
+ dirty_pages = 0;
+ else
+ dirty_pages = (copied + offset +
+ PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
if (num_pages > dirty_pages) {
if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
err = ret;
kfree(pages);
- if (pinned[0])
- page_cache_release(pinned[0]);
- if (pinned[1])
- page_cache_release(pinned[1]);
*ppos = pos;
/*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fb9bd7832b6..9007bbd01db 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1913,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
private = 0;
if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
- (u64)-1, 1, EXTENT_DIRTY)) {
+ (u64)-1, 1, EXTENT_DIRTY, 0)) {
ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
start, &private_failure);
if (ret == 0) {
@@ -4821,10 +4821,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
/*
- * 1 item for inode ref
+ * 2 items for inode and inode ref
* 2 items for dir items
+ * 1 item for parent inode
*/
- trans = btrfs_start_transaction(root, 3);
+ trans = btrfs_start_transaction(root, 5);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto fail;
@@ -5280,6 +5281,128 @@ out:
return em;
}
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+ size_t pg_offset, u64 start, u64 len,
+ int create)
+{
+ struct extent_map *em;
+ struct extent_map *hole_em = NULL;
+ u64 range_start = start;
+ u64 end;
+ u64 found;
+ u64 found_end;
+ int err = 0;
+
+ em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+ if (IS_ERR(em))
+ return em;
+ if (em) {
+ /*
+ * if our em maps to a hole, there might
+ * actually be delalloc bytes behind it
+ */
+ if (em->block_start != EXTENT_MAP_HOLE)
+ return em;
+ else
+ hole_em = em;
+ }
+
+ /* check to see if we've wrapped (len == -1 or similar) */
+ end = start + len;
+ if (end < start)
+ end = (u64)-1;
+ else
+ end -= 1;
+
+ em = NULL;
+
+ /* ok, we didn't find anything, lets look for delalloc */
+ found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+ end, len, EXTENT_DELALLOC, 1);
+ found_end = range_start + found;
+ if (found_end < range_start)
+ found_end = (u64)-1;
+
+ /*
+ * we didn't find anything useful, return
+ * the original results from get_extent()
+ */
+ if (range_start > end || found_end <= start) {
+ em = hole_em;
+ hole_em = NULL;
+ goto out;
+ }
+
+ /* adjust the range_start to make sure it doesn't
+ * go backwards from the start they passed in
+ */
+ range_start = max(start,range_start);
+ found = found_end - range_start;
+
+ if (found > 0) {
+ u64 hole_start = start;
+ u64 hole_len = len;
+
+ em = alloc_extent_map(GFP_NOFS);
+ if (!em) {
+ err = -ENOMEM;
+ goto out;
+ }
+ /*
+ * when btrfs_get_extent can't find anything it
+ * returns one huge hole
+ *
+ * make sure what it found really fits our range, and
+ * adjust to make sure it is based on the start from
+ * the caller
+ */
+ if (hole_em) {
+ u64 calc_end = extent_map_end(hole_em);
+
+ if (calc_end <= start || (hole_em->start > end)) {
+ free_extent_map(hole_em);
+ hole_em = NULL;
+ } else {
+ hole_start = max(hole_em->start, start);
+ hole_len = calc_end - hole_start;
+ }
+ }
+ em->bdev = NULL;
+ if (hole_em && range_start > hole_start) {
+ /* our hole starts before our delalloc, so we
+ * have to return just the parts of the hole
+ * that go until the delalloc starts
+ */
+ em->len = min(hole_len,
+ range_start - hole_start);
+ em->start = hole_start;
+ em->orig_start = hole_start;
+ /*
+ * don't adjust block start at all,
+ * it is fixed at EXTENT_MAP_HOLE
+ */
+ em->block_start = hole_em->block_start;
+ em->block_len = hole_len;
+ } else {
+ em->start = range_start;
+ em->len = found;
+ em->orig_start = range_start;
+ em->block_start = EXTENT_MAP_DELALLOC;
+ em->block_len = found;
+ }
+ } else if (hole_em) {
+ return hole_em;
+ }
+out:
+
+ free_extent_map(hole_em);
+ if (err) {
+ free_extent_map(em);
+ return ERR_PTR(err);
+ }
+ return em;
+}
+
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
u64 start, u64 len)
{
@@ -5934,6 +6057,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
if (!skip_sum) {
dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
if (!dip->csums) {
+ kfree(dip);
ret = -ENOMEM;
goto free_ordered;
}
@@ -6102,7 +6226,7 @@ out:
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
- return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
+ return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
}
int btrfs_readpage(struct file *file, struct page *page)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index be2d4f6aaa5..5fdb2abc4fa 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
- if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+ if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
return -EINVAL;
if (flags & ~BTRFS_SUBVOL_RDONLY)
return -EOPNOTSUPP;
+ if (!is_owner_or_cap(inode))
+ return -EACCES;
+
down_write(&root->fs_info->subvol_sem);
/* nothing to do */
@@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
goto out_reset;
}
- ret = btrfs_update_root(trans, root,
+ ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
btrfs_commit_transaction(trans, root);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399d..a178f5ebea7 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
unsigned long tot_out;
unsigned long tot_len;
char *buf;
+ bool may_late_unmap, need_unmap;
data_in = kmap(pages_in[0]);
tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
tot_in += in_len;
working_bytes = in_len;
+ may_late_unmap = need_unmap = false;
/* fast path: avoid using the working buffer */
if (in_page_bytes_left >= in_len) {
buf = data_in + in_offset;
bytes = in_len;
+ may_late_unmap = true;
goto cont;
}
@@ -329,14 +332,17 @@ cont:
if (working_bytes == 0 && tot_in >= tot_len)
break;
- kunmap(pages_in[page_in_index]);
- page_in_index++;
- if (page_in_index >= total_pages_in) {
+ if (page_in_index + 1 >= total_pages_in) {
ret = -1;
- data_in = NULL;
goto done;
}
- data_in = kmap(pages_in[page_in_index]);
+
+ if (may_late_unmap)
+ need_unmap = true;
+ else
+ kunmap(pages_in[page_in_index]);
+
+ data_in = kmap(pages_in[++page_in_index]);
in_page_bytes_left = PAGE_CACHE_SIZE;
in_offset = 0;
@@ -346,6 +352,8 @@ cont:
out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
&out_len);
+ if (need_unmap)
+ kunmap(pages_in[page_in_index - 1]);
if (ret != LZO_E_OK) {
printk(KERN_WARNING "btrfs decompress failed\n");
ret = -1;
@@ -363,8 +371,7 @@ cont:
break;
}
done:
- if (data_in)
- kunmap(pages_in[page_in_index]);
+ kunmap(pages_in[page_in_index]);
return ret;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0825e4ed944..31ade5802ae 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3654,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
u32 item_size;
int ret;
int err = 0;
+ int progress = 0;
path = btrfs_alloc_path();
if (!path)
@@ -3666,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
while (1) {
+ progress++;
trans = btrfs_start_transaction(rc->extent_root, 0);
BUG_ON(IS_ERR(trans));
-
+restart:
if (update_backref_cache(trans, &rc->backref_cache)) {
btrfs_end_transaction(trans, rc->extent_root);
continue;
@@ -3781,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
}
}
+ if (trans && progress && err == -ENOSPC) {
+ ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
+ rc->block_group->flags);
+ if (ret == 0) {
+ err = 0;
+ progress = 0;
+ goto restart;
+ }
+ }
btrfs_release_path(rc->extent_root, path);
clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a004008f7d2..d39a9895d93 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
- Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
+ Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
+ Opt_enospc_debug, Opt_err,
};
static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
{Opt_space_cache, "space_cache"},
{Opt_clear_cache, "clear_cache"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+ {Opt_enospc_debug, "enospc_debug"},
{Opt_err, NULL},
};
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_user_subvol_rm_allowed:
btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
break;
+ case Opt_enospc_debug:
+ btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
+ break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index af7dbca1527..dd13eb81ee4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1338,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
ret = btrfs_shrink_device(device, 0);
if (ret)
- goto error_brelse;
+ goto error_undo;
ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
if (ret)
- goto error_brelse;
+ goto error_undo;
device->in_fs_metadata = 0;
@@ -1416,6 +1416,13 @@ out:
mutex_unlock(&root->fs_info->volume_mutex);
mutex_unlock(&uuid_mutex);
return ret;
+error_undo:
+ if (device->writeable) {
+ list_add(&device->dev_alloc_list,
+ &root->fs_info->fs_devices->alloc_list);
+ root->fs_info->fs_devices->rw_devices++;
+ }
+ goto error_brelse;
}
/*
@@ -1633,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->dev_root = root->fs_info->dev_root;
device->bdev = bdev;
device->in_fs_metadata = 1;
- device->mode = 0;
+ device->mode = FMODE_EXCL;
set_blocksize(device->bdev, 4096);
if (seeding_dev) {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f0aef787a10..ebafa65a29b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -60,7 +60,6 @@ int ceph_init_dentry(struct dentry *dentry)
}
di->dentry = dentry;
di->lease_session = NULL;
- di->parent_inode = igrab(dentry->d_parent->d_inode);
dentry->d_fsdata = di;
dentry->d_time = jiffies;
ceph_dentry_lru_add(dentry);
@@ -410,7 +409,7 @@ more:
spin_lock(&inode->i_lock);
if (ci->i_release_count == fi->dir_release_count) {
dout(" marking %p complete\n", inode);
- ci->i_ceph_flags |= CEPH_I_COMPLETE;
+ /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = filp->f_pos;
}
spin_unlock(&inode->i_lock);
@@ -497,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
/* .snap dir? */
if (err == -ENOENT &&
+ ceph_snap(parent) == CEPH_NOSNAP &&
strcmp(dentry->d_name.name,
fsc->mount_options->snapdir_name) == 0) {
struct inode *inode = ceph_get_snapdir(parent);
@@ -993,7 +993,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
dir = dentry->d_parent->d_inode;
@@ -1030,28 +1030,8 @@ out_touch:
static void ceph_dentry_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
- struct inode *parent_inode = NULL;
- u64 snapid = CEPH_NOSNAP;
- if (!IS_ROOT(dentry)) {
- parent_inode = di->parent_inode;
- if (parent_inode)
- snapid = ceph_snap(parent_inode);
- }
- dout("dentry_release %p parent %p\n", dentry, parent_inode);
- if (parent_inode && snapid != CEPH_SNAPDIR) {
- struct ceph_inode_info *ci = ceph_inode(parent_inode);
-
- spin_lock(&parent_inode->i_lock);
- if (ci->i_shared_gen == di->lease_shared_gen ||
- snapid <= CEPH_MAXSNAP) {
- dout(" clearing %p complete (d_release)\n",
- parent_inode);
- ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
- ci->i_release_count++;
- }
- spin_unlock(&parent_inode->i_lock);
- }
+ dout("dentry_release %p\n", dentry);
if (di) {
ceph_dentry_lru_del(dentry);
if (di->lease_session)
@@ -1059,8 +1039,6 @@ static void ceph_dentry_release(struct dentry *dentry)
kmem_cache_free(ceph_dentry_cachep, di);
dentry->d_fsdata = NULL;
}
- if (parent_inode)
- iput(parent_inode);
}
static int ceph_snapdir_d_revalidate(struct dentry *dentry,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5625463aa47..193bfa5e9cb 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -707,7 +707,7 @@ static int fill_inode(struct inode *inode,
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
dout(" marking %p complete (empty)\n", inode);
- ci->i_ceph_flags |= CEPH_I_COMPLETE;
+ /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = 2;
}
break;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 88fcaa21b80..20b907d76ae 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -207,7 +207,6 @@ struct ceph_dentry_info {
struct dentry *dentry;
u64 time;
u64 offset;
- struct inode *parent_inode;
};
struct ceph_inode_xattrs_info {
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a00e6c..691c3fd8ce1 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1228,7 +1228,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_readv(file, vec, vlen, &pos);
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PREAD)
+ ret = compat_readv(file, vec, vlen, &pos);
fput_light(file, fput_needed);
return ret;
}
@@ -1285,7 +1287,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_writev(file, vec, vlen, &pos);
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PWRITE)
+ ret = compat_writev(file, vec, vlen, &pos);
fput_light(file, fput_needed);
return ret;
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 2a6bd9a4ae9..611ffe928c0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1523,6 +1523,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
}
EXPORT_SYMBOL(d_alloc_root);
+static struct dentry * __d_find_any_alias(struct inode *inode)
+{
+ struct dentry *alias;
+
+ if (list_empty(&inode->i_dentry))
+ return NULL;
+ alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
+ __dget(alias);
+ return alias;
+}
+
+static struct dentry * d_find_any_alias(struct inode *inode)
+{
+ struct dentry *de;
+
+ spin_lock(&inode->i_lock);
+ de = __d_find_any_alias(inode);
+ spin_unlock(&inode->i_lock);
+ return de;
+}
+
+
/**
* d_obtain_alias - find or allocate a dentry for a given inode
* @inode: inode to allocate the dentry for
@@ -1552,7 +1574,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
if (IS_ERR(inode))
return ERR_CAST(inode);
- res = d_find_alias(inode);
+ res = d_find_any_alias(inode);
if (res)
goto out_iput;
@@ -1565,7 +1587,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
spin_lock(&inode->i_lock);
- res = __d_find_alias(inode, 0);
+ res = __d_find_any_alias(inode);
if (res) {
spin_unlock(&inode->i_lock);
dput(tmp);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 267d0ada454..4a09af9e9a6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
* cleanup path and it is also acquired by eventpoll_release_file()
* if a file has been pushed inside an epoll set and it is then
* close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
+ * It is also acquired when inserting an epoll fd onto another epoll
+ * fd. We do this so that we walk the epoll tree and ensure that this
+ * insertion does not create a cycle of epoll file descriptors, which
+ * could lead to deadlock. We need a global mutex to prevent two
+ * simultaneous inserts (A into B and B into A) from racing and
+ * constructing a cycle without either insert observing that it is
+ * going to.
* It is possible to drop the "ep->mtx" and to use the global
* mutex "epmutex" (together with "ep->lock") to have it working,
* but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
*/
static DEFINE_MUTEX(epmutex);
+/* Used to check for epoll file descriptor inclusion loops */
+static struct nested_calls poll_loop_ncalls;
+
/* Used for safe wake up implementation */
static struct nested_calls poll_safewake_ncalls;
@@ -1198,6 +1208,62 @@ retry:
return res;
}
+/**
+ * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
+ * API, to verify that adding an epoll file inside another
+ * epoll structure, does not violate the constraints, in
+ * terms of closed loops, or too deep chains (which can
+ * result in excessive stack usage).
+ *
+ * @priv: Pointer to the epoll file to be currently checked.
+ * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
+ * data structure pointer.
+ * @call_nests: Current dept of the @ep_call_nested() call stack.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ * structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+{
+ int error = 0;
+ struct file *file = priv;
+ struct eventpoll *ep = file->private_data;
+ struct rb_node *rbp;
+ struct epitem *epi;
+
+ mutex_lock(&ep->mtx);
+ for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ epi = rb_entry(rbp, struct epitem, rbn);
+ if (unlikely(is_file_epoll(epi->ffd.file))) {
+ error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ ep_loop_check_proc, epi->ffd.file,
+ epi->ffd.file->private_data, current);
+ if (error != 0)
+ break;
+ }
+ }
+ mutex_unlock(&ep->mtx);
+
+ return error;
+}
+
+/**
+ * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
+ * another epoll file (represented by @ep) does not create
+ * closed loops or too deep chains.
+ *
+ * @ep: Pointer to the epoll private data structure.
+ * @file: Pointer to the epoll file to be checked.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ * structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check(struct eventpoll *ep, struct file *file)
+{
+ return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ ep_loop_check_proc, file, ep, current);
+}
+
/*
* Open an eventpoll file descriptor.
*/
@@ -1246,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
struct epoll_event __user *, event)
{
int error;
+ int did_lock_epmutex = 0;
struct file *file, *tfile;
struct eventpoll *ep;
struct epitem *epi;
@@ -1287,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
*/
ep = file->private_data;
+ /*
+ * When we insert an epoll file descriptor, inside another epoll file
+ * descriptor, there is the change of creating closed loops, which are
+ * better be handled here, than in more critical paths.
+ *
+ * We hold epmutex across the loop check and the insert in this case, in
+ * order to prevent two separate inserts from racing and each doing the
+ * insert "at the same time" such that ep_loop_check passes on both
+ * before either one does the insert, thereby creating a cycle.
+ */
+ if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+ mutex_lock(&epmutex);
+ did_lock_epmutex = 1;
+ error = -ELOOP;
+ if (ep_loop_check(ep, tfile) != 0)
+ goto error_tgt_fput;
+ }
+
+
mutex_lock(&ep->mtx);
/*
@@ -1322,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_unlock(&ep->mtx);
error_tgt_fput:
+ if (unlikely(did_lock_epmutex))
+ mutex_unlock(&epmutex);
+
fput(tfile);
error_fput:
fput(file);
@@ -1441,6 +1530,12 @@ static int __init eventpoll_init(void)
EP_ITEM_COST;
BUG_ON(max_user_watches < 0);
+ /*
+ * Initialize the structure used to perform epoll file descriptor
+ * inclusion loops checks.
+ */
+ ep_nested_calls_init(&poll_loop_ncalls);
+
/* Initialize the structure used to perform safe poll wait head wake ups */
ep_nested_calls_init(&poll_safewake_ncalls);
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d0283..4d70db110cf 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
err = exofs_set_link(new_dir, new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME;
if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= EXOFS_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = exofs_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = CURRENT_TIME;
exofs_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d8341d82..adb91855ccd 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
if (new_dir->i_nlink >= EXT2_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = ext2_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
/*
* Like most other Unix systems, set the ctime for inodes on a
* rename.
- * inode_dec_link_count() will mark the inode dirty.
*/
old_inode->i_ctime = CURRENT_TIME_SEC;
+ mark_inode_dirty(old_inode);
ext2_delete_entry (old_de, old_page);
- inode_dec_link_count(old_inode);
if (dir_de) {
if (old_dir != new_dir)
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index f88f752babd..adae3fb7451 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,7 +43,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/* This is not negative dentry. Always valid. */
@@ -54,7 +54,7 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed8..8bd0ef9286c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,7 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
{
struct inode *inode;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = entry->d_inode;
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
if (err)
return err;
- if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
- return 0;
+ if (attr->ia_valid & ATTR_OPEN) {
+ if (fc->atomic_o_trunc)
+ return 0;
+ file = NULL;
+ }
if (attr->ia_valid & ATTR_SIZE)
is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c82..9e0832dbb1e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
return ff;
}
+static void fuse_release_async(struct work_struct *work)
+{
+ struct fuse_req *req;
+ struct fuse_conn *fc;
+ struct path path;
+
+ req = container_of(work, struct fuse_req, misc.release.work);
+ path = req->misc.release.path;
+ fc = get_fuse_conn(path.dentry->d_inode);
+
+ fuse_put_request(fc, req);
+ path_put(&path);
+}
+
static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
- path_put(&req->misc.release.path);
+ if (fc->destroy_req) {
+ /*
+ * If this is a fuseblk mount, then it's possible that
+ * releasing the path will result in releasing the
+ * super block and sending the DESTROY request. If
+ * the server is single threaded, this would hang.
+ * For this reason do the path_put() in a separate
+ * thread.
+ */
+ atomic_inc(&req->count);
+ INIT_WORK(&req->misc.release.work, fuse_release_async);
+ schedule_work(&req->misc.release.work);
+ } else {
+ path_put(&req->misc.release.path);
+ }
}
-static void fuse_file_put(struct fuse_file *ff)
+static void fuse_file_put(struct fuse_file *ff, bool sync)
{
if (atomic_dec_and_test(&ff->count)) {
struct fuse_req *req = ff->reserved_req;
- req->end = fuse_release_end;
- fuse_request_send_background(ff->fc, req);
+ if (sync) {
+ fuse_request_send(ff->fc, req);
+ path_put(&req->misc.release.path);
+ fuse_put_request(ff->fc, req);
+ } else {
+ req->end = fuse_release_end;
+ fuse_request_send_background(ff->fc, req);
+ }
kfree(ff);
}
}
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
* Normally this will send the RELEASE request, however if
* some asynchronous READ or WRITE requests are outstanding,
* the sending will be delayed.
+ *
+ * Make the release synchronous if this is a fuseblk mount,
+ * synchronous RELEASE is allowed (and desirable) in this case
+ * because the server can be trusted not to screw up.
*/
- fuse_file_put(ff);
+ fuse_file_put(ff, ff->fc->destroy_req != NULL);
}
static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
page_cache_release(page);
}
if (req->ff)
- fuse_file_put(req->ff);
+ fuse_file_put(req->ff, false);
}
static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{
__free_page(req->pages[0]);
- fuse_file_put(req->ff);
+ fuse_file_put(req->ff, false);
}
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e..d4286947bc2 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/poll.h>
+#include <linux/workqueue.h>
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
/** Data for asynchronous requests */
union {
struct {
- struct fuse_release_in in;
+ union {
+ struct fuse_release_in in;
+ struct work_struct work;
+ };
struct path path;
} release;
struct fuse_init_in init_in;
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4a456338b87..0da8da2c991 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
int error;
int had_lock = 0;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
parent = dget_parent(dentry);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 85ba027d1c4..72c31a315d9 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -59,14 +59,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
struct address_space *mapping = (struct address_space *)(gl + 1);
gfs2_init_glock_once(gl);
- memset(mapping, 0, sizeof(*mapping));
- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
- spin_lock_init(&mapping->tree_lock);
- spin_lock_init(&mapping->i_mmap_lock);
- INIT_LIST_HEAD(&mapping->private_list);
- spin_lock_init(&mapping->private_lock);
- INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
- INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+ address_space_init_once(mapping);
}
/**
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aaa223..b4d70b13be9 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
}
/*
- * hfs_unlink()
+ * hfs_remove()
*
- * This is the unlink() entry in the inode_operations structure for
- * regular HFS directories. The purpose is to delete an existing
- * file, given the inode for the parent directory and the name
- * (and its length) of the existing file.
- */
-static int hfs_unlink(struct inode *dir, struct dentry *dentry)
-{
- struct inode *inode;
- int res;
-
- inode = dentry->d_inode;
- res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
- if (res)
- return res;
-
- drop_nlink(inode);
- hfs_delete_inode(inode);
- inode->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(inode);
-
- return res;
-}
-
-/*
- * hfs_rmdir()
+ * This serves as both unlink() and rmdir() in the inode_operations
+ * structure for regular HFS directories. The purpose is to delete
+ * an existing child, given the inode for the parent directory and
+ * the name (and its length) of the existing directory.
*
- * This is the rmdir() entry in the inode_operations structure for
- * regular HFS directories. The purpose is to delete an existing
- * directory, given the inode for the parent directory and the name
- * (and its length) of the existing directory.
+ * HFS does not have hardlinks, so both rmdir and unlink set the
+ * link count to 0. The only difference is the emptiness check.
*/
-static int hfs_rmdir(struct inode *dir, struct dentry *dentry)
+static int hfs_remove(struct inode *dir, struct dentry *dentry)
{
- struct inode *inode;
+ struct inode *inode = dentry->d_inode;
int res;
- inode = dentry->d_inode;
- if (inode->i_size != 2)
+ if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
return -ENOTEMPTY;
res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* Unlink destination if it already exists */
if (new_dentry->d_inode) {
- res = hfs_unlink(new_dir, new_dentry);
+ res = hfs_remove(new_dir, new_dentry);
if (res)
return res;
}
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
const struct inode_operations hfs_dir_inode_operations = {
.create = hfs_create,
.lookup = hfs_lookup,
- .unlink = hfs_unlink,
+ .unlink = hfs_remove,
.mkdir = hfs_mkdir,
- .rmdir = hfs_rmdir,
+ .rmdir = hfs_remove,
.rename = hfs_rename,
.setattr = hfs_inode_setattr,
};
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f..0647d80accf 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -295,6 +295,20 @@ static void destroy_inode(struct inode *inode)
call_rcu(&inode->i_rcu, i_callback);
}
+void address_space_init_once(struct address_space *mapping)
+{
+ memset(mapping, 0, sizeof(*mapping));
+ INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
+ spin_lock_init(&mapping->tree_lock);
+ spin_lock_init(&mapping->i_mmap_lock);
+ INIT_LIST_HEAD(&mapping->private_list);
+ spin_lock_init(&mapping->private_lock);
+ INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+ INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+ mutex_init(&mapping->unmap_mutex);
+}
+EXPORT_SYMBOL(address_space_init_once);
+
/*
* These are initializations that only need to be done
* once, because the fields are idempotent across use
@@ -308,13 +322,7 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_devices);
INIT_LIST_HEAD(&inode->i_wb_list);
INIT_LIST_HEAD(&inode->i_lru);
- INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
- spin_lock_init(&inode->i_data.tree_lock);
- spin_lock_init(&inode->i_data.i_mmap_lock);
- INIT_LIST_HEAD(&inode->i_data.private_list);
- spin_lock_init(&inode->i_data.private_lock);
- INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
- INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
+ address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -540,11 +548,14 @@ void evict_inodes(struct super_block *sb)
/**
* invalidate_inodes - attempt to free all inodes on a superblock
* @sb: superblock to operate on
+ * @kill_dirty: flag to guide handling of dirty inodes
*
* Attempts to free all inodes for a given superblock. If there were any
* busy inodes return a non-zero value, else zero.
+ * If @kill_dirty is set, discard dirty inodes too, otherwise treat
+ * them as busy.
*/
-int invalidate_inodes(struct super_block *sb)
+int invalidate_inodes(struct super_block *sb, bool kill_dirty)
{
int busy = 0;
struct inode *inode, *next;
@@ -556,6 +567,10 @@ int invalidate_inodes(struct super_block *sb)
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
continue;
+ if (inode->i_state & I_DIRTY && !kill_dirty) {
+ busy = 1;
+ continue;
+ }
if (atomic_read(&inode->i_count)) {
busy = 1;
continue;
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b124..9b976b57d7f 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -112,4 +112,4 @@ extern void release_open_intent(struct nameidata *);
*/
extern int get_nr_dirty_inodes(void);
extern void evict_inodes(struct super_block *);
-extern int invalidate_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 81ead850ddb..5a2b269428a 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1600,7 +1600,7 @@ out:
static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
* This is not negative dentry. Always valid.
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337ddfdb..6e6777f1b4b 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
new_de = minix_find_entry(new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
minix_set_link(new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= info->s_link_max)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = minix_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
minix_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 0087cf9c2c6..a4689eb2df2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1546,6 +1546,7 @@ static int path_walk(const char *name, struct nameidata *nd)
/* nd->path had been dropped */
current->total_link_count = 0;
nd->path = save;
+ nd->inode = save.dentry->d_inode;
path_get(&nd->path);
nd->flags |= LOOKUP_REVAL;
result = link_path_walk(name, nd);
@@ -2455,22 +2456,29 @@ struct file *do_filp_open(int dfd, const char *pathname,
/* !O_CREAT, simple open */
error = do_path_lookup(dfd, pathname, flags, &nd);
if (unlikely(error))
- goto out_filp;
+ goto out_filp2;
error = -ELOOP;
if (!(nd.flags & LOOKUP_FOLLOW)) {
if (nd.inode->i_op->follow_link)
- goto out_path;
+ goto out_path2;
}
error = -ENOTDIR;
if (nd.flags & LOOKUP_DIRECTORY) {
if (!nd.inode->i_op->lookup)
- goto out_path;
+ goto out_path2;
}
audit_inode(pathname, nd.path.dentry);
filp = finish_open(&nd, open_flag, acc_mode);
+out2:
release_open_intent(&nd);
return filp;
+out_path2:
+ path_put(&nd.path);
+out_filp2:
+ filp = ERR_PTR(error);
+ goto out2;
+
creat:
/* OK, have to create the file. Find the parent. */
error = path_init_rcu(dfd, pathname,
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b9537169..d1edf26025d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1244,7 +1244,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
*/
br_write_lock(vfsmount_lock);
if (mnt_get_count(mnt) != 2) {
- br_write_lock(vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
return -EBUSY;
}
br_write_unlock(vfsmount_lock);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb..2f8e61816d7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
#include <linux/inet.h>
#include <linux/nfs_xdr.h>
#include <linux/slab.h>
+#include <linux/compat.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
*/
u64 nfs_compat_user_ino64(u64 fileid)
{
- int ino;
+#ifdef CONFIG_COMPAT
+ compat_ulong_t ino;
+#else
+ unsigned long ino;
+#endif
if (enable_ino64)
return fileid;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a747407314..1be36cf65bf 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -298,6 +298,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
#if defined(CONFIG_NFS_V4_1)
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
+extern void nfs4_schedule_session_recovery(struct nfs4_session *);
+#else
+static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+}
#endif /* CONFIG_NFS_V4_1 */
extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +312,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
-extern void nfs4_schedule_state_recovery(struct nfs_client *);
+extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern void nfs4_schedule_state_manager(struct nfs_client *);
-extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
-extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
+extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8c..b73c34375f6 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -219,6 +219,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
goto out_err;
}
buf = kmalloc(rlen + 1, GFP_KERNEL);
+ if (!buf) {
+ dprintk("%s: Not enough memory\n", __func__);
+ goto out_err;
+ }
buf[rlen] = '\0';
memcpy(buf, r_addr, rlen);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40a..0a07e353a96 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -256,12 +256,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -272,7 +273,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
exception->retry = 1;
break;
#endif /* defined(CONFIG_NFS_V4_1) */
@@ -295,8 +296,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
}
/* We failed to handle the error */
return nfs4_map_errors(ret);
-do_state_recovery:
- nfs4_schedule_state_recovery(clp);
+wait_on_recovery:
ret = nfs4_wait_clnt_recover(clp);
if (ret == 0)
exception->retry = 1;
@@ -435,8 +435,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
clp = res->sr_session->clp;
do_renew_lease(clp, timestamp);
/* Check sequence flags */
- if (atomic_read(&clp->cl_count) > 1)
- nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
+ if (res->sr_status_flags != 0)
+ nfs4_schedule_lease_recovery(clp);
break;
case -NFS4ERR_DELAY:
/* The server detected a resend of the RPC call and
@@ -1255,14 +1255,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(
- server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
/* Don't recall a delegation if it was lost */
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_lease_recovery(server->nfs_client);
goto out;
case -ERESTARTSYS:
/*
@@ -1271,7 +1270,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
*/
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
case -EKEYEXPIRED:
/*
* User RPCSEC_GSS context has expired.
@@ -1587,7 +1586,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
!test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
break;
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
ret = -EIO;
}
return ret;
@@ -3178,7 +3177,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
if (task->tk_status < 0) {
/* Unless we're shutting down, schedule state recovery! */
if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
return;
}
do_renew_lease(clp, timestamp);
@@ -3252,6 +3251,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
}
}
+static int buf_to_pages_noslab(const void *buf, size_t buflen,
+ struct page **pages, unsigned int *pgbase)
+{
+ struct page *newpage, **spages;
+ int rc = 0;
+ size_t len;
+ spages = pages;
+
+ do {
+ len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
+ newpage = alloc_page(GFP_KERNEL);
+
+ if (newpage == NULL)
+ goto unwind;
+ memcpy(page_address(newpage), buf, len);
+ buf += len;
+ buflen -= len;
+ *pages++ = newpage;
+ rc++;
+ } while (buflen != 0);
+
+ return rc;
+
+unwind:
+ for(; rc > 0; rc--)
+ __free_page(spages[rc-1]);
+ return -ENOMEM;
+}
+
struct nfs4_cached_acl {
int cached;
size_t len;
@@ -3420,13 +3448,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
.rpc_argp = &arg,
.rpc_resp = &res,
};
- int ret;
+ int ret, i;
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
+ i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
+ if (i < 0)
+ return i;
nfs_inode_return_delegation(inode);
- buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
+
+ /*
+ * Free each page after tx, so the only ref left is
+ * held by the network stack
+ */
+ for (; i > 0; i--)
+ put_page(pages[i-1]);
+
/*
* Acl update can result in inode attribute update.
* so mark the attribute cache invalid.
@@ -3464,12 +3502,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -3480,7 +3519,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
task->tk_status = 0;
return -EAGAIN;
#endif /* CONFIG_NFS_V4_1 */
@@ -3497,9 +3536,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
}
task->tk_status = nfs4_map_errors(task->tk_status);
return 0;
-do_state_recovery:
+wait_on_recovery:
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
- nfs4_schedule_state_recovery(clp);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
task->tk_status = 0;
@@ -4110,7 +4148,7 @@ static void nfs4_lock_release(void *calldata)
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
data->arg.lock_seqid);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("%s: cancelling lock!\n", __func__);
} else
nfs_free_seqid(data->arg.lock_seqid);
@@ -4134,23 +4172,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
{
- struct nfs_client *clp = server->nfs_client;
- struct nfs4_state *state = lsp->ls_state;
-
switch (error) {
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_EXPIRED:
+ lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
if (new_lock_owner != 0 ||
(lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_nograce(clp, state);
- lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ nfs4_schedule_stateid_recovery(server, lsp->ls_state);
break;
case -NFS4ERR_STALE_STATEID:
- if (new_lock_owner != 0 ||
- (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_reboot(clp, state);
lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ case -NFS4ERR_EXPIRED:
+ nfs4_schedule_lease_recovery(server->nfs_client);
};
}
@@ -4366,12 +4399,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_EXPIRED:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
+ nfs4_schedule_lease_recovery(server->nfs_client);
+ goto out;
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -ERESTARTSYS:
/*
@@ -4381,7 +4416,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
err = 0;
goto out;
case -EKEYEXPIRED:
@@ -4988,10 +5023,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
int status;
unsigned *ptr;
struct nfs4_session *session = clp->cl_session;
+ long timeout = 0;
+ int err;
dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
- status = _nfs4_proc_create_session(clp);
+ do {
+ status = _nfs4_proc_create_session(clp);
+ if (status == -NFS4ERR_DELAY) {
+ err = nfs4_delay(clp->cl_rpcclient, &timeout);
+ if (err)
+ status = err;
+ }
+ } while (status == -NFS4ERR_DELAY);
+
if (status)
goto out;
@@ -5100,7 +5145,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5187,7 +5232,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
if (IS_ERR(task))
ret = PTR_ERR(task);
else
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("<-- %s status=%d\n", __func__, ret);
return ret;
}
@@ -5203,8 +5248,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
goto out;
}
ret = rpc_wait_for_completion_task(task);
- if (!ret)
+ if (!ret) {
+ struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
+
+ if (task->tk_status == 0)
+ nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
ret = task->tk_status;
+ }
rpc_put_task(task);
out:
dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5241,7 +5291,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5309,6 +5359,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
status = PTR_ERR(task);
goto out;
}
+ status = nfs4_wait_for_completion_rpc_task(task);
+ if (status == 0)
+ status = task->tk_status;
rpc_put_task(task);
return 0;
out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04..0592288f9f0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1007,9 +1007,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
}
/*
- * Schedule a state recovery attempt
+ * Schedule a lease recovery attempt
*/
-void nfs4_schedule_state_recovery(struct nfs_client *clp)
+void nfs4_schedule_lease_recovery(struct nfs_client *clp)
{
if (!clp)
return;
@@ -1018,7 +1018,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
-int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1032,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
return 1;
}
-int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1041,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
return 1;
}
+void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ nfs4_state_mark_reclaim_nograce(clp, state);
+ nfs4_schedule_state_manager(clp);
+}
+
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
struct inode *inode = state->inode;
@@ -1436,10 +1444,15 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
}
#ifdef CONFIG_NFS_V4_1
+void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+ nfs4_schedule_lease_recovery(session->clp);
+}
+
void nfs41_handle_recall_slot(struct nfs_client *clp)
{
set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1460,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
clp->cl_boot_time = CURRENT_TIME;
nfs4_state_start_reclaim_nograce(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1455,7 +1468,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
{
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
nfs4_state_start_reclaim_reboot(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1475,7 +1488,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
{
nfs_expire_all_delegations(clp);
if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee..94d50e86a12 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1660,7 +1660,7 @@ static void encode_create_session(struct xdr_stream *xdr,
p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
*p++ = cpu_to_be32(OP_CREATE_SESSION);
- p = xdr_encode_hyper(p, clp->cl_ex_clid);
+ p = xdr_encode_hyper(p, clp->cl_clientid);
*p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
*p++ = cpu_to_be32(args->flags); /*flags */
@@ -4694,7 +4694,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
- xdr_decode_hyper(p, &clp->cl_ex_clid);
+ xdr_decode_hyper(p, &clp->cl_clientid);
p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
goto out_overflow;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a2002..c541093a5bf 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
/* Default path we try to mount. "%s" gets replaced by our IP address */
#define NFS_ROOT "/tftpboot/%s"
+/* Default NFSROOT mount options. */
+#define NFS_DEF_OPTIONS "udp"
+
/* Parameters passed from the kernel command line */
static char nfs_root_parms[256] __initdata = "";
/* Text-based mount options passed to super.c */
-static char nfs_root_options[256] __initdata = "";
+static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
/* Address of NFS server */
static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
}
static int __init root_nfs_cat(char *dest, const char *src,
- const size_t destlen)
+ const size_t destlen)
{
+ size_t len = strlen(dest);
+
+ if (len && dest[len - 1] != ',')
+ if (strlcat(dest, ",", destlen) > destlen)
+ return -1;
+
if (strlcat(dest, src, destlen) > destlen)
return -1;
return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
if (root_nfs_cat(nfs_root_options, incoming,
sizeof(nfs_root_options)))
return -1;
-
- /*
- * Possibly prepare for more options to be appended
- */
- if (nfs_root_options[0] != '\0' &&
- nfs_root_options[strlen(nfs_root_options)] != ',')
- if (root_nfs_cat(nfs_root_options, ",",
- sizeof(nfs_root_options)))
- return -1;
-
return 0;
}
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
*/
static int __init root_nfs_data(char *cmdline)
{
- char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
+ char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
int len, retval = -1;
char *tmp = NULL;
const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
* Append mandatory options for nfsroot so they override
* what has come before
*/
- snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4",
+ snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
&servaddr);
- if (root_nfs_cat(nfs_root_options, addr_option,
+ if (root_nfs_cat(nfs_root_options, mand_options,
sizeof(nfs_root_options)))
goto out_optionstoolong;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd..6481d537d69 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -180,7 +180,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
task_setup_data.rpc_client = NFS_CLIENT(dir);
task = rpc_run_task(&task_setup_data);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
return 1;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046c..42b92d7a9cc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1292,6 +1292,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
+ if (how & FLUSH_SYNC)
+ rpc_wait_for_completion_task(task);
rpc_put_task(task);
return 0;
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cde36cb0f34..02eb4edf0ec 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -432,7 +432,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
*/
- p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4);
+ p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 54b60bfceb8..7b566ec14e1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2445,15 +2445,16 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
static struct nfs4_delegation *
find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
{
- struct nfs4_delegation *dp = NULL;
+ struct nfs4_delegation *dp;
spin_lock(&recall_lock);
- list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
- if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
- break;
- }
+ list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
+ if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
+ spin_unlock(&recall_lock);
+ return dp;
+ }
spin_unlock(&recall_lock);
- return dp;
+ return NULL;
}
int share_access_to_flags(u32 share_access)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1275b865507..615f0a9f060 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
u32 dummy;
char *machine_name;
- int i;
+ int i, j;
int nr_secflavs;
READ_BUF(16);
@@ -1215,7 +1215,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
READ_BUF(4);
READ32(dummy);
READ_BUF(dummy * 4);
- for (i = 0; i < dummy; ++i)
+ for (j = 0; j < dummy; ++j)
READ32(dummy);
break;
case RPC_AUTH_GSS:
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 388e9e8f528..85f7baa15f5 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,11 +35,6 @@
#include "btnode.h"
-void nilfs_btnode_cache_init_once(struct address_space *btnc)
-{
- nilfs_mapping_init_once(btnc);
-}
-
static const struct address_space_operations def_btnode_aops = {
.sync_page = block_sync_page,
};
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e..1b8ebd888c2 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
struct buffer_head *newbh;
};
-void nilfs_btnode_cache_init_once(struct address_space *);
void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 6a0e2a189f6..a0babd2bff6 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
struct backing_dev_info *bdi = inode->i_sb->s_bdi;
INIT_LIST_HEAD(&shadow->frozen_buffers);
- nilfs_mapping_init_once(&shadow->frozen_data);
+ address_space_init_once(&shadow->frozen_data);
nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
- nilfs_mapping_init_once(&shadow->frozen_btnodes);
+ address_space_init_once(&shadow->frozen_btnodes);
nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
mi->mi_shadow = shadow;
return 0;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 98034271cd0..161791d2645 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inc_nlink(old_inode);
nilfs_set_link(new_dir, new_de, new_page, old_inode);
nilfs_mark_inode_dirty(new_dir);
new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= NILFS_LINK_MAX)
goto out_dir;
}
- inc_nlink(old_inode);
err = nilfs_add_link(new_dentry, old_inode);
- if (err) {
- drop_nlink(old_inode);
- nilfs_mark_inode_dirty(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de) {
inc_nlink(new_dir);
nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = CURRENT_TIME;
nilfs_delete_entry(old_de, old_page);
- drop_nlink(old_inode);
if (dir_de) {
nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0c432416cfe..a585b35fd6b 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
return nc;
}
-void nilfs_mapping_init_once(struct address_space *mapping)
-{
- memset(mapping, 0, sizeof(*mapping));
- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
- spin_lock_init(&mapping->tree_lock);
- INIT_LIST_HEAD(&mapping->private_list);
- spin_lock_init(&mapping->private_lock);
-
- spin_lock_init(&mapping->i_mmap_lock);
- INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
- INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
-}
-
void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi,
const struct address_space_operations *aops)
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 622df27cd89..2a00953ebd5 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
void nilfs_copy_back_pages(struct address_space *, struct address_space *);
void nilfs_clear_dirty_pages(struct address_space *);
-void nilfs_mapping_init_once(struct address_space *mapping);
void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi,
const struct address_space_operations *aops);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5c7f3..2de9f636792 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,7 +430,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
nilfs_segctor_map_segsum_entry(
sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
- if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
+ if (NILFS_I(inode)->i_root &&
+ !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
/* skip finfo */
}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 58fd707174e..1673b3d9984 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1279,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
#ifdef CONFIG_NILFS_XATTR
init_rwsem(&ii->xattr_sem);
#endif
- nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
+ address_space_init_once(&ii->i_btnode_cache);
ii->i_bmap = &ii->i_bmap_data;
inode_init_once(&ii->vfs_inode);
}
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 6d80ecc7834..7eb90403fc8 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -56,7 +56,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
int ret = 0; /* if all else fails, just return false */
struct ocfs2_super *osb;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = dentry->d_inode;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c..6180da1e37e 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
ocfs2_quota_trans_credits(sb);
}
-/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
- * bitmap block for the new bit) dx_root update for free list */
-#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1)
+/* data block for new dir/symlink, allocation of directory block, dx_root
+ * update for free list */
+#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
{
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e..19ebc5aad39 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
u32 num_clusters, unsigned int e_flags)
{
int ret, delete, index, credits = 0;
- u32 new_bit, new_len;
+ u32 new_bit, new_len, orig_num_clusters;
unsigned int set_len;
struct ocfs2_super *osb = OCFS2_SB(sb);
handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
goto out;
}
+ orig_num_clusters = num_clusters;
+
while (num_clusters) {
ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
* in write-back mode.
*/
if (context->get_clusters == ocfs2_di_get_clusters) {
- ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
+ ret = ocfs2_cow_sync_writeback(sb, context, cpos,
+ orig_num_clusters);
if (ret)
mlog_errno(ret);
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447..36c423fb063 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
struct mount_options *mopt,
int is_remount)
{
- int status;
+ int status, user_stack = 0;
char *p;
u32 tmp;
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
memcpy(mopt->cluster_stack, args[0].from,
OCFS2_STACK_LABEL_LEN);
mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+ /*
+ * Open code the memcmp here as we don't have
+ * an osb to pass to
+ * ocfs2_userspace_stack().
+ */
+ if (memcmp(mopt->cluster_stack,
+ OCFS2_CLASSIC_CLUSTER_STACK,
+ OCFS2_STACK_LABEL_LEN))
+ user_stack = 1;
break;
case Opt_inode64:
mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
}
}
- /* Ensure only one heartbeat mode */
- tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
- OCFS2_MOUNT_HB_NONE);
- if (hweight32(tmp) != 1) {
- mlog(ML_ERROR, "Invalid heartbeat mount options\n");
- status = 0;
- goto bail;
+ if (user_stack == 0) {
+ /* Ensure only one heartbeat mode */
+ tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
+ OCFS2_MOUNT_HB_GLOBAL |
+ OCFS2_MOUNT_HB_NONE);
+ if (hweight32(tmp) != 1) {
+ mlog(ML_ERROR, "Invalid heartbeat mount options\n");
+ status = 0;
+ goto bail;
+ }
}
status = 1;
diff --git a/fs/open.c b/fs/open.c
index 5a2c6ebc22b..b47aab39c05 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
+
+ /* It's not possible punch hole on append only file */
+ if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
+ return -EPERM;
+
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
/*
* Revalidate the write permissions, in case security policy has
* changed since the files were opened.
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa..b10e3540d5b 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
}
vm->vblk_size = get_unaligned_be32(data + 0x08);
+ if (vm->vblk_size == 0) {
+ ldm_error ("Illegal VBLK size");
+ return false;
+ }
+
vm->vblk_offset = get_unaligned_be32(data + 0x0C);
vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index 48cec7cbca1..be03a0b08b4 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,10 +10,13 @@
#include "check.h"
#include "osf.h"
+#define MAX_OSF_PARTITIONS 8
+
int osf_partition(struct parsed_partitions *state)
{
int i;
int slot = 1;
+ unsigned int npartitions;
Sector sect;
unsigned char *data;
struct disklabel {
@@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state)
u8 p_fstype;
u8 p_frag;
__le16 p_cpg;
- } d_partitions[8];
+ } d_partitions[MAX_OSF_PARTITIONS];
} * label;
struct d_partition * partition;
@@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state)
put_dev_sector(sect);
return 0;
}
- for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) {
+ npartitions = le16_to_cpu(label->d_npartitions);
+ if (npartitions > MAX_OSF_PARTITIONS) {
+ put_dev_sector(sect);
+ return 0;
+ }
+ for (i = 0 ; i < npartitions; i++, partition++) {
if (slot == state->limit)
break;
if (le32_to_cpu(partition->p_size))
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d096e82b20..d49c4b5d2c3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2620,35 +2620,6 @@ static const struct pid_entry proc_base_stuff[] = {
&proc_self_inode_operations, NULL, {}),
};
-/*
- * Exceptional case: normally we are not allowed to unhash a busy
- * directory. In this case, however, we can do it - no aliasing problems
- * due to the way we treat inodes.
- */
-static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
- struct inode *inode;
- struct task_struct *task;
-
- if (nd->flags & LOOKUP_RCU)
- return -ECHILD;
-
- inode = dentry->d_inode;
- task = get_proc_task(inode);
- if (task) {
- put_task_struct(task);
- return 1;
- }
- d_drop(dentry);
- return 0;
-}
-
-static const struct dentry_operations proc_base_dentry_operations =
-{
- .d_revalidate = proc_base_revalidate,
- .d_delete = pid_delete_dentry,
-};
-
static struct dentry *proc_base_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
@@ -2685,7 +2656,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
- d_set_d_op(dentry, &proc_base_dentry_operations);
d_add(dentry, inode);
error = NULL;
out:
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 176ce4cda68..d6a7ca1fdac 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -27,6 +27,7 @@
static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
+ struct ctl_table_header *head;
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
@@ -38,8 +39,11 @@ static void proc_evict_inode(struct inode *inode)
de = PROC_I(inode)->pde;
if (de)
pde_put(de);
- if (PROC_I(inode)->sysctl)
- sysctl_head_put(PROC_I(inode)->sysctl);
+ head = PROC_I(inode)->sysctl;
+ if (head) {
+ rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
+ sysctl_head_put(head);
+ }
}
struct vfsmount *proc_mnt;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7f..927cbd115e5 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
return;
root = of_find_node_by_path("/");
if (root == NULL) {
- printk(KERN_ERR "/proc/device-tree: can't find root\n");
+ pr_debug("/proc/device-tree: can't find root\n");
return;
}
proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92a34e..8eb2522111c 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -408,15 +408,18 @@ static int proc_sys_compare(const struct dentry *parent,
const struct dentry *dentry, const struct inode *inode,
unsigned int len, const char *str, const struct qstr *name)
{
+ struct ctl_table_header *head;
/* Although proc doesn't have negative dentries, rcu-walk means
* that inode here can be NULL */
+ /* AV: can it, indeed? */
if (!inode)
- return 0;
+ return 1;
if (name->len != len)
return 1;
if (memcmp(name->name, str, len))
return 1;
- return !sysctl_is_seen(PROC_I(inode)->sysctl);
+ head = rcu_dereference(PROC_I(inode)->sysctl);
+ return !head || !sysctl_is_seen(head);
}
static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51ec345..68fdf45cc6c 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
dentry, inode, &security);
if (retval) {
- dir->i_nlink--;
+ DEC_DIR_INODE_NLINK(dir)
goto out_failed;
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 3cfb2e93364..5c11ca82b78 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -978,8 +978,6 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
- return -ECHILD;
return -EPERM;
}
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b1208c2..e474fbcf8bd 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
new_de = sysv_find_entry(new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
sysv_set_link(new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = sysv_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
sysv_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
sysv_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9eb86d..b7c338d5e9d 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
#include <linux/crc-itu-t.h>
#include <linux/exportfs.h>
+enum { UDF_MAX_LINKS = 0xffff };
+
static inline int udf_match(int len1, const unsigned char *name1, int len2,
const unsigned char *name2)
{
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct udf_inode_info *iinfo;
err = -EMLINK;
- if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
+ if (dir->i_nlink >= UDF_MAX_LINKS)
goto out;
err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
struct fileIdentDesc cfi, *fi;
int err;
- if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
+ if (inode->i_nlink >= UDF_MAX_LINKS)
return -EMLINK;
- }
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
goto end_rename;
retval = -EMLINK;
- if (!new_inode &&
- new_dir->i_nlink >=
- (256 << sizeof(new_dir->i_nlink)) - 1)
+ if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
goto end_rename;
}
if (!nfi) {
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 12f39b9e443..d6f681535eb 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -306,7 +306,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
ufs_set_link(new_dir, new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -318,12 +317,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= UFS_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = ufs_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -331,12 +327,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
/*
* Like most other Unix systems, set the ctime for inodes on a
* rename.
- * inode_dec_link_count() will mark the inode dirty.
*/
old_inode->i_ctime = CURRENT_TIME_SEC;
ufs_delete_entry(old_dir, old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e..d61611c8801 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
if (!capable(CAP_SYS_ADMIN))
return -XFS_ERROR(EPERM);
+ if (!blk_queue_discard(q))
+ return -XFS_ERROR(EOPNOTSUPP);
if (copy_from_user(&range, urange, sizeof(range)))
return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f5e2a19e0f8..0ca0e3c024d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
xfs_mount_t *mp,
void __user *arg)
{
- xfs_fsop_geom_v1_t fsgeo;
+ xfs_fsop_geom_t fsgeo;
int error;
- error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3);
+ error = xfs_fs_geometry(mp, &fsgeo, 3);
if (error)
return -error;
- if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+ /*
+ * Caller should have passed an argument of type
+ * xfs_fsop_geom_v1_t. This is a proper subset of the
+ * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
+ */
+ if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
return -XFS_ERROR(EFAULT);
return 0;
}
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d..85668efb3e3 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
xfs_fsop_geom_t *geo,
int new_version)
{
+
+ memset(geo, 0, sizeof(*geo));
+
geo->blocksize = mp->m_sb.sb_blocksize;
geo->rtextsize = mp->m_sb.sb_rextsize;
geo->agblocks = mp->m_sb.sb_agblocks;