summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig10
-rw-r--r--fs/btrfs/extent-tree.c6
-rw-r--r--fs/btrfs/extent_map.c13
-rw-r--r--fs/btrfs/extent_map.h1
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/file.c10
-rw-r--r--fs/btrfs/free-space-cache.c20
-rw-r--r--fs/btrfs/inode.c137
-rw-r--r--fs/btrfs/ioctl.c129
-rw-r--r--fs/btrfs/qgroup.c20
-rw-r--r--fs/btrfs/send.c4
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/btrfs/transaction.c19
-rw-r--r--fs/btrfs/tree-log.c10
-rw-r--r--fs/btrfs/volumes.c23
-rw-r--r--fs/buffer.c1
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifsfs.c1
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/file.c141
-rw-r--r--fs/cifs/smb1ops.c8
-rw-r--r--fs/cifs/smb2ops.c2
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/debugfs/inode.c2
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/ecryptfs/kthread.c6
-rw-r--r--fs/ecryptfs/mmap.c12
-rw-r--r--fs/eventpoll.c22
-rw-r--r--fs/exec.c3
-rw-r--r--fs/ext4/Kconfig2
-rw-r--r--fs/ext4/extents.c22
-rw-r--r--fs/ext4/file.c8
-rw-r--r--fs/ext4/fsync.c2
-rw-r--r--fs/ext4/inode.c99
-rw-r--r--fs/ext4/namei.c6
-rw-r--r--fs/ext4/super.c30
-rw-r--r--fs/f2fs/acl.c14
-rw-r--r--fs/f2fs/checkpoint.c3
-rw-r--r--fs/f2fs/data.c18
-rw-r--r--fs/f2fs/debug.c50
-rw-r--r--fs/f2fs/dir.c18
-rw-r--r--fs/f2fs/f2fs.h20
-rw-r--r--fs/f2fs/file.c26
-rw-r--r--fs/f2fs/gc.c102
-rw-r--r--fs/f2fs/hash.c18
-rw-r--r--fs/f2fs/inode.c4
-rw-r--r--fs/f2fs/namei.c34
-rw-r--r--fs/f2fs/node.c56
-rw-r--r--fs/f2fs/recovery.c18
-rw-r--r--fs/f2fs/segment.c48
-rw-r--r--fs/f2fs/segment.h15
-rw-r--r--fs/f2fs/super.c112
-rw-r--r--fs/f2fs/xattr.c7
-rw-r--r--fs/file.c2
-rw-r--r--fs/fuse/Kconfig16
-rw-r--r--fs/fuse/cuse.c36
-rw-r--r--fs/fuse/dev.c5
-rw-r--r--fs/fuse/file.c5
-rw-r--r--fs/gfs2/lock_dlm.c1
-rw-r--r--fs/gfs2/rgrp.c35
-rw-r--r--fs/jbd/journal.c3
-rw-r--r--fs/jbd2/transaction.c30
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/dir.c16
-rw-r--r--fs/nfs/nfs4proc.c18
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/read.c10
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfs/write.c10
-rw-r--r--fs/proc/generic.c13
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/pstore/ram.c14
-rw-r--r--fs/pstore/ram_core.c9
-rw-r--r--fs/seq_file.c2
-rw-r--r--fs/splice.c4
-rw-r--r--fs/udf/super.c3
-rw-r--r--fs/xfs/xfs_buf.c12
-rw-r--r--fs/xfs/xfs_buf.h6
-rw-r--r--fs/xfs/xfs_buf_item.c49
-rw-r--r--fs/xfs/xfs_buf_item.h2
-rw-r--r--fs/xfs/xfs_dir2_block.c6
-rw-r--r--fs/xfs/xfs_qm_syscalls.c4
-rw-r--r--fs/xfs/xfs_trans_buf.c27
84 files changed, 1056 insertions, 642 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index cfe512fd1ca..780725a463b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -68,16 +68,6 @@ source "fs/quota/Kconfig"
source "fs/autofs4/Kconfig"
source "fs/fuse/Kconfig"
-config CUSE
- tristate "Character device in Userspace support"
- depends on FUSE_FS
- help
- This FUSE extension allows character devices to be
- implemented in userspace.
-
- If you want to develop or use userspace character device
- based on CUSE, answer Y or M.
-
config GENERIC_ACL
bool
select FS_POSIX_ACL
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 521e9d4424f..a8b8adc0507 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3997,7 +3997,7 @@ again:
* We make the other tasks wait for the flush only when we can flush
* all things.
*/
- if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) {
+ if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
flushing = true;
space_info->flush = 1;
}
@@ -5560,7 +5560,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
int empty_cluster = 2 * 1024 * 1024;
struct btrfs_space_info *space_info;
int loop = 0;
- int index = 0;
+ int index = __get_raid_index(data);
int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
bool found_uncached_bg = false;
@@ -6788,11 +6788,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
&wc->flags[level]);
if (ret < 0) {
btrfs_tree_unlock_rw(eb, path->locks[level]);
+ path->locks[level] = 0;
return ret;
}
BUG_ON(wc->refs[level] == 0);
if (wc->refs[level] == 1) {
btrfs_tree_unlock_rw(eb, path->locks[level]);
+ path->locks[level] = 0;
return 1;
}
}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index f169d6b11d7..2e8cae63d24 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -171,6 +171,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
return 0;
+ if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
+ test_bit(EXTENT_FLAG_LOGGING, &next->flags))
+ return 0;
+
if (extent_map_end(prev) == next->start &&
prev->flags == next->flags &&
prev->bdev == next->bdev &&
@@ -255,7 +259,8 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
if (!em)
goto out;
- list_move(&em->list, &tree->modified_extents);
+ if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
+ list_move(&em->list, &tree->modified_extents);
em->generation = gen;
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
em->mod_start = em->start;
@@ -280,6 +285,12 @@ out:
}
+void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
+{
+ clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
+ try_merge_map(tree, em);
+}
+
/**
* add_extent_mapping - add new extent map to the extent tree
* @tree: tree to insert new map in
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 922943ce29e..c6598c89cff 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -69,6 +69,7 @@ void free_extent_map(struct extent_map *em);
int __init extent_map_init(void);
void extent_map_exit(void);
int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
+void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len);
#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index bd38cef4235..94aa53b3872 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -460,8 +460,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
if (!contig)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
- if (!contig && (offset >= ordered->file_offset + ordered->len ||
- offset < ordered->file_offset)) {
+ if (offset >= ordered->file_offset + ordered->len ||
+ offset < ordered->file_offset) {
unsigned long bytes_left;
sums->len = this_sum_bytes;
this_sum_bytes = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77061bf43ed..f76b1fd160d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2241,6 +2241,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
if (lockend <= lockstart)
lockend = lockstart + root->sectorsize;
+ lockend--;
len = lockend - lockstart + 1;
len = max_t(u64, len, root->sectorsize);
@@ -2307,9 +2308,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
}
}
- *offset = start;
- free_extent_map(em);
- break;
+ if (!test_bit(EXTENT_FLAG_PREALLOC,
+ &em->flags)) {
+ *offset = start;
+ free_extent_map(em);
+ break;
+ }
}
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 59ea2e4349c..0be7a8742a4 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1862,11 +1862,13 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *info;
- int ret = 0;
+ int ret;
+ bool re_search = false;
spin_lock(&ctl->tree_lock);
again:
+ ret = 0;
if (!bytes)
goto out_lock;
@@ -1879,17 +1881,17 @@ again:
info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1, 0);
if (!info) {
- /* the tree logging code might be calling us before we
- * have fully loaded the free space rbtree for this
- * block group. So it is possible the entry won't
- * be in the rbtree yet at all. The caching code
- * will make sure not to put it in the rbtree if
- * the logging code has pinned it.
+ /*
+ * If we found a partial bit of our free space in a
+ * bitmap but then couldn't find the other part this may
+ * be a problem, so WARN about it.
*/
+ WARN_ON(re_search);
goto out_lock;
}
}
+ re_search = false;
if (!info->bitmap) {
unlink_free_space(ctl, info);
if (offset == info->offset) {
@@ -1935,8 +1937,10 @@ again:
}
ret = remove_from_bitmap(ctl, info, &offset, &bytes);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
+ re_search = true;
goto again;
+ }
BUG_ON(ret); /* logic error */
out_lock:
spin_unlock(&ctl->tree_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 16d9e8e191e..cc93b23ca35 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,7 +88,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
};
-static int btrfs_setsize(struct inode *inode, loff_t newsize);
+static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
@@ -2478,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
continue;
}
nr_truncate++;
+
+ /* 1 for the orphan item deletion. */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+ ret = btrfs_orphan_add(trans, inode);
+ btrfs_end_transaction(trans, root);
+ if (ret)
+ goto out;
+
ret = btrfs_truncate(inode);
} else {
nr_unlink++;
@@ -3665,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
block_end - cur_offset, 0);
if (IS_ERR(em)) {
err = PTR_ERR(em);
+ em = NULL;
break;
}
last_byte = min(extent_map_end(em), block_end);
@@ -3748,16 +3761,27 @@ next:
return err;
}
-static int btrfs_setsize(struct inode *inode, loff_t newsize)
+static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
loff_t oldsize = i_size_read(inode);
+ loff_t newsize = attr->ia_size;
+ int mask = attr->ia_valid;
int ret;
if (newsize == oldsize)
return 0;
+ /*
+ * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+ * special case where we need to update the times despite not having
+ * these flags set. For all other operations the VFS set these flags
+ * explicitly if it wants a timestamp update.
+ */
+ if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
+ inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
+
if (newsize > oldsize) {
truncate_pagecache(inode, oldsize, newsize);
ret = btrfs_cont_expand(inode, oldsize, newsize);
@@ -3783,9 +3807,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags);
+ /*
+ * 1 for the orphan item we're going to add
+ * 1 for the orphan item deletion.
+ */
+ trans = btrfs_start_transaction(root, 2);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ /*
+ * We need to do this in case we fail at _any_ point during the
+ * actual truncate. Once we do the truncate_setsize we could
+ * invalidate pages which forces any outstanding ordered io to
+ * be instantly completed which will give us extents that need
+ * to be truncated. If we fail to get an orphan inode down we
+ * could have left over extents that were never meant to live,
+ * so we need to garuntee from this point on that everything
+ * will be consistent.
+ */
+ ret = btrfs_orphan_add(trans, inode);
+ btrfs_end_transaction(trans, root);
+ if (ret)
+ return ret;
+
/* we don't support swapfiles, so vmtruncate shouldn't fail */
truncate_setsize(inode, newsize);
ret = btrfs_truncate(inode);
+ if (ret && inode->i_nlink)
+ btrfs_orphan_del(NULL, inode);
}
return ret;
@@ -3805,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
- err = btrfs_setsize(inode, attr->ia_size);
+ err = btrfs_setsize(inode, attr);
if (err)
return err;
}
@@ -5572,10 +5621,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
return em;
if (em) {
/*
- * if our em maps to a hole, there might
- * actually be delalloc bytes behind it
+ * if our em maps to
+ * - a hole or
+ * - a pre-alloc extent,
+ * there might actually be delalloc bytes behind it.
*/
- if (em->block_start != EXTENT_MAP_HOLE)
+ if (em->block_start != EXTENT_MAP_HOLE &&
+ !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
return em;
else
hole_em = em;
@@ -5657,6 +5709,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
*/
em->block_start = hole_em->block_start;
em->block_len = hole_len;
+ if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
+ set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
} else {
em->start = range_start;
em->len = found;
@@ -6915,11 +6969,9 @@ static int btrfs_truncate(struct inode *inode)
/*
* 1 for the truncate slack space
- * 1 for the orphan item we're going to add
- * 1 for the orphan item deletion
* 1 for updating the inode.
*/
- trans = btrfs_start_transaction(root, 4);
+ trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out;
@@ -6930,12 +6982,6 @@ static int btrfs_truncate(struct inode *inode)
min_size);
BUG_ON(ret);
- ret = btrfs_orphan_add(trans, inode);
- if (ret) {
- btrfs_end_transaction(trans, root);
- goto out;
- }
-
/*
* setattr is responsible for setting the ordered_data_close flag,
* but that is only tested during the last file release. That
@@ -7004,12 +7050,6 @@ static int btrfs_truncate(struct inode *inode)
ret = btrfs_orphan_del(trans, inode);
if (ret)
err = ret;
- } else if (ret && inode->i_nlink > 0) {
- /*
- * Failed to do the truncate, remove us from the in memory
- * orphan list.
- */
- ret = btrfs_orphan_del(NULL, inode);
}
if (trans) {
@@ -7531,41 +7571,61 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
*/
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
{
- struct list_head *head = &root->fs_info->delalloc_inodes;
struct btrfs_inode *binode;
struct inode *inode;
struct btrfs_delalloc_work *work, *next;
struct list_head works;
+ struct list_head splice;
int ret = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
INIT_LIST_HEAD(&works);
-
+ INIT_LIST_HEAD(&splice);
+again:
spin_lock(&root->fs_info->delalloc_lock);
- while (!list_empty(head)) {
- binode = list_entry(head->next, struct btrfs_inode,
+ list_splice_init(&root->fs_info->delalloc_inodes, &splice);
+ while (!list_empty(&splice)) {
+ binode = list_entry(splice.next, struct btrfs_inode,
delalloc_inodes);
+
+ list_del_init(&binode->delalloc_inodes);
+
inode = igrab(&binode->vfs_inode);
if (!inode)
- list_del_init(&binode->delalloc_inodes);
+ continue;
+
+ list_add_tail(&binode->delalloc_inodes,
+ &root->fs_info->delalloc_inodes);
spin_unlock(&root->fs_info->delalloc_lock);
- if (inode) {
- work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
- if (!work) {
- ret = -ENOMEM;
- goto out;
- }
- list_add_tail(&work->list, &works);
- btrfs_queue_worker(&root->fs_info->flush_workers,
- &work->work);
+
+ work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+ if (unlikely(!work)) {
+ ret = -ENOMEM;
+ goto out;
}
+ list_add_tail(&work->list, &works);
+ btrfs_queue_worker(&root->fs_info->flush_workers,
+ &work->work);
+
cond_resched();
spin_lock(&root->fs_info->delalloc_lock);
}
spin_unlock(&root->fs_info->delalloc_lock);
+ list_for_each_entry_safe(work, next, &works, list) {
+ list_del_init(&work->list);
+ btrfs_wait_and_free_delalloc_work(work);
+ }
+
+ spin_lock(&root->fs_info->delalloc_lock);
+ if (!list_empty(&root->fs_info->delalloc_inodes)) {
+ spin_unlock(&root->fs_info->delalloc_lock);
+ goto again;
+ }
+ spin_unlock(&root->fs_info->delalloc_lock);
+
/* the filemap_flush will queue IO into the worker threads, but
* we have to make sure the IO is actually started and that
* ordered extents get created before we return
@@ -7578,11 +7638,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
atomic_read(&root->fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&root->fs_info->async_submit_draining);
+ return 0;
out:
list_for_each_entry_safe(work, next, &works, list) {
list_del_init(&work->list);
btrfs_wait_and_free_delalloc_work(work);
}
+
+ if (!list_empty_careful(&splice)) {
+ spin_lock(&root->fs_info->delalloc_lock);
+ list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
+ spin_unlock(&root->fs_info->delalloc_lock);
+ }
return ret;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4b4516770f0..5b22d45d3c6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1339,7 +1339,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
- return -EINPROGRESS;
+ mnt_drop_write_file(file);
+ return -EINVAL;
}
mutex_lock(&root->fs_info->volume_mutex);
@@ -1362,6 +1363,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
printk(KERN_INFO "btrfs: resizing devid %llu\n",
(unsigned long long)devid);
}
+
device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
if (!device) {
printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
@@ -1369,9 +1371,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
ret = -EINVAL;
goto out_free;
}
- if (device->fs_devices && device->fs_devices->seeding) {
+
+ if (!device->writeable) {
printk(KERN_INFO "btrfs: resizer unable to apply on "
- "seeding device %llu\n",
+ "readonly device %llu\n",
(unsigned long long)devid);
ret = -EINVAL;
goto out_free;
@@ -1443,8 +1446,8 @@ out_free:
kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
- mnt_drop_write_file(file);
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
+ mnt_drop_write_file(file);
return ret;
}
@@ -2095,13 +2098,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
if (err)
goto out_dput;
-
- /* check if subvolume may be deleted by a non-root user */
- err = btrfs_may_delete(dir, dentry, 1);
- if (err)
- goto out_dput;
}
+ /* check if subvolume may be deleted by a user */
+ err = btrfs_may_delete(dir, dentry, 1);
+ if (err)
+ goto out_dput;
+
if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL;
goto out_dput;
@@ -2183,19 +2186,20 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
struct btrfs_ioctl_defrag_range_args *range;
int ret;
- if (btrfs_root_readonly(root))
- return -EROFS;
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
- return -EINPROGRESS;
+ mnt_drop_write_file(file);
+ return -EINVAL;
}
- ret = mnt_want_write_file(file);
- if (ret) {
- atomic_set(&root->fs_info->mutually_exclusive_operation_running,
- 0);
- return ret;
+
+ if (btrfs_root_readonly(root)) {
+ ret = -EROFS;
+ goto out;
}
switch (inode->i_mode & S_IFMT) {
@@ -2247,8 +2251,8 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
ret = -EINVAL;
}
out:
- mnt_drop_write_file(file);
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
+ mnt_drop_write_file(file);
return ret;
}
@@ -2263,7 +2267,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
- return -EINPROGRESS;
+ return -EINVAL;
}
mutex_lock(&root->fs_info->volume_mutex);
@@ -2300,7 +2304,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
mnt_drop_write_file(file);
- return -EINPROGRESS;
+ return -EINVAL;
}
mutex_lock(&root->fs_info->volume_mutex);
@@ -2316,8 +2320,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
- mnt_drop_write_file(file);
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
+ mnt_drop_write_file(file);
return ret;
}
@@ -3437,8 +3441,8 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
struct btrfs_balance_control *bctl;
+ bool need_unlock; /* for mut. excl. ops lock */
int ret;
- int need_to_clear_lock = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -3447,14 +3451,61 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
if (ret)
return ret;
- mutex_lock(&fs_info->volume_mutex);
+again:
+ if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
+ mutex_lock(&fs_info->volume_mutex);
+ mutex_lock(&fs_info->balance_mutex);
+ need_unlock = true;
+ goto locked;
+ }
+
+ /*
+ * mut. excl. ops lock is locked. Three possibilites:
+ * (1) some other op is running
+ * (2) balance is running
+ * (3) balance is paused -- special case (think resume)
+ */
mutex_lock(&fs_info->balance_mutex);
+ if (fs_info->balance_ctl) {
+ /* this is either (2) or (3) */
+ if (!atomic_read(&fs_info->balance_running)) {
+ mutex_unlock(&fs_info->balance_mutex);
+ if (!mutex_trylock(&fs_info->volume_mutex))
+ goto again;
+ mutex_lock(&fs_info->balance_mutex);
+
+ if (fs_info->balance_ctl &&
+ !atomic_read(&fs_info->balance_running)) {
+ /* this is (3) */
+ need_unlock = false;
+ goto locked;
+ }
+
+ mutex_unlock(&fs_info->balance_mutex);
+ mutex_unlock(&fs_info->volume_mutex);
+ goto again;
+ } else {
+ /* this is (2) */
+ mutex_unlock(&fs_info->balance_mutex);
+ ret = -EINPROGRESS;
+ goto out;
+ }
+ } else {
+ /* this is (1) */
+ mutex_unlock(&fs_info->balance_mutex);
+ pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+locked:
+ BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
if (arg) {
bargs = memdup_user(arg, sizeof(*bargs));
if (IS_ERR(bargs)) {
ret = PTR_ERR(bargs);
- goto out;
+ goto out_unlock;
}
if (bargs->flags & BTRFS_BALANCE_RESUME) {
@@ -3474,13 +3525,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
bargs = NULL;
}
- if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
- 1)) {
- pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
+ if (fs_info->balance_ctl) {
ret = -EINPROGRESS;
goto out_bargs;
}
- need_to_clear_lock = 1;
bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
if (!bctl) {
@@ -3501,11 +3549,17 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
}
do_balance:
- ret = btrfs_balance(bctl, bargs);
/*
- * bctl is freed in __cancel_balance or in free_fs_info if
- * restriper was paused all the way until unmount
+ * Ownership of bctl and mutually_exclusive_operation_running
+ * goes to to btrfs_balance. bctl is freed in __cancel_balance,
+ * or, if restriper was paused all the way until unmount, in
+ * free_fs_info. mutually_exclusive_operation_running is
+ * cleared in __cancel_balance.
*/
+ need_unlock = false;
+
+ ret = btrfs_balance(bctl, bargs);
+
if (arg) {
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
@@ -3513,12 +3567,12 @@ do_balance:
out_bargs:
kfree(bargs);
-out:
- if (need_to_clear_lock)
- atomic_set(&root->fs_info->mutually_exclusive_operation_running,
- 0);
+out_unlock:
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
+ if (need_unlock)
+ atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+out:
mnt_drop_write_file(file);
return ret;
}
@@ -3698,6 +3752,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
goto drop_write;
}
+ if (!sa->qgroupid) {
+ ret = -EINVAL;
+ goto out;
+ }
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fe9d02c45f8..a5c85623432 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -379,6 +379,13 @@ next1:
ret = add_relation_rb(fs_info, found_key.objectid,
found_key.offset);
+ if (ret == -ENOENT) {
+ printk(KERN_WARNING
+ "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
+ (unsigned long long)found_key.objectid,
+ (unsigned long long)found_key.offset);
+ ret = 0; /* ignore the error */
+ }
if (ret)
goto out;
next2:
@@ -956,17 +963,28 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid)
{
struct btrfs_root *quota_root;
+ struct btrfs_qgroup *qgroup;
int ret = 0;
quota_root = fs_info->quota_root;
if (!quota_root)
return -EINVAL;
+ /* check if there are no relations to this qgroup */
+ spin_lock(&fs_info->qgroup_lock);
+ qgroup = find_qgroup_rb(fs_info, qgroupid);
+ if (qgroup) {
+ if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
+ spin_unlock(&fs_info->qgroup_lock);
+ return -EBUSY;
+ }
+ }
+ spin_unlock(&fs_info->qgroup_lock);
+
ret = del_qgroup_item(trans, quota_root, qgroupid);
spin_lock(&fs_info->qgroup_lock);
del_qgroup_rb(quota_root->fs_info, qgroupid);
-
spin_unlock(&fs_info->qgroup_lock);
return ret;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 54454542ad4..321b7fb4e44 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1814,8 +1814,10 @@ static int name_cache_insert(struct send_ctx *sctx,
(unsigned long)nce->ino);
if (!nce_head) {
nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
- if (!nce_head)
+ if (!nce_head) {
+ kfree(nce);
return -ENOMEM;
+ }
INIT_LIST_HEAD(nce_head);
ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 99545df1b86..d8982e9601d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -267,7 +267,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
function, line, errstr);
return;
}
- trans->transaction->aborted = errno;
+ ACCESS_ONCE(trans->transaction->aborted) = errno;
__btrfs_std_error(root->fs_info, function, line, errno, NULL);
}
/*
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 87fac9a21ea..f15494699f3 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1468,7 +1468,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
goto cleanup_transaction;
}
- if (cur_trans->aborted) {
+ /* Stop the commit early if ->aborted is set */
+ if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
goto cleanup_transaction;
}
@@ -1574,6 +1575,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
+ /* ->aborted might be set after the previous check, so check it */
+ if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+ ret = cur_trans->aborted;
+ goto cleanup_transaction;
+ }
/*
* the reloc mutex makes sure that we stop
* the balancing code from coming in and moving
@@ -1657,6 +1663,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
goto cleanup_transaction;
}
+ /*
+ * The tasks which save the space cache and inode cache may also
+ * update ->aborted, check it.
+ */
+ if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+ ret = cur_trans->aborted;
+ mutex_unlock(&root->fs_info->tree_log_mutex);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+ goto cleanup_transaction;
+ }
+
btrfs_prepare_extent_commit(trans, root);
cur_trans = root->fs_info->running_transaction;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 83186c7e45d..9027bb1e746 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3357,6 +3357,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
if (skip_csum)
return 0;
+ if (em->compress_type) {
+ csum_offset = 0;
+ csum_len = block_len;
+ }
+
/* block start is already adjusted for the file extent offset. */
ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
em->block_start + csum_offset,
@@ -3410,13 +3415,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
em = list_entry(extents.next, struct extent_map, list);
list_del_init(&em->list);
- clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
/*
* If we had an error we just need to delete everybody from our
* private list.
*/
if (ret) {
+ clear_em_logging(tree, em);
free_extent_map(em);
continue;
}
@@ -3424,8 +3429,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
write_unlock(&tree->lock);
ret = log_one_extent(trans, inode, root, em, path);
- free_extent_map(em);
write_lock(&tree->lock);
+ clear_em_logging(tree, em);
+ free_extent_map(em);
}
WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5cce6aa7401..15f6efdf646 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1431,7 +1431,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
}
} else {
ret = btrfs_get_bdev_and_sb(device_path,
- FMODE_READ | FMODE_EXCL,
+ FMODE_WRITE | FMODE_EXCL,
root->fs_info->bdev_holder, 0,
&bdev, &bh);
if (ret)
@@ -2614,7 +2614,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
chunk_used = btrfs_block_group_used(&cache->item);
- user_thresh = div_factor_fine(cache->key.offset, bargs->usage);
+ if (bargs->usage == 0)
+ user_thresh = 0;
+ else if (bargs->usage > 100)
+ user_thresh = cache->key.offset;
+ else
+ user_thresh = div_factor_fine(cache->key.offset,
+ bargs->usage);
+
if (chunk_used < user_thresh)
ret = 0;
@@ -2959,6 +2966,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
unset_balance_control(fs_info);
ret = del_balance_item(fs_info->tree_root);
BUG_ON(ret);
+
+ atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
}
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
@@ -3138,8 +3147,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
out:
if (bctl->flags & BTRFS_BALANCE_RESUME)
__cancel_balance(fs_info);
- else
+ else {
kfree(bctl);
+ atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+ }
return ret;
}
@@ -3156,7 +3167,6 @@ static int balance_kthread(void *data)
ret = btrfs_balance(fs_info->balance_ctl, NULL);
}
- atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
@@ -3179,7 +3189,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
return 0;
}
- WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
if (IS_ERR(tsk))
return PTR_ERR(tsk);
@@ -3233,6 +3242,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
btrfs_balance_sys(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
+ WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
+
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
@@ -3496,7 +3507,7 @@ struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
{ 1, 1, 2, 2, 2, 2 /* raid1 */ },
{ 1, 2, 1, 1, 1, 2 /* dup */ },
{ 1, 1, 0, 2, 1, 1 /* raid0 */ },
- { 1, 1, 0, 1, 1, 1 /* single */ },
+ { 1, 1, 1, 1, 1, 1 /* single */ },
};
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
diff --git a/fs/buffer.c b/fs/buffer.c
index c017a2dfb90..7a75c3e0fd5 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2935,6 +2935,7 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
void *kaddr = kmap_atomic(bh->b_page);
memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
kunmap_atomic(kaddr);
+ flush_dcache_page(bh->b_page);
}
}
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index ce5cbd717bf..210fce2df30 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,6 +226,8 @@ compose_mount_options_out:
compose_mount_options_err:
kfree(mountdata);
mountdata = ERR_PTR(rc);
+ kfree(*devname);
+ *devname = NULL;
goto compose_mount_options_out;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f653835d067..de7f9168a11 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -228,7 +228,6 @@ cifs_alloc_inode(struct super_block *sb)
cifs_set_oplock_level(cifs_inode, 0);
cifs_inode->delete_pending = false;
cifs_inode->invalid_mapping = false;
- cifs_inode->leave_pages_clean = false;
cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
cifs_inode->server_eof = 0;
cifs_inode->uniqueid = 0;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index aea1eec6491..e6899cea1c3 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -386,6 +386,7 @@ struct smb_version_values {
unsigned int cap_unix;
unsigned int cap_nt_find;
unsigned int cap_large_files;
+ unsigned int oplock_read;
};
#define HEADER_SIZE(server) (server->vals->header_size)
@@ -1030,7 +1031,6 @@ struct cifsInodeInfo {
bool clientCanCacheAll; /* read and writebehind oplock */
bool delete_pending; /* DELETE_ON_CLOSE is set */
bool invalid_mapping; /* pagecache is invalid */
- bool leave_pages_clean; /* protected by i_mutex, not set pages dirty */
unsigned long time; /* jiffies of last update of inode */
u64 server_eof; /* current file size on server -- protected by i_lock */
u64 uniqueid; /* server inode number */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 17c3643e595..12b3da39733 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1917,7 +1917,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
}
case AF_INET6: {
struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr;
- struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)&rhs;
+ struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs;
return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr);
}
default:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0a6677ba212..8ea6ca50a66 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -238,6 +238,23 @@ out:
return rc;
}
+static bool
+cifs_has_mand_locks(struct cifsInodeInfo *cinode)
+{
+ struct cifs_fid_locks *cur;
+ bool has_locks = false;
+
+ down_read(&cinode->lock_sem);
+ list_for_each_entry(cur, &cinode->llist, llist) {
+ if (!list_empty(&cur->locks)) {
+ has_locks = true;
+ break;
+ }
+ }
+ up_read(&cinode->lock_sem);
+ return has_locks;
+}
+
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct tcon_link *tlink, __u32 oplock)
@@ -248,6 +265,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct cifsFileInfo *cfile;
struct cifs_fid_locks *fdlocks;
struct cifs_tcon *tcon = tlink_tcon(tlink);
+ struct TCP_Server_Info *server = tcon->ses->server;
cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
if (cfile == NULL)
@@ -276,12 +294,22 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
mutex_init(&cfile->fh_mutex);
+ /*
+ * If the server returned a read oplock and we have mandatory brlocks,
+ * set oplock level to None.
+ */
+ if (oplock == server->vals->oplock_read &&
+ cifs_has_mand_locks(cinode)) {
+ cFYI(1, "Reset oplock val from read to None due to mand locks");
+ oplock = 0;
+ }
+
spin_lock(&cifs_file_list_lock);
- if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE)
+ if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
oplock = fid->pending_open->oplock;
list_del(&fid->pending_open->olist);
- tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
+ server->ops->set_fid(cfile, fid, oplock);
list_add(&cfile->tlist, &tcon->openFileList);
/* if readable file instance put first in list*/
@@ -1422,6 +1450,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
+ struct inode *inode = cfile->dentry->d_inode;
if (posix_lck) {
int posix_lock_type;
@@ -1459,6 +1488,21 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
if (!rc)
goto out;
+ /*
+ * Windows 7 server can delay breaking lease from read to None
+ * if we set a byte-range lock on a file - break it explicitly
+ * before sending the lock to the server to be sure the next
+ * read won't conflict with non-overlapted locks due to
+ * pagereading.
+ */
+ if (!CIFS_I(inode)->clientCanCacheAll &&
+ CIFS_I(inode)->clientCanCacheRead) {
+ cifs_invalidate_mapping(inode);
+ cFYI(1, "Set no oplock for inode=%p due to mand locks",
+ inode);
+ CIFS_I(inode)->clientCanCacheRead = false;
+ }
+
rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
type, 1, 0, wait_flag);
if (rc) {
@@ -2103,15 +2147,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
} else {
rc = copied;
pos += copied;
- /*
- * When we use strict cache mode and cifs_strict_writev was run
- * with level II oplock (indicated by leave_pages_clean field of
- * CIFS_I(inode)), we can leave pages clean - cifs_strict_writev
- * sent the data to the server itself.
- */
- if (!CIFS_I(inode)->leave_pages_clean ||
- !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO))
- set_page_dirty(page);
+ set_page_dirty(page);
}
if (rc > 0) {
@@ -2462,8 +2498,8 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
}
static ssize_t
-cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos, bool cache_ex)
+cifs_writev(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
@@ -2485,12 +2521,8 @@ cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov,
server->vals->exclusive_lock_type, NULL,
CIFS_WRITE_OP)) {
mutex_lock(&inode->i_mutex);
- if (!cache_ex)
- cinode->leave_pages_clean = true;
rc = __generic_file_aio_write(iocb, iov, nr_segs,
- &iocb->ki_pos);
- if (!cache_ex)
- cinode->leave_pages_clean = false;
+ &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
}
@@ -2517,60 +2549,32 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
struct cifsFileInfo *cfile = (struct cifsFileInfo *)
iocb->ki_filp->private_data;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
- ssize_t written, written2;
- /*
- * We need to store clientCanCacheAll here to prevent race
- * conditions - this value can be changed during an execution
- * of generic_file_aio_write. For CIFS it can be changed from
- * true to false only, but for SMB2 it can be changed both from
- * true to false and vice versa. So, we can end up with a data
- * stored in the cache, not marked dirty and not sent to the
- * server if this value changes its state from false to true
- * after cifs_write_end.
- */
- bool cache_ex = cinode->clientCanCacheAll;
- bool cache_read = cinode->clientCanCacheRead;
- int rc;
- loff_t saved_pos;
+ ssize_t written;
- if (cache_ex) {
+ if (cinode->clientCanCacheAll) {
if (cap_unix(tcon->ses) &&
- ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) &&
- (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(
- tcon->fsUnixInfo.Capability)))
+ (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
+ && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
return generic_file_aio_write(iocb, iov, nr_segs, pos);
- return cifs_pagecache_writev(iocb, iov, nr_segs, pos, cache_ex);
+ return cifs_writev(iocb, iov, nr_segs, pos);
}
-
/*
- * For files without exclusive oplock in strict cache mode we need to
- * write the data to the server exactly from the pos to pos+len-1 rather
- * than flush all affected pages because it may cause a error with
- * mandatory locks on these pages but not on the region from pos to
- * ppos+len-1.
+ * For non-oplocked files in strict cache mode we need to write the data
+ * to the server exactly from the pos to pos+len-1 rather than flush all
+ * affected pages because it may cause a error with mandatory locks on
+ * these pages but not on the region from pos to ppos+len-1.
*/
written = cifs_user_writev(iocb, iov, nr_segs, pos);
- if (!cache_read || written <= 0)
- return written;
-
- saved_pos = iocb->ki_pos;
- iocb->ki_pos = pos;
- /* we have a read oplock - need to store a data in the page cache */
- if (cap_unix(tcon->ses) &&
- ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) &&
- (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(
- tcon->fsUnixInfo.Capability)))
- written2 = generic_file_aio_write(iocb, iov, nr_segs, pos);
- else
- written2 = cifs_pagecache_writev(iocb, iov, nr_segs, pos,
- cache_ex);
- /* errors occured during writing - invalidate the page cache */
- if (written2 < 0) {
- rc = cifs_invalidate_mapping(inode);
- if (rc)
- written = (ssize_t)rc;
- else
- iocb->ki_pos = saved_pos;
+ if (written > 0 && cinode->clientCanCacheRead) {
+ /*
+ * Windows 7 server can delay breaking level2 oplock if a write
+ * request comes - break it on the client to prevent reading
+ * an old data.
+ */
+ cifs_invalidate_mapping(inode);
+ cFYI(1, "Set no oplock for inode=%p after a write operation",
+ inode);
+ cinode->clientCanCacheRead = false;
}
return written;
}
@@ -3577,6 +3581,13 @@ void cifs_oplock_break(struct work_struct *work)
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
int rc = 0;
+ if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
+ cifs_has_mand_locks(cinode)) {
+ cFYI(1, "Reset oplock to None for inode=%p due to mand locks",
+ inode);
+ cinode->clientCanCacheRead = false;
+ }
+
if (inode && S_ISREG(inode->i_mode)) {
if (cinode->clientCanCacheRead)
break_lease(inode, O_RDONLY);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index a5d234c8d5d..47bc5a87f94 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -53,6 +53,13 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf,
mutex_unlock(&server->srv_mutex);
return rc;
}
+
+ /*
+ * The response to this call was already factored into the sequence
+ * number when the call went out, so we must adjust it back downward
+ * after signing here.
+ */
+ --server->sequence_number;
rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
mutex_unlock(&server->srv_mutex);
@@ -952,4 +959,5 @@ struct smb_version_values smb1_values = {
.cap_unix = CAP_UNIX,
.cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND,
.cap_large_files = CAP_LARGE_FILES,
+ .oplock_read = OPLOCK_READ,
};
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d79de7bc443..c9c7aa7ed96 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -708,6 +708,7 @@ struct smb_version_values smb20_values = {
.cap_unix = 0,
.cap_nt_find = SMB2_NT_FIND,
.cap_large_files = SMB2_LARGE_FILES,
+ .oplock_read = SMB2_OPLOCK_LEVEL_II,
};
struct smb_version_values smb21_values = {
@@ -725,6 +726,7 @@ struct smb_version_values smb21_values = {
.cap_unix = 0,
.cap_nt_find = SMB2_NT_FIND,
.cap_large_files = SMB2_LARGE_FILES,
+ .oplock_read = SMB2_OPLOCK_LEVEL_II,
};
struct smb_version_values smb30_values = {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 76d974c952f..1a528680ec5 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -144,9 +144,6 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec,
*sent = 0;
- if (ssocket == NULL)
- return -ENOTSOCK; /* BB eventually add reconnect code here */
-
smb_msg.msg_name = (struct sockaddr *) &server->dstaddr;
smb_msg.msg_namelen = sizeof(struct sockaddr);
smb_msg.msg_control = NULL;
@@ -291,6 +288,9 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
struct socket *ssocket = server->ssocket;
int val = 1;
+ if (ssocket == NULL)
+ return -ENOTSOCK;
+
cFYI(1, "Sending smb: smb_len=%u", smb_buf_length);
dump_smb(iov[0].iov_base, iov[0].iov_len);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 153bb1e42e6..a5f12b7e228 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -176,7 +176,7 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
opts->uid = uid;
break;
case Opt_gid:
- if (match_octal(&args[0], &option))
+ if (match_int(&args[0], &option))
return -EINVAL;
gid = make_kgid(current_user_ns(), option);
if (!gid_valid(gid))
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index ea993128155..a7b0c2dfb3d 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1935,7 +1935,7 @@ static const unsigned char filename_rev_map[256] = {
* @src: Source location for the filename to encode
* @src_size: Size of the source in bytes
*/
-void ecryptfs_encode_for_filename(unsigned char *dst, size_t *dst_size,
+static void ecryptfs_encode_for_filename(unsigned char *dst, size_t *dst_size,
unsigned char *src, size_t src_size)
{
size_t num_blocks;
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 809e67d05ca..f1ea610362c 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -102,12 +102,12 @@ int __init ecryptfs_init_kthread(void)
void ecryptfs_destroy_kthread(void)
{
- struct ecryptfs_open_req *req;
+ struct ecryptfs_open_req *req, *tmp;
mutex_lock(&ecryptfs_kthread_ctl.mux);
ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
- list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list,
- kthread_ctl_list) {
+ list_for_each_entry_safe(req, tmp, &ecryptfs_kthread_ctl.req_list,
+ kthread_ctl_list) {
list_del(&req->kthread_ctl_list);
*req->lower_file = ERR_PTR(-EIO);
complete(&req->done);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index bd1d57f98f7..564a1fa34b9 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -338,7 +338,8 @@ static int ecryptfs_write_begin(struct file *file,
if (prev_page_end_size
>= i_size_read(page->mapping->host)) {
zero_user(page, 0, PAGE_CACHE_SIZE);
- } else {
+ SetPageUptodate(page);
+ } else if (len < PAGE_CACHE_SIZE) {
rc = ecryptfs_decrypt_page(page);
if (rc) {
printk(KERN_ERR "%s: Error decrypting "
@@ -348,8 +349,8 @@ static int ecryptfs_write_begin(struct file *file,
ClearPageUptodate(page);
goto out;
}
+ SetPageUptodate(page);
}
- SetPageUptodate(page);
}
}
/* If creating a page or more of holes, zero them out via truncate.
@@ -499,6 +500,13 @@ static int ecryptfs_write_end(struct file *file,
}
goto out;
}
+ if (!PageUptodate(page)) {
+ if (copied < PAGE_CACHE_SIZE) {
+ rc = 0;
+ goto out;
+ }
+ SetPageUptodate(page);
+ }
/* Fills in zeros if 'to' goes beyond inode size */
rc = fill_zeros_to_end_of_page(page, to);
if (rc) {
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index be56b21435f..9fec1836057 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1313,7 +1313,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
* otherwise we might miss an event that happens between the
* f_op->poll() call and the new event set registering.
*/
- epi->event.events = event->events;
+ epi->event.events = event->events; /* need barrier below */
pt._key = event->events;
epi->event.data = event->data; /* protected by mtx */
if (epi->event.events & EPOLLWAKEUP) {
@@ -1324,6 +1324,26 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
}
/*
+ * The following barrier has two effects:
+ *
+ * 1) Flush epi changes above to other CPUs. This ensures
+ * we do not miss events from ep_poll_callback if an
+ * event occurs immediately after we call f_op->poll().
+ * We need this because we did not take ep->lock while
+ * changing epi above (but ep_poll_callback does take
+ * ep->lock).
+ *
+ * 2) We also need to ensure we do not miss _past_ events
+ * when calling f_op->poll(). This barrier also
+ * pairs with the barrier in wq_has_sleeper (see
+ * comments for wq_has_sleeper).
+ *
+ * This barrier will now guarantee ep_poll_callback or f_op->poll
+ * (or both) will notice the readiness of an item.
+ */
+ smp_mb();
+
+ /*
* Get current event bits. We can safely use the file* here because
* its usage count has been increased by the caller of this function.
*/
diff --git a/fs/exec.c b/fs/exec.c
index 18c45cac368..20df02c1cc7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -434,8 +434,9 @@ static int count(struct user_arg_ptr argv, int max)
if (IS_ERR(p))
return -EFAULT;
- if (i++ >= max)
+ if (i >= max)
return -E2BIG;
+ ++i;
if (fatal_signal_pending(current))
return -ERESTARTNOHAND;
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 0a475c88185..987358740cb 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -41,6 +41,7 @@ config EXT4_USE_FOR_EXT23
config EXT4_FS_POSIX_ACL
bool "Ext4 POSIX Access Control Lists"
+ depends on EXT4_FS
select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
@@ -53,6 +54,7 @@ config EXT4_FS_POSIX_ACL
config EXT4_FS_SECURITY
bool "Ext4 Security Labels"
+ depends on EXT4_FS
help
Security labels support alternative access control models
implemented by security modules like SELinux. This option
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 26af22832a8..5ae1674ec12 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2226,13 +2226,14 @@ errout:
* removes index from the index block.
*/
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
+ struct ext4_ext_path *path, int depth)
{
int err;
ext4_fsblk_t leaf;
/* free index block */
- path--;
+ depth--;
+ path = path + depth;
leaf = ext4_idx_pblock(path->p_idx);
if (unlikely(path->p_hdr->eh_entries == 0)) {
EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
@@ -2257,6 +2258,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
ext4_free_blocks(handle, inode, NULL, leaf, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+
+ while (--depth >= 0) {
+ if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+ break;
+ path--;
+ err = ext4_ext_get_access(handle, inode, path);
+ if (err)
+ break;
+ path->p_idx->ei_block = (path+1)->p_idx->ei_block;
+ err = ext4_ext_dirty(handle, inode, path);
+ if (err)
+ break;
+ }
return err;
}
@@ -2599,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
/* if this leaf is free, then we should
* remove it from index block above */
if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
- err = ext4_ext_rm_idx(handle, inode, path + depth);
+ err = ext4_ext_rm_idx(handle, inode, path, depth);
out:
return err;
@@ -2802,7 +2816,7 @@ again:
/* index is empty, remove it;
* handle must be already prepared by the
* truncatei_leaf() */
- err = ext4_ext_rm_idx(handle, inode, path + i);
+ err = ext4_ext_rm_idx(handle, inode, path, i);
}
/* root level has p_bh == NULL, brelse() eats this */
brelse(path[i].p_bh);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d07c27ca594..405565a6227 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -108,14 +108,6 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
/* Unaligned direct AIO must be serialized; see comment above */
if (unaligned_aio) {
- static unsigned long unaligned_warn_time;
-
- /* Warn about this once per day */
- if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
- ext4_msg(inode->i_sb, KERN_WARNING,
- "Unaligned AIO/DIO on inode %ld by %s; "
- "performance will be poor.",
- inode->i_ino, current->comm);
mutex_lock(ext4_aio_mutex(inode));
ext4_unwritten_wait(inode);
}
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index dfbc1fe9667..3278e64e57b 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -109,8 +109,6 @@ static int __sync_inode(struct inode *inode, int datasync)
*
* What we do is just kick off a commit and wait on it. This will snapshot the
* inode to disk.
- *
- * i_mutex lock is held when entering and exiting this function
*/
int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cb1c1ab2720..cbfe13bf5b2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2880,8 +2880,6 @@ static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offs
static void ext4_invalidatepage(struct page *page, unsigned long offset)
{
- journal_t *journal = EXT4_JOURNAL(page->mapping->host);
-
trace_ext4_invalidatepage(page, offset);
/*
@@ -2889,16 +2887,34 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
*/
if (ext4_should_dioread_nolock(page->mapping->host))
ext4_invalidatepage_free_endio(page, offset);
+
+ /* No journalling happens on data buffers when this function is used */
+ WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
+
+ block_invalidatepage(page, offset);
+}
+
+static int __ext4_journalled_invalidatepage(struct page *page,
+ unsigned long offset)
+{
+ journal_t *journal = EXT4_JOURNAL(page->mapping->host);
+
+ trace_ext4_journalled_invalidatepage(page, offset);
+
/*
* If it's a full truncate we just forget about the pending dirtying
*/
if (offset == 0)
ClearPageChecked(page);
- if (journal)
- jbd2_journal_invalidatepage(journal, page, offset);
- else
- block_invalidatepage(page, offset);
+ return jbd2_journal_invalidatepage(journal, page, offset);
+}
+
+/* Wrapper for aops... */
+static void ext4_journalled_invalidatepage(struct page *page,
+ unsigned long offset)
+{
+ WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
}
static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -3264,7 +3280,7 @@ static const struct address_space_operations ext4_journalled_aops = {
.write_end = ext4_journalled_write_end,
.set_page_dirty = ext4_journalled_set_page_dirty,
.bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
+ .invalidatepage = ext4_journalled_invalidatepage,
.releasepage = ext4_releasepage,
.direct_IO = ext4_direct_IO,
.is_partially_uptodate = block_is_partially_uptodate,
@@ -4305,6 +4321,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
+ * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
+ * buffers that are attached to a page stradding i_size and are undergoing
+ * commit. In that case we have to wait for commit to finish and try again.
+ */
+static void ext4_wait_for_tail_page_commit(struct inode *inode)
+{
+ struct page *page;
+ unsigned offset;
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+ tid_t commit_tid = 0;
+ int ret;
+
+ offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+ /*
+ * All buffers in the last page remain valid? Then there's nothing to
+ * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
+ * blocksize case
+ */
+ if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
+ return;
+ while (1) {
+ page = find_lock_page(inode->i_mapping,
+ inode->i_size >> PAGE_CACHE_SHIFT);
+ if (!page)
+ return;
+ ret = __ext4_journalled_invalidatepage(page, offset);
+ unlock_page(page);
+ page_cache_release(page);
+ if (ret != -EBUSY)
+ return;
+ commit_tid = 0;
+ read_lock(&journal->j_state_lock);
+ if (journal->j_committing_transaction)
+ commit_tid = journal->j_committing_transaction->t_tid;
+ read_unlock(&journal->j_state_lock);
+ if (commit_tid)
+ jbd2_log_wait_commit(journal, commit_tid);
+ }
+}
+
+/*
* ext4_setattr()
*
* Called from notify_change.
@@ -4417,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size != i_size_read(inode)) {
- truncate_setsize(inode, attr->ia_size);
- /* Inode size will be reduced, wait for dio in flight.
- * Temporarily disable dioread_nolock to prevent
- * livelock. */
+ if (attr->ia_size != inode->i_size) {
+ loff_t oldsize = inode->i_size;
+
+ i_size_write(inode, attr->ia_size);
+ /*
+ * Blocks are going to be removed from the inode. Wait
+ * for dio in flight. Temporarily disable
+ * dioread_nolock to prevent livelock.
+ */
if (orphan) {
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
- ext4_inode_resume_unlocked_dio(inode);
+ if (!ext4_should_journal_data(inode)) {
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+ ext4_inode_resume_unlocked_dio(inode);
+ } else
+ ext4_wait_for_tail_page_commit(inode);
}
+ /*
+ * Truncate pagecache after we've waited for commit
+ * in data=journal mode to make pages freeable.
+ */
+ truncate_pagecache(inode, oldsize, inode->i_size);
}
ext4_truncate(inode);
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cac44828233..f9ed946a448 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -722,7 +722,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
ext4_warning(dir->i_sb, "Node failed checksum");
brelse(bh);
*err = ERR_BAD_DX_DIR;
- goto fail;
+ goto fail2;
}
set_buffer_verified(bh);
@@ -2368,7 +2368,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
}
inode->i_size = EXT4_I(inode)->i_disksize = blocksize;
- dir_block = ext4_bread(handle, inode, 0, 1, &err);
if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
if (!err) {
err = -EIO;
@@ -2648,7 +2647,8 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
struct ext4_iloc iloc;
int err = 0;
- if (!EXT4_SB(inode->i_sb)->s_journal)
+ if ((!EXT4_SB(inode->i_sb)->s_journal) &&
+ !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS))
return 0;
mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3cdb0a2fc64..3d4fb81bacd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1645,9 +1645,7 @@ static int parse_options(char *options, struct super_block *sb,
unsigned int *journal_ioprio,
int is_remount)
{
-#ifdef CONFIG_QUOTA
struct ext4_sb_info *sbi = EXT4_SB(sb);
-#endif
char *p;
substring_t args[MAX_OPT_ARGS];
int token;
@@ -1696,6 +1694,16 @@ static int parse_options(char *options, struct super_block *sb,
}
}
#endif
+ if (test_opt(sb, DIOREAD_NOLOCK)) {
+ int blocksize =
+ BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+
+ if (blocksize < PAGE_CACHE_SIZE) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "dioread_nolock if block size != PAGE_SIZE");
+ return 0;
+ }
+ }
return 1;
}
@@ -2212,7 +2220,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
__func__, inode->i_ino, inode->i_size);
jbd_debug(2, "truncating inode %lu to %lld bytes\n",
inode->i_ino, inode->i_size);
+ mutex_lock(&inode->i_mutex);
ext4_truncate(inode);
+ mutex_unlock(&inode->i_mutex);
nr_truncates++;
} else {
ext4_msg(sb, KERN_DEBUG,
@@ -3223,6 +3233,10 @@ int ext4_calculate_overhead(struct super_block *sb)
memset(buf, 0, PAGE_SIZE);
cond_resched();
}
+ /* Add the journal blocks as well */
+ if (sbi->s_journal)
+ overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen);
+
sbi->s_overhead = overhead;
smp_wmb();
free_page((unsigned long) buf);
@@ -3436,15 +3450,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
clear_opt(sb, DELALLOC);
}
- blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- if (blocksize < PAGE_SIZE) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "dioread_nolock if block size != PAGE_SIZE");
- goto failed_mount;
- }
- }
-
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3486,6 +3491,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
goto failed_mount;
+ blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (blocksize < EXT4_MIN_BLOCK_SIZE ||
blocksize > EXT4_MAX_BLOCK_SIZE) {
ext4_msg(sb, KERN_ERR,
@@ -4725,7 +4731,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
ext4_setup_system_zone(sb);
- if (sbi->s_journal == NULL)
+ if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
ext4_commit_super(sb, 1);
#ifdef CONFIG_QUOTA
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index fed74d193ff..137af4255da 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -82,7 +82,6 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
case ACL_GROUP_OBJ:
case ACL_MASK:
case ACL_OTHER:
- acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
entry = (struct f2fs_acl_entry *)((char *)entry +
sizeof(struct f2fs_acl_entry_short));
break;
@@ -192,15 +191,14 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
retval = f2fs_getxattr(inode, name_index, "", value, retval);
}
- if (retval < 0) {
- if (retval == -ENODATA)
- acl = NULL;
- else
- acl = ERR_PTR(retval);
- } else {
+ if (retval > 0)
acl = f2fs_acl_from_disk(value, retval);
- }
+ else if (retval == -ENODATA)
+ acl = NULL;
+ else
+ acl = ERR_PTR(retval);
kfree(value);
+
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6ef36c37e2b..ff3c8439af8 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -214,7 +214,6 @@ retry:
goto retry;
}
new->ino = ino;
- INIT_LIST_HEAD(&new->list);
/* add new_oentry into list which is sorted by inode number */
if (orphan) {
@@ -772,7 +771,7 @@ void init_orphan_info(struct f2fs_sb_info *sbi)
sbi->n_orphans = 0;
}
-int create_checkpoint_caches(void)
+int __init create_checkpoint_caches(void)
{
orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
sizeof(struct orphan_inode_entry), NULL);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 655aeabc1dd..7bd22a20112 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -16,6 +16,7 @@
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
+#include <linux/prefetch.h>
#include "f2fs.h"
#include "node.h"
@@ -546,6 +547,15 @@ redirty_out:
#define MAX_DESIRED_PAGES_WP 4096
+static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
+ void *data)
+{
+ struct address_space *mapping = data;
+ int ret = mapping->a_ops->writepage(page, wbc);
+ mapping_set_error(mapping, ret);
+ return ret;
+}
+
static int f2fs_write_data_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -562,7 +572,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
if (!S_ISDIR(inode->i_mode))
mutex_lock(&sbi->writepages);
- ret = generic_writepages(mapping, wbc);
+ ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
if (!S_ISDIR(inode->i_mode))
mutex_unlock(&sbi->writepages);
f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
@@ -688,6 +698,11 @@ static int f2fs_set_data_page_dirty(struct page *page)
return 0;
}
+static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
+{
+ return generic_block_bmap(mapping, block, get_data_block_ro);
+}
+
const struct address_space_operations f2fs_dblock_aops = {
.readpage = f2fs_read_data_page,
.readpages = f2fs_read_data_pages,
@@ -699,4 +714,5 @@ const struct address_space_operations f2fs_dblock_aops = {
.invalidatepage = f2fs_invalidate_data_page,
.releasepage = f2fs_release_data_page,
.direct_IO = f2fs_direct_IO,
+ .bmap = f2fs_bmap,
};
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 0e0380a588a..c8c37307b32 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -26,6 +26,7 @@
static LIST_HEAD(f2fs_stat_list);
static struct dentry *debugfs_root;
+static DEFINE_MUTEX(f2fs_stat_mutex);
static void update_general_status(struct f2fs_sb_info *sbi)
{
@@ -180,18 +181,14 @@ static int stat_show(struct seq_file *s, void *v)
int i = 0;
int j;
+ mutex_lock(&f2fs_stat_mutex);
list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) {
- mutex_lock(&si->stat_lock);
- if (!si->sbi) {
- mutex_unlock(&si->stat_lock);
- continue;
- }
update_general_status(si->sbi);
seq_printf(s, "\n=====[ partition info. #%d ]=====\n", i++);
- seq_printf(s, "[SB: 1] [CP: 2] [NAT: %d] [SIT: %d] ",
- si->nat_area_segs, si->sit_area_segs);
+ seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
+ si->sit_area_segs, si->nat_area_segs);
seq_printf(s, "[SSA: %d] [MAIN: %d",
si->ssa_area_segs, si->main_area_segs);
seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
@@ -286,8 +283,8 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n",
(si->base_mem + si->cache_mem) >> 10,
si->base_mem >> 10, si->cache_mem >> 10);
- mutex_unlock(&si->stat_lock);
}
+ mutex_unlock(&f2fs_stat_mutex);
return 0;
}
@@ -303,7 +300,7 @@ static const struct file_operations stat_fops = {
.release = single_release,
};
-static int init_stats(struct f2fs_sb_info *sbi)
+int f2fs_build_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_stat_info *si;
@@ -313,9 +310,6 @@ static int init_stats(struct f2fs_sb_info *sbi)
return -ENOMEM;
si = sbi->stat_info;
- mutex_init(&si->stat_lock);
- list_add_tail(&si->stat_list, &f2fs_stat_list);
-
si->all_area_segs = le32_to_cpu(raw_super->segment_count);
si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
@@ -325,21 +319,11 @@ static int init_stats(struct f2fs_sb_info *sbi)
si->main_area_zones = si->main_area_sections /
le32_to_cpu(raw_super->secs_per_zone);
si->sbi = sbi;
- return 0;
-}
-int f2fs_build_stats(struct f2fs_sb_info *sbi)
-{
- int retval;
-
- retval = init_stats(sbi);
- if (retval)
- return retval;
-
- if (!debugfs_root)
- debugfs_root = debugfs_create_dir("f2fs", NULL);
+ mutex_lock(&f2fs_stat_mutex);
+ list_add_tail(&si->stat_list, &f2fs_stat_list);
+ mutex_unlock(&f2fs_stat_mutex);
- debugfs_create_file("status", S_IRUGO, debugfs_root, NULL, &stat_fops);
return 0;
}
@@ -347,14 +331,22 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = sbi->stat_info;
+ mutex_lock(&f2fs_stat_mutex);
list_del(&si->stat_list);
- mutex_lock(&si->stat_lock);
- si->sbi = NULL;
- mutex_unlock(&si->stat_lock);
+ mutex_unlock(&f2fs_stat_mutex);
+
kfree(sbi->stat_info);
}
-void destroy_root_stats(void)
+void __init f2fs_create_root_stats(void)
+{
+ debugfs_root = debugfs_create_dir("f2fs", NULL);
+ if (debugfs_root)
+ debugfs_create_file("status", S_IRUGO, debugfs_root,
+ NULL, &stat_fops);
+}
+
+void f2fs_destroy_root_stats(void)
{
debugfs_remove_recursive(debugfs_root);
debugfs_root = NULL;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b4e24f32b54..989980e16d0 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -11,6 +11,7 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
+#include "node.h"
#include "acl.h"
static unsigned long dir_blocks(struct inode *inode)
@@ -74,7 +75,7 @@ static unsigned long dir_block_index(unsigned int level, unsigned int idx)
return bidx;
}
-static bool early_match_name(const char *name, int namelen,
+static bool early_match_name(const char *name, size_t namelen,
f2fs_hash_t namehash, struct f2fs_dir_entry *de)
{
if (le16_to_cpu(de->name_len) != namelen)
@@ -87,7 +88,7 @@ static bool early_match_name(const char *name, int namelen,
}
static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
- const char *name, int namelen, int *max_slots,
+ const char *name, size_t namelen, int *max_slots,
f2fs_hash_t namehash, struct page **res_page)
{
struct f2fs_dir_entry *de;
@@ -126,7 +127,7 @@ found:
}
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
- unsigned int level, const char *name, int namelen,
+ unsigned int level, const char *name, size_t namelen,
f2fs_hash_t namehash, struct page **res_page)
{
int s = GET_DENTRY_SLOTS(namelen);
@@ -181,7 +182,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct qstr *child, struct page **res_page)
{
const char *name = child->name;
- int namelen = child->len;
+ size_t namelen = child->len;
unsigned long npages = dir_blocks(dir);
struct f2fs_dir_entry *de = NULL;
f2fs_hash_t name_hash;
@@ -308,6 +309,7 @@ static int init_inode_metadata(struct inode *inode, struct dentry *dentry)
ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
+ set_cold_node(inode, ipage);
init_dent_inode(dentry, ipage);
f2fs_put_page(ipage, 1);
}
@@ -381,7 +383,7 @@ int f2fs_add_link(struct dentry *dentry, struct inode *inode)
struct inode *dir = dentry->d_parent->d_inode;
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
const char *name = dentry->d_name.name;
- int namelen = dentry->d_name.len;
+ size_t namelen = dentry->d_name.len;
struct page *dentry_page = NULL;
struct f2fs_dentry_block *dentry_blk = NULL;
int slots = GET_DENTRY_SLOTS(namelen);
@@ -501,7 +503,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
}
if (inode) {
- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ inode->i_ctime = CURRENT_TIME;
drop_nlink(inode);
if (S_ISDIR(inode->i_mode)) {
drop_nlink(inode);
@@ -540,13 +542,13 @@ int f2fs_make_empty(struct inode *inode, struct inode *parent)
de = &dentry_blk->dentry[0];
de->name_len = cpu_to_le16(1);
- de->hash_code = 0;
+ de->hash_code = f2fs_dentry_hash(".", 1);
de->ino = cpu_to_le32(inode->i_ino);
memcpy(dentry_blk->filename[0], ".", 1);
set_de_type(de, inode);
de = &dentry_blk->dentry[1];
- de->hash_code = 0;
+ de->hash_code = f2fs_dentry_hash("..", 2);
de->name_len = cpu_to_le16(2);
de->ino = cpu_to_le32(parent->i_ino);
memcpy(dentry_blk->filename[1], "..", 2);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a18d63db2fb..c8e2d751ef9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -211,11 +211,11 @@ struct dnode_of_data {
static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
struct page *ipage, struct page *npage, nid_t nid)
{
+ memset(dn, 0, sizeof(*dn));
dn->inode = inode;
dn->inode_page = ipage;
dn->node_page = npage;
dn->nid = nid;
- dn->inode_page_locked = 0;
}
/*
@@ -877,11 +877,13 @@ bool f2fs_empty_dir(struct inode *);
* super.c
*/
int f2fs_sync_fs(struct super_block *, int);
+extern __printf(3, 4)
+void f2fs_msg(struct super_block *, const char *, const char *, ...);
/*
* hash.c
*/
-f2fs_hash_t f2fs_dentry_hash(const char *, int);
+f2fs_hash_t f2fs_dentry_hash(const char *, size_t);
/*
* node.c
@@ -912,7 +914,7 @@ int restore_node_summary(struct f2fs_sb_info *, unsigned int,
void flush_nat_entries(struct f2fs_sb_info *);
int build_node_manager(struct f2fs_sb_info *);
void destroy_node_manager(struct f2fs_sb_info *);
-int create_node_manager_caches(void);
+int __init create_node_manager_caches(void);
void destroy_node_manager_caches(void);
/*
@@ -964,7 +966,7 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *);
void block_operations(struct f2fs_sb_info *);
void write_checkpoint(struct f2fs_sb_info *, bool, bool);
void init_orphan_info(struct f2fs_sb_info *);
-int create_checkpoint_caches(void);
+int __init create_checkpoint_caches(void);
void destroy_checkpoint_caches(void);
/*
@@ -984,9 +986,9 @@ int do_write_data_page(struct page *);
int start_gc_thread(struct f2fs_sb_info *);
void stop_gc_thread(struct f2fs_sb_info *);
block_t start_bidx_of_node(unsigned int);
-int f2fs_gc(struct f2fs_sb_info *, int);
+int f2fs_gc(struct f2fs_sb_info *);
void build_gc_manager(struct f2fs_sb_info *);
-int create_gc_caches(void);
+int __init create_gc_caches(void);
void destroy_gc_caches(void);
/*
@@ -1058,7 +1060,8 @@ struct f2fs_stat_info {
int f2fs_build_stats(struct f2fs_sb_info *);
void f2fs_destroy_stats(struct f2fs_sb_info *);
-void destroy_root_stats(void);
+void __init f2fs_create_root_stats(void);
+void f2fs_destroy_root_stats(void);
#else
#define stat_inc_call_count(si)
#define stat_inc_seg_count(si, type)
@@ -1068,7 +1071,8 @@ void destroy_root_stats(void);
static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
-static inline void destroy_root_stats(void) { }
+static inline void __init f2fs_create_root_stats(void) { }
+static inline void f2fs_destroy_root_stats(void) { }
#endif
extern const struct file_operations f2fs_dir_operations;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f9e085dfb1f..3191b52aafb 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -96,8 +96,9 @@ out:
}
static const struct vm_operations_struct f2fs_file_vm_ops = {
- .fault = filemap_fault,
- .page_mkwrite = f2fs_vm_page_mkwrite,
+ .fault = filemap_fault,
+ .page_mkwrite = f2fs_vm_page_mkwrite,
+ .remap_pages = generic_file_remap_pages,
};
static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode)
@@ -137,6 +138,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (ret)
return ret;
+ /* guarantee free sections for fsync */
+ f2fs_balance_fs(sbi);
+
mutex_lock(&inode->i_mutex);
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
@@ -160,15 +164,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (need_to_sync_dir(sbi, inode))
need_cp = true;
- f2fs_write_inode(inode, NULL);
-
if (need_cp) {
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
clear_inode_flag(F2FS_I(inode), FI_NEED_CP);
} else {
- while (sync_node_pages(sbi, inode->i_ino, &wbc) == 0)
- f2fs_write_inode(inode, NULL);
+ /* if there is no written node page, write its inode page */
+ while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
+ ret = f2fs_write_inode(inode, NULL);
+ if (ret)
+ goto out;
+ }
filemap_fdatawait_range(sbi->node_inode->i_mapping,
0, LONG_MAX);
}
@@ -405,6 +411,8 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
struct dnode_of_data dn;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ f2fs_balance_fs(sbi);
+
mutex_lock_op(sbi, DATA_TRUNC);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, index, RDONLY_NODE);
@@ -532,7 +540,6 @@ static long f2fs_fallocate(struct file *file, int mode,
loff_t offset, loff_t len)
{
struct inode *inode = file->f_path.dentry->d_inode;
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
long ret;
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -543,7 +550,10 @@ static long f2fs_fallocate(struct file *file, int mode,
else
ret = expand_inode_data(inode, offset, len, mode);
- f2fs_balance_fs(sbi);
+ if (!ret) {
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(inode);
+ }
return ret;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 644aa380827..c386910dacc 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -78,7 +78,7 @@ static int gc_thread_func(void *data)
sbi->bg_gc++;
- if (f2fs_gc(sbi, 1) == GC_NONE)
+ if (f2fs_gc(sbi) == GC_NONE)
wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
wait_ms = GC_THREAD_MAX_SLEEP_TIME;
@@ -390,9 +390,7 @@ next_step:
}
err = check_valid_map(sbi, segno, off);
- if (err == GC_ERROR)
- return err;
- else if (err == GC_NEXT)
+ if (err == GC_NEXT)
continue;
if (initial) {
@@ -426,32 +424,30 @@ next_step:
}
/*
- * Calculate start block index that this node page contains
+ * Calculate start block index indicating the given node offset.
+ * Be careful, caller should give this node offset only indicating direct node
+ * blocks. If any node offsets, which point the other types of node blocks such
+ * as indirect or double indirect node blocks, are given, it must be a caller's
+ * bug.
*/
block_t start_bidx_of_node(unsigned int node_ofs)
{
- block_t start_bidx;
- unsigned int bidx, indirect_blks;
- int dec;
+ unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
+ unsigned int bidx;
- indirect_blks = 2 * NIDS_PER_BLOCK + 4;
+ if (node_ofs == 0)
+ return 0;
- start_bidx = 1;
- if (node_ofs == 0) {
- start_bidx = 0;
- } else if (node_ofs <= 2) {
+ if (node_ofs <= 2) {
bidx = node_ofs - 1;
} else if (node_ofs <= indirect_blks) {
- dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
+ int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
bidx = node_ofs - 2 - dec;
} else {
- dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
+ int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
bidx = node_ofs - 5 - dec;
}
-
- if (start_bidx)
- start_bidx = bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
- return start_bidx;
+ return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
}
static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -556,9 +552,7 @@ next_step:
}
err = check_valid_map(sbi, segno, off);
- if (err == GC_ERROR)
- goto stop;
- else if (err == GC_NEXT)
+ if (err == GC_NEXT)
continue;
if (phase == 0) {
@@ -568,9 +562,7 @@ next_step:
/* Get an inode by ino with checking validity */
err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs);
- if (err == GC_ERROR)
- goto stop;
- else if (err == GC_NEXT)
+ if (err == GC_NEXT)
continue;
if (phase == 1) {
@@ -663,62 +655,44 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
return ret;
}
-int f2fs_gc(struct f2fs_sb_info *sbi, int nGC)
+int f2fs_gc(struct f2fs_sb_info *sbi)
{
- unsigned int segno;
- int old_free_secs, cur_free_secs;
- int gc_status, nfree;
struct list_head ilist;
+ unsigned int segno, i;
int gc_type = BG_GC;
+ int gc_status = GC_NONE;
INIT_LIST_HEAD(&ilist);
gc_more:
- nfree = 0;
- gc_status = GC_NONE;
+ if (!(sbi->sb->s_flags & MS_ACTIVE))
+ goto stop;
if (has_not_enough_free_secs(sbi))
- old_free_secs = reserved_sections(sbi);
- else
- old_free_secs = free_sections(sbi);
-
- while (sbi->sb->s_flags & MS_ACTIVE) {
- int i;
- if (has_not_enough_free_secs(sbi))
- gc_type = FG_GC;
+ gc_type = FG_GC;
- cur_free_secs = free_sections(sbi) + nfree;
+ if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
+ goto stop;
- /* We got free space successfully. */
- if (nGC < cur_free_secs - old_free_secs)
- break;
-
- if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
+ for (i = 0; i < sbi->segs_per_sec; i++) {
+ /*
+ * do_garbage_collect will give us three gc_status:
+ * GC_ERROR, GC_DONE, and GC_BLOCKED.
+ * If GC is finished uncleanly, we have to return
+ * the victim to dirty segment list.
+ */
+ gc_status = do_garbage_collect(sbi, segno + i, &ilist, gc_type);
+ if (gc_status != GC_DONE)
break;
-
- for (i = 0; i < sbi->segs_per_sec; i++) {
- /*
- * do_garbage_collect will give us three gc_status:
- * GC_ERROR, GC_DONE, and GC_BLOCKED.
- * If GC is finished uncleanly, we have to return
- * the victim to dirty segment list.
- */
- gc_status = do_garbage_collect(sbi, segno + i,
- &ilist, gc_type);
- if (gc_status != GC_DONE)
- goto stop;
- nfree++;
- }
}
-stop:
- if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) {
+ if (has_not_enough_free_secs(sbi)) {
write_checkpoint(sbi, (gc_status == GC_BLOCKED), false);
- if (nfree)
+ if (has_not_enough_free_secs(sbi))
goto gc_more;
}
+stop:
mutex_unlock(&sbi->gc_mutex);
put_gc_inode(&ilist);
- BUG_ON(!list_empty(&ilist));
return gc_status;
}
@@ -727,7 +701,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
DIRTY_I(sbi)->v_ops = &default_v_ops;
}
-int create_gc_caches(void)
+int __init create_gc_caches(void)
{
winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
sizeof(struct inode_entry), NULL);
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index a60f04200f8..6eb8d269b53 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -42,7 +42,7 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[])
buf[1] += b1;
}
-static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num)
+static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
{
unsigned pad, val;
int i;
@@ -69,13 +69,17 @@ static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num)
*buf++ = pad;
}
-f2fs_hash_t f2fs_dentry_hash(const char *name, int len)
+f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len)
{
- __u32 hash, minor_hash;
+ __u32 hash;
f2fs_hash_t f2fs_hash;
const char *p;
__u32 in[8], buf[4];
+ if ((len <= 2) && (name[0] == '.') &&
+ (name[1] == '.' || name[1] == '\0'))
+ return 0;
+
/* Initialize the default seed for the hash checksum functions */
buf[0] = 0x67452301;
buf[1] = 0xefcdab89;
@@ -83,15 +87,15 @@ f2fs_hash_t f2fs_dentry_hash(const char *name, int len)
buf[3] = 0x10325476;
p = name;
- while (len > 0) {
+ while (1) {
str2hashbuf(p, len, in, 4);
TEA_transform(buf, in);
- len -= 16;
p += 16;
+ if (len <= 16)
+ break;
+ len -= 16;
}
hash = buf[0];
- minor_hash = buf[1];
-
f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
return f2fs_hash;
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index df5fb381ebf..79424177732 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -203,6 +203,7 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
ri->i_generation = cpu_to_le32(inode->i_generation);
+ set_cold_node(inode, node_page);
set_page_dirty(node_page);
}
@@ -216,6 +217,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
inode->i_ino == F2FS_META_INO(sbi))
return 0;
+ if (wbc)
+ f2fs_balance_fs(sbi);
+
node_page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page))
return PTR_ERR(node_page);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 89b7675dc37..1a49b881bac 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -77,8 +77,8 @@ fail:
static int is_multimedia_file(const unsigned char *s, const char *sub)
{
- int slen = strlen(s);
- int sublen = strlen(sub);
+ size_t slen = strlen(s);
+ size_t sublen = strlen(sub);
int ret;
if (sublen > slen)
@@ -123,6 +123,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
nid_t ino = 0;
int err;
+ f2fs_balance_fs(sbi);
+
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -144,8 +146,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (!sbi->por_doing)
d_instantiate(dentry, inode);
unlock_new_inode(inode);
-
- f2fs_balance_fs(sbi);
return 0;
out:
clear_nlink(inode);
@@ -163,6 +163,8 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
int err;
+ f2fs_balance_fs(sbi);
+
inode->i_ctime = CURRENT_TIME;
atomic_inc(&inode->i_count);
@@ -172,8 +174,6 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
goto out;
d_instantiate(dentry, inode);
-
- f2fs_balance_fs(sbi);
return 0;
out:
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
@@ -223,6 +223,8 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
struct page *page;
int err = -ENOENT;
+ f2fs_balance_fs(sbi);
+
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de)
goto fail;
@@ -238,7 +240,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
/* In order to evict this inode, we set it dirty */
mark_inode_dirty(inode);
- f2fs_balance_fs(sbi);
fail:
return err;
}
@@ -249,9 +250,11 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
struct super_block *sb = dir->i_sb;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
- unsigned symlen = strlen(symname) + 1;
+ size_t symlen = strlen(symname) + 1;
int err;
+ f2fs_balance_fs(sbi);
+
inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -268,9 +271,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
d_instantiate(dentry, inode);
unlock_new_inode(inode);
-
- f2fs_balance_fs(sbi);
-
return err;
out:
clear_nlink(inode);
@@ -286,6 +286,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int err;
+ f2fs_balance_fs(sbi);
+
inode = f2fs_new_inode(dir, S_IFDIR | mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -305,7 +307,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
d_instantiate(dentry, inode);
unlock_new_inode(inode);
- f2fs_balance_fs(sbi);
return 0;
out_fail:
@@ -336,6 +337,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
if (!new_valid_dev(rdev))
return -EINVAL;
+ f2fs_balance_fs(sbi);
+
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -350,9 +353,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
alloc_nid_done(sbi, inode->i_ino);
d_instantiate(dentry, inode);
unlock_new_inode(inode);
-
- f2fs_balance_fs(sbi);
-
return 0;
out:
clear_nlink(inode);
@@ -376,6 +376,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct f2fs_dir_entry *new_entry;
int err = -ENOENT;
+ f2fs_balance_fs(sbi);
+
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry)
goto out;
@@ -441,8 +443,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
mutex_unlock_op(sbi, RENAME);
-
- f2fs_balance_fs(sbi);
return 0;
out_dir:
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 19870361497..9bda63c9c16 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -484,12 +484,14 @@ static void truncate_node(struct dnode_of_data *dn)
struct node_info ni;
get_node_info(sbi, dn->nid, &ni);
+ if (dn->inode->i_blocks == 0) {
+ BUG_ON(ni.blk_addr != NULL_ADDR);
+ goto invalidate;
+ }
BUG_ON(ni.blk_addr == NULL_ADDR);
- if (ni.blk_addr != NULL_ADDR)
- invalidate_blocks(sbi, ni.blk_addr);
-
/* Deallocate node address */
+ invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, dn->inode, 1);
set_node_addr(sbi, &ni, NULL_ADDR);
@@ -499,7 +501,7 @@ static void truncate_node(struct dnode_of_data *dn)
} else {
sync_inode_page(dn);
}
-
+invalidate:
clear_node_page_dirty(dn->node_page);
F2FS_SET_SB_DIRT(sbi);
@@ -768,20 +770,12 @@ int remove_inode_page(struct inode *inode)
dn.inode_page_locked = 1;
truncate_node(&dn);
}
- if (inode->i_blocks == 1) {
- /* inernally call f2fs_put_page() */
- set_new_dnode(&dn, inode, page, page, ino);
- truncate_node(&dn);
- } else if (inode->i_blocks == 0) {
- struct node_info ni;
- get_node_info(sbi, inode->i_ino, &ni);
- /* called after f2fs_new_inode() is failed */
- BUG_ON(ni.blk_addr != NULL_ADDR);
- f2fs_put_page(page, 1);
- } else {
- BUG();
- }
+ /* 0 is possible, after f2fs_new_inode() is failed */
+ BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1);
+ set_new_dnode(&dn, inode, page, page, ino);
+ truncate_node(&dn);
+
mutex_unlock_op(sbi, NODE_TRUNC);
return 0;
}
@@ -834,17 +828,18 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
goto fail;
}
set_node_addr(sbi, &new_ni, NEW_ADDR);
+ set_cold_node(dn->inode, page);
dn->node_page = page;
sync_inode_page(dn);
set_page_dirty(page);
- set_cold_node(dn->inode, page);
if (ofs == 0)
inc_valid_inode_count(sbi);
return page;
fail:
+ clear_node_page_dirty(page);
f2fs_put_page(page, 1);
return ERR_PTR(err);
}
@@ -1093,7 +1088,6 @@ static int f2fs_write_node_page(struct page *page,
{
struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
nid_t nid;
- unsigned int nofs;
block_t new_addr;
struct node_info ni;
@@ -1110,7 +1104,6 @@ static int f2fs_write_node_page(struct page *page,
/* get old block addr of this node page */
nid = nid_of_node(page);
- nofs = ofs_of_node(page);
BUG_ON(page->index != nid);
get_node_info(sbi, nid, &ni);
@@ -1131,6 +1124,12 @@ static int f2fs_write_node_page(struct page *page,
return 0;
}
+/*
+ * It is very important to gather dirty pages and write at once, so that we can
+ * submit a big bio without interfering other data writes.
+ * Be default, 512 pages (2MB), a segment size, is quite reasonable.
+ */
+#define COLLECT_DIRTY_NODES 512
static int f2fs_write_node_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -1138,17 +1137,16 @@ static int f2fs_write_node_pages(struct address_space *mapping,
struct block_device *bdev = sbi->sb->s_bdev;
long nr_to_write = wbc->nr_to_write;
- if (wbc->for_kupdate)
- return 0;
-
- if (get_pages(sbi, F2FS_DIRTY_NODES) == 0)
- return 0;
-
+ /* First check balancing cached NAT entries */
if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
write_checkpoint(sbi, false, false);
return 0;
}
+ /* collect a number of dirty node pages and write together */
+ if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
+ return 0;
+
/* if mounting is failed, skip writing node pages */
wbc->nr_to_write = bio_get_nr_vecs(bdev);
sync_node_pages(sbi, 0, wbc);
@@ -1571,7 +1569,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
nid_t nid;
struct f2fs_nat_entry raw_ne;
int offset = -1;
- block_t old_blkaddr, new_blkaddr;
+ block_t new_blkaddr;
ne = list_entry(cur, struct nat_entry, list);
nid = nat_get_nid(ne);
@@ -1585,7 +1583,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
if (offset >= 0) {
raw_ne = nat_in_journal(sum, offset);
- old_blkaddr = le32_to_cpu(raw_ne.block_addr);
goto flush_now;
}
to_nat_page:
@@ -1607,7 +1604,6 @@ to_nat_page:
BUG_ON(!nat_blk);
raw_ne = nat_blk->entries[nid - start_nid];
- old_blkaddr = le32_to_cpu(raw_ne.block_addr);
flush_now:
new_blkaddr = nat_get_blkaddr(ne);
@@ -1741,7 +1737,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
kfree(nm_i);
}
-int create_node_manager_caches(void)
+int __init create_node_manager_caches(void)
{
nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
sizeof(struct nat_entry), NULL);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index b07e9b6ef37..f42e4060b39 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -67,7 +67,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
kunmap(page);
f2fs_put_page(page, 0);
} else {
- f2fs_add_link(&dent, inode);
+ err = f2fs_add_link(&dent, inode);
}
iput(dir);
out:
@@ -144,14 +144,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
goto out;
}
- INIT_LIST_HEAD(&entry->list);
- list_add_tail(&entry->list, head);
-
entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
if (IS_ERR(entry->inode)) {
err = PTR_ERR(entry->inode);
+ kmem_cache_free(fsync_entry_slab, entry);
goto out;
}
+
+ list_add_tail(&entry->list, head);
entry->blkaddr = blkaddr;
}
if (IS_INODE(page)) {
@@ -173,10 +173,9 @@ out:
static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
struct list_head *head)
{
- struct list_head *this;
- struct fsync_inode_entry *entry;
- list_for_each(this, head) {
- entry = list_entry(this, struct fsync_inode_entry, list);
+ struct fsync_inode_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, head, list) {
iput(entry->inode);
list_del(&entry->list);
kmem_cache_free(fsync_entry_slab, entry);
@@ -228,6 +227,9 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
/* Deallocate previous index in the node page */
inode = f2fs_iget_nowait(sbi->sb, ino);
+ if (IS_ERR(inode))
+ return;
+
truncate_hole(inode, bidx, bidx + 1);
iput(inode);
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 1b26e4ea101..4b009906658 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -12,57 +12,26 @@
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/prefetch.h>
#include <linux/vmalloc.h>
#include "f2fs.h"
#include "segment.h"
#include "node.h"
-static int need_to_flush(struct f2fs_sb_info *sbi)
-{
- unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
- sbi->segs_per_sec;
- int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
- >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
- int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
- >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
-
- if (sbi->por_doing)
- return 0;
-
- if (free_sections(sbi) <= (node_secs + 2 * dent_secs +
- reserved_sections(sbi)))
- return 1;
- return 0;
-}
-
/*
* This function balances dirty node and dentry pages.
* In addition, it controls garbage collection.
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi)
{
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = LONG_MAX,
- .for_reclaim = 0,
- };
-
- if (sbi->por_doing)
- return;
-
/*
- * We should do checkpoint when there are so many dirty node pages
- * with enough free segments. After then, we should do GC.
+ * We should do GC or end up with checkpoint, if there are so many dirty
+ * dir/node pages without enough free segments.
*/
- if (need_to_flush(sbi)) {
- sync_dirty_dir_inodes(sbi);
- sync_node_pages(sbi, 0, &wbc);
- }
-
if (has_not_enough_free_secs(sbi)) {
mutex_lock(&sbi->gc_mutex);
- f2fs_gc(sbi, 1);
+ f2fs_gc(sbi);
}
}
@@ -631,7 +600,6 @@ static void f2fs_end_io_write(struct bio *bio, int err)
if (page->mapping)
set_bit(AS_EIO, &page->mapping->flags);
set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
- set_page_dirty(page);
}
end_page_writeback(page);
dec_page_count(p->sbi, F2FS_WRITEBACK);
@@ -791,11 +759,10 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
return __get_segment_type_2(page, p_type);
case 4:
return __get_segment_type_4(page, p_type);
- case 6:
- return __get_segment_type_6(page, p_type);
- default:
- BUG();
}
+ /* NR_CURSEG_TYPE(6) logs by default */
+ BUG_ON(sbi->active_logs != NR_CURSEG_TYPE);
+ return __get_segment_type_6(page, p_type);
}
static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1608,7 +1575,6 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_DIRTY_TYPE; i++) {
dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
- dirty_i->nr_dirty[i] = 0;
if (!dirty_i->dirty_segmap[i])
return -ENOMEM;
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 0948405af6f..66a288a52fd 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -459,7 +459,20 @@ static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi)
{
- return free_sections(sbi) <= reserved_sections(sbi);
+ unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
+ sbi->segs_per_sec;
+ int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
+ >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
+ int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
+ >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
+
+ if (sbi->por_doing)
+ return false;
+
+ if (free_sections(sbi) <= (node_secs + 2 * dent_secs +
+ reserved_sections(sbi)))
+ return true;
+ return false;
}
static inline int utilization(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 13867322cf5..37fad04c866 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -53,6 +53,18 @@ static match_table_t f2fs_tokens = {
{Opt_err, NULL},
};
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
+ va_end(args);
+}
+
static void init_once(void *foo)
{
struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
@@ -119,15 +131,16 @@ static void f2fs_put_super(struct super_block *sb)
int f2fs_sync_fs(struct super_block *sb, int sync)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
- int ret = 0;
if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
return 0;
if (sync)
write_checkpoint(sbi, false, false);
+ else
+ f2fs_balance_fs(sbi);
- return ret;
+ return 0;
}
static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -148,8 +161,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
buf->f_bavail = user_block_count - valid_user_blocks(sbi);
- buf->f_files = valid_inode_count(sbi);
- buf->f_ffree = sbi->total_node_count - valid_node_count(sbi);
+ buf->f_files = sbi->total_node_count;
+ buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi);
buf->f_namelen = F2FS_MAX_NAME_LEN;
buf->f_fsid.val[0] = (u32)id;
@@ -248,7 +261,8 @@ static const struct export_operations f2fs_export_ops = {
.get_parent = f2fs_get_parent,
};
-static int parse_options(struct f2fs_sb_info *sbi, char *options)
+static int parse_options(struct super_block *sb, struct f2fs_sb_info *sbi,
+ char *options)
{
substring_t args[MAX_OPT_ARGS];
char *p;
@@ -287,7 +301,8 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
break;
#else
case Opt_nouser_xattr:
- pr_info("nouser_xattr options not supported\n");
+ f2fs_msg(sb, KERN_INFO,
+ "nouser_xattr options not supported");
break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
@@ -296,13 +311,13 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
break;
#else
case Opt_noacl:
- pr_info("noacl options not supported\n");
+ f2fs_msg(sb, KERN_INFO, "noacl options not supported");
break;
#endif
case Opt_active_logs:
if (args->from && match_int(args, &arg))
return -EINVAL;
- if (arg != 2 && arg != 4 && arg != 6)
+ if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
return -EINVAL;
sbi->active_logs = arg;
break;
@@ -310,8 +325,9 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
set_opt(sbi, DISABLE_EXT_IDENTIFY);
break;
default:
- pr_err("Unrecognized mount option \"%s\" or missing value\n",
- p);
+ f2fs_msg(sb, KERN_ERR,
+ "Unrecognized mount option \"%s\" or missing value",
+ p);
return -EINVAL;
}
}
@@ -338,23 +354,36 @@ static loff_t max_file_size(unsigned bits)
return result;
}
-static int sanity_check_raw_super(struct f2fs_super_block *raw_super)
+static int sanity_check_raw_super(struct super_block *sb,
+ struct f2fs_super_block *raw_super)
{
unsigned int blocksize;
- if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic))
+ if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Magic Mismatch, valid(0x%x) - read(0x%x)",
+ F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
return 1;
+ }
/* Currently, support only 4KB block size */
blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
- if (blocksize != PAGE_CACHE_SIZE)
+ if (blocksize != PAGE_CACHE_SIZE) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid blocksize (%u), supports only 4KB\n",
+ blocksize);
return 1;
+ }
if (le32_to_cpu(raw_super->log_sectorsize) !=
- F2FS_LOG_SECTOR_SIZE)
+ F2FS_LOG_SECTOR_SIZE) {
+ f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize");
return 1;
+ }
if (le32_to_cpu(raw_super->log_sectors_per_block) !=
- F2FS_LOG_SECTORS_PER_BLOCK)
+ F2FS_LOG_SECTORS_PER_BLOCK) {
+ f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block");
return 1;
+ }
return 0;
}
@@ -414,14 +443,17 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
if (!sbi)
return -ENOMEM;
- /* set a temporary block size */
- if (!sb_set_blocksize(sb, F2FS_BLKSIZE))
+ /* set a block size */
+ if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) {
+ f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
goto free_sbi;
+ }
/* read f2fs raw super block */
raw_super_buf = sb_bread(sb, 0);
if (!raw_super_buf) {
err = -EIO;
+ f2fs_msg(sb, KERN_ERR, "unable to read superblock");
goto free_sbi;
}
raw_super = (struct f2fs_super_block *)
@@ -439,12 +471,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
set_opt(sbi, POSIX_ACL);
#endif
/* parse mount options */
- if (parse_options(sbi, (char *)data))
+ if (parse_options(sb, sbi, (char *)data))
goto free_sb_buf;
/* sanity checking of raw super */
- if (sanity_check_raw_super(raw_super))
+ if (sanity_check_raw_super(sb, raw_super)) {
+ f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem");
goto free_sb_buf;
+ }
sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
sb->s_max_links = F2FS_LINK_MAX;
@@ -478,18 +512,23 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* get an inode for meta space */
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
if (IS_ERR(sbi->meta_inode)) {
+ f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
goto free_sb_buf;
}
err = get_valid_checkpoint(sbi);
- if (err)
+ if (err) {
+ f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
goto free_meta_inode;
+ }
/* sanity checking of checkpoint */
err = -EINVAL;
- if (sanity_check_ckpt(raw_super, sbi->ckpt))
+ if (sanity_check_ckpt(raw_super, sbi->ckpt)) {
+ f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint");
goto free_cp;
+ }
sbi->total_valid_node_count =
le32_to_cpu(sbi->ckpt->valid_node_count);
@@ -503,38 +542,41 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&sbi->dir_inode_list);
spin_lock_init(&sbi->dir_inode_lock);
- /* init super block */
- if (!sb_set_blocksize(sb, sbi->blocksize))
- goto free_cp;
-
init_orphan_info(sbi);
/* setup f2fs internal modules */
err = build_segment_manager(sbi);
- if (err)
+ if (err) {
+ f2fs_msg(sb, KERN_ERR,
+ "Failed to initialize F2FS segment manager");
goto free_sm;
+ }
err = build_node_manager(sbi);
- if (err)
+ if (err) {
+ f2fs_msg(sb, KERN_ERR,
+ "Failed to initialize F2FS node manager");
goto free_nm;
+ }
build_gc_manager(sbi);
/* get an inode for node space */
sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
if (IS_ERR(sbi->node_inode)) {
+ f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
err = PTR_ERR(sbi->node_inode);
goto free_nm;
}
/* if there are nt orphan nodes free them */
err = -EINVAL;
- if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
- recover_orphan_inodes(sbi))
+ if (recover_orphan_inodes(sbi))
goto free_node_inode;
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
if (IS_ERR(root)) {
+ f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
err = PTR_ERR(root);
goto free_node_inode;
}
@@ -548,8 +590,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
}
/* recover fsynced data */
- if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
- !test_opt(sbi, DISABLE_ROLL_FORWARD))
+ if (!test_opt(sbi, DISABLE_ROLL_FORWARD))
recover_fsync_data(sbi);
/* After POR, we can run background GC thread */
@@ -599,7 +640,7 @@ static struct file_system_type f2fs_fs_type = {
.fs_flags = FS_REQUIRES_DEV,
};
-static int init_inodecache(void)
+static int __init init_inodecache(void)
{
f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
sizeof(struct f2fs_inode_info), NULL);
@@ -634,14 +675,17 @@ static int __init init_f2fs_fs(void)
err = create_checkpoint_caches();
if (err)
goto fail;
- return register_filesystem(&f2fs_fs_type);
+ err = register_filesystem(&f2fs_fs_type);
+ if (err)
+ goto fail;
+ f2fs_create_root_stats();
fail:
return err;
}
static void __exit exit_f2fs_fs(void)
{
- destroy_root_stats();
+ f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
destroy_checkpoint_caches();
destroy_gc_caches();
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 7d52e8dc0c5..8038c049650 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -208,7 +208,7 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
struct page *page;
void *base_addr;
int error = 0, found = 0;
- int value_len, name_len;
+ size_t value_len, name_len;
if (name == NULL)
return -EINVAL;
@@ -304,7 +304,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
struct f2fs_xattr_entry *here, *last;
struct page *page;
void *base_addr;
- int error, found, free, name_len, newsize;
+ int error, found, free, newsize;
+ size_t name_len;
char *pval;
if (name == NULL)
@@ -317,6 +318,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
if (name_len > 255 || value_len > MAX_VALUE_LEN)
return -ERANGE;
+ f2fs_balance_fs(sbi);
+
mutex_lock_op(sbi, NODE_NEW);
if (!fi->i_xattr_nid) {
/* Allocate new attribute block */
diff --git a/fs/file.c b/fs/file.c
index 15cb8618e95..2b3570b7cae 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -490,7 +490,7 @@ void exit_files(struct task_struct *tsk)
}
}
-static void __devinit fdtable_defer_list_init(int cpu)
+static void fdtable_defer_list_init(int cpu)
{
struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
spin_lock_init(&fddef->lock);
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 0cf160a94ed..1b2f6c2c3aa 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -4,12 +4,24 @@ config FUSE_FS
With FUSE it is possible to implement a fully functional filesystem
in a userspace program.
- There's also companion library: libfuse. This library along with
- utilities is available from the FUSE homepage:
+ There's also a companion library: libfuse2. This library is available
+ from the FUSE homepage:
<http://fuse.sourceforge.net/>
+ although chances are your distribution already has that library
+ installed if you've installed the "fuse" package itself.
See <file:Documentation/filesystems/fuse.txt> for more information.
See <file:Documentation/Changes> for needed library/utility version.
If you want to develop a userspace FS, or if you want to use
a filesystem based on FUSE, answer Y or M.
+
+config CUSE
+ tristate "Character device in Userspace support"
+ depends on FUSE_FS
+ help
+ This FUSE extension allows character devices to be
+ implemented in userspace.
+
+ If you want to develop or use a userspace character device
+ based on CUSE, answer Y or M.
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index ee8d5504229..e397b675b02 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -45,7 +45,6 @@
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/module.h>
@@ -63,7 +62,7 @@ struct cuse_conn {
bool unrestricted_ioctl;
};
-static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */
+static DEFINE_MUTEX(cuse_lock); /* protects registration */
static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
static struct class *cuse_class;
@@ -114,14 +113,14 @@ static int cuse_open(struct inode *inode, struct file *file)
int rc;
/* look up and get the connection */
- spin_lock(&cuse_lock);
+ mutex_lock(&cuse_lock);
list_for_each_entry(pos, cuse_conntbl_head(devt), list)
if (pos->dev->devt == devt) {
fuse_conn_get(&pos->fc);
cc = pos;
break;
}
- spin_unlock(&cuse_lock);
+ mutex_unlock(&cuse_lock);
/* dead? */
if (!cc)
@@ -267,7 +266,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
{
char *end = p + len;
- char *key, *val;
+ char *uninitialized_var(key), *uninitialized_var(val);
int rc;
while (true) {
@@ -305,14 +304,14 @@ static void cuse_gendev_release(struct device *dev)
*/
static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
{
- struct cuse_conn *cc = fc_to_cc(fc);
+ struct cuse_conn *cc = fc_to_cc(fc), *pos;
struct cuse_init_out *arg = req->out.args[0].value;
struct page *page = req->pages[0];
struct cuse_devinfo devinfo = { };
struct device *dev;
struct cdev *cdev;
dev_t devt;
- int rc;
+ int rc, i;
if (req->out.h.error ||
arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
@@ -356,15 +355,24 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
dev_set_drvdata(dev, cc);
dev_set_name(dev, "%s", devinfo.name);
+ mutex_lock(&cuse_lock);
+
+ /* make sure the device-name is unique */
+ for (i = 0; i < CUSE_CONNTBL_LEN; ++i) {
+ list_for_each_entry(pos, &cuse_conntbl[i], list)
+ if (!strcmp(dev_name(pos->dev), dev_name(dev)))
+ goto err_unlock;
+ }
+
rc = device_add(dev);
if (rc)
- goto err_device;
+ goto err_unlock;
/* register cdev */
rc = -ENOMEM;
cdev = cdev_alloc();
if (!cdev)
- goto err_device;
+ goto err_unlock;
cdev->owner = THIS_MODULE;
cdev->ops = &cuse_frontend_fops;
@@ -377,9 +385,8 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
cc->cdev = cdev;
/* make the device available */
- spin_lock(&cuse_lock);
list_add(&cc->list, cuse_conntbl_head(devt));
- spin_unlock(&cuse_lock);
+ mutex_unlock(&cuse_lock);
/* announce device availability */
dev_set_uevent_suppress(dev, 0);
@@ -391,7 +398,8 @@ out:
err_cdev:
cdev_del(cdev);
-err_device:
+err_unlock:
+ mutex_unlock(&cuse_lock);
put_device(dev);
err_region:
unregister_chrdev_region(devt, 1);
@@ -520,9 +528,9 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
int rc;
/* remove from the conntbl, no more access from this point on */
- spin_lock(&cuse_lock);
+ mutex_lock(&cuse_lock);
list_del_init(&cc->list);
- spin_unlock(&cuse_lock);
+ mutex_unlock(&cuse_lock);
/* remove device */
if (cc->dev)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c16335315e5..e83351aa5ba 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -692,8 +692,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
struct page *oldpage = *pagep;
struct page *newpage;
struct pipe_buffer *buf = cs->pipebufs;
- struct address_space *mapping;
- pgoff_t index;
unlock_request(cs->fc, cs->req);
fuse_copy_finish(cs);
@@ -724,9 +722,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (fuse_check_page(newpage) != 0)
goto out_fallback_unlock;
- mapping = oldpage->mapping;
- index = oldpage->index;
-
/*
* This is a new and locked page, it shouldn't be mapped or
* have any special flags on it
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e21d4d8f87e..f3ab824fa30 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2177,8 +2177,8 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
return ret;
}
-long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
- loff_t length)
+static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
+ loff_t length)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
@@ -2213,7 +2213,6 @@ long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
return err;
}
-EXPORT_SYMBOL_GPL(fuse_file_fallocate);
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 8dad6b09371..b906ed17a83 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -241,6 +241,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
static void gfs2_reverse_hex(char *c, u64 value)
{
+ *c = '0';
while (value) {
*c-- = hex_asc[value & 0x0f];
value >>= 4;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 37ee061d899..b7eff078fe9 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -350,10 +350,14 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
BUG_ON(len < chunk_size);
len -= chunk_size;
block = gfs2_rbm_to_block(&rbm);
- gfs2_rbm_from_block(&rbm, block + chunk_size);
- n_unaligned = 3;
- if (ptr)
+ if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
+ n_unaligned = 0;
break;
+ }
+ if (ptr) {
+ n_unaligned = 3;
+ break;
+ }
n_unaligned = len & 3;
}
@@ -557,22 +561,20 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
*/
int gfs2_rs_alloc(struct gfs2_inode *ip)
{
- struct gfs2_blkreserv *res;
+ int error = 0;
+ down_write(&ip->i_rw_mutex);
if (ip->i_res)
- return 0;
-
- res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
- if (!res)
- return -ENOMEM;
+ goto out;
- RB_CLEAR_NODE(&res->rs_node);
+ ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
+ if (!ip->i_res) {
+ error = -ENOMEM;
+ goto out;
+ }
- down_write(&ip->i_rw_mutex);
- if (ip->i_res)
- kmem_cache_free(gfs2_rsrv_cachep, res);
- else
- ip->i_res = res;
+ RB_CLEAR_NODE(&ip->i_res->rs_node);
+out:
up_write(&ip->i_rw_mutex);
return 0;
}
@@ -1424,6 +1426,9 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
rs->rs_free = extlen;
rs->rs_inum = ip->i_no_addr;
rs_insert(ip);
+ } else {
+ if (goal == rgd->rd_last_alloc + rgd->rd_data0)
+ rgd->rd_last_alloc = 0;
}
}
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index a2862339323..81cc7eaff86 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -446,7 +446,8 @@ int __log_start_commit(journal_t *journal, tid_t target)
* currently running transaction (if it exists). Otherwise,
* the target tid must be an old one.
*/
- if (journal->j_running_transaction &&
+ if (journal->j_commit_request != target &&
+ journal->j_running_transaction &&
journal->j_running_transaction->t_tid == target) {
/*
* We want a new commit: OK, mark the request and wakeup the
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 42f6615af0a..df9f29760ef 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -209,7 +209,8 @@ repeat:
if (!new_transaction)
goto alloc_transaction;
write_lock(&journal->j_state_lock);
- if (!journal->j_running_transaction) {
+ if (!journal->j_running_transaction &&
+ !journal->j_barrier_count) {
jbd2_get_transaction(journal, new_transaction);
new_transaction = NULL;
}
@@ -1839,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
BUFFER_TRACE(bh, "entry");
-retry:
/*
* It is safe to proceed here without the j_list_lock because the
* buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1934,14 +1934,11 @@ retry:
* for commit and try again.
*/
if (partial_page) {
- tid_t tid = journal->j_committing_transaction->t_tid;
-
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
write_unlock(&journal->j_state_lock);
- jbd2_log_wait_commit(journal, tid);
- goto retry;
+ return -EBUSY;
}
/*
* OK, buffer won't be reachable after truncate. We just set
@@ -2002,21 +1999,23 @@ zap_buffer_unlocked:
* @page: page to flush
* @offset: length of page to invalidate.
*
- * Reap page buffers containing data after offset in page.
- *
+ * Reap page buffers containing data after offset in page. Can return -EBUSY
+ * if buffers are part of the committing transaction and the page is straddling
+ * i_size. Caller then has to wait for current commit and try again.
*/
-void jbd2_journal_invalidatepage(journal_t *journal,
- struct page *page,
- unsigned long offset)
+int jbd2_journal_invalidatepage(journal_t *journal,
+ struct page *page,
+ unsigned long offset)
{
struct buffer_head *head, *bh, *next;
unsigned int curr_off = 0;
int may_free = 1;
+ int ret = 0;
if (!PageLocked(page))
BUG();
if (!page_has_buffers(page))
- return;
+ return 0;
/* We will potentially be playing with lists other than just the
* data lists (especially for journaled data mode), so be
@@ -2030,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
if (offset <= curr_off) {
/* This block is wholly outside the truncation point */
lock_buffer(bh);
- may_free &= journal_unmap_buffer(journal, bh,
- offset > 0);
+ ret = journal_unmap_buffer(journal, bh, offset > 0);
unlock_buffer(bh);
+ if (ret < 0)
+ return ret;
+ may_free &= ret;
}
curr_off = next_off;
bh = next;
@@ -2043,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
if (may_free && try_to_free_buffers(page))
J_ASSERT(!page_has_buffers(page));
}
+ return 0;
}
/*
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c89b26bc975..264d1aa935f 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -206,7 +206,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
list_for_each_entry(lo, &server->layouts, plh_layouts) {
ino = igrab(lo->plh_inode);
- if (ino)
+ if (!ino)
continue;
spin_lock(&ino->i_lock);
/* Is this layout in the process of being freed? */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32e6c53520e..1b2d7eb9379 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2153,12 +2153,16 @@ static int nfs_open_permission_mask(int openflags)
{
int mask = 0;
- if ((openflags & O_ACCMODE) != O_WRONLY)
- mask |= MAY_READ;
- if ((openflags & O_ACCMODE) != O_RDONLY)
- mask |= MAY_WRITE;
- if (openflags & __FMODE_EXEC)
- mask |= MAY_EXEC;
+ if (openflags & __FMODE_EXEC) {
+ /* ONLY check exec rights */
+ mask = MAY_EXEC;
+ } else {
+ if ((openflags & O_ACCMODE) != O_WRONLY)
+ mask |= MAY_READ;
+ if ((openflags & O_ACCMODE) != O_RDONLY)
+ mask |= MAY_WRITE;
+ }
+
return mask;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5d864fb3657..cf747ef8665 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1626,7 +1626,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
static int nfs4_opendata_access(struct rpc_cred *cred,
struct nfs4_opendata *opendata,
- struct nfs4_state *state, fmode_t fmode)
+ struct nfs4_state *state, fmode_t fmode,
+ int openflags)
{
struct nfs_access_entry cache;
u32 mask;
@@ -1638,11 +1639,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
mask = 0;
/* don't check MAY_WRITE - a newly created file may not have
- * write mode bits, but POSIX allows the creating process to write */
- if (fmode & FMODE_READ)
- mask |= MAY_READ;
- if (fmode & FMODE_EXEC)
- mask |= MAY_EXEC;
+ * write mode bits, but POSIX allows the creating process to write.
+ * use openflags to check for exec, because fmode won't
+ * always have FMODE_EXEC set when file open for exec. */
+ if (openflags & __FMODE_EXEC) {
+ /* ONLY check for exec rights */
+ mask = MAY_EXEC;
+ } else if (fmode & FMODE_READ)
+ mask = MAY_READ;
cache.cred = cred;
cache.jiffies = jiffies;
@@ -1896,7 +1900,7 @@ static int _nfs4_do_open(struct inode *dir,
if (server->caps & NFS_CAP_POSIX_LOCK)
set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
- status = nfs4_opendata_access(cred, opendata, state, fmode);
+ status = nfs4_opendata_access(cred, opendata, state, fmode, flags);
if (status != 0)
goto err_opendata_put;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e7165d91536..d00260b0810 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -254,7 +254,7 @@ static void
pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
lo->plh_retry_timestamp = jiffies;
- if (test_and_set_bit(fail_bit, &lo->plh_flags))
+ if (!test_and_set_bit(fail_bit, &lo->plh_flags))
atomic_inc(&lo->plh_refcount);
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index b6bdb18e892..a5e5d9899d5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -91,12 +91,16 @@ void nfs_readdata_release(struct nfs_read_data *rdata)
put_nfs_open_context(rdata->args.context);
if (rdata->pages.pagevec != rdata->pages.page_array)
kfree(rdata->pages.pagevec);
- if (rdata != &read_header->rpc_data)
- kfree(rdata);
- else
+ if (rdata == &read_header->rpc_data) {
rdata->header = NULL;
+ rdata = NULL;
+ }
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
+ /* Note: we only free the rpc_task after callbacks are done.
+ * See the comment in rpc_free_task() for why
+ */
+ kfree(rdata);
}
EXPORT_SYMBOL_GPL(nfs_readdata_release);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c25cadf8f8c..2e7e8c878e5 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1152,7 +1152,7 @@ static int nfs_get_option_str(substring_t args[], char **option)
{
kfree(*option);
*option = match_strdup(args);
- return !option;
+ return !*option;
}
static int nfs_get_option_ul(substring_t args[], unsigned long *option)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b673be31590..c483cc50b82 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -126,12 +126,16 @@ void nfs_writedata_release(struct nfs_write_data *wdata)
put_nfs_open_context(wdata->args.context);
if (wdata->pages.pagevec != wdata->pages.page_array)
kfree(wdata->pages.pagevec);
- if (wdata != &write_header->rpc_data)
- kfree(wdata);
- else
+ if (wdata == &write_header->rpc_data) {
wdata->header = NULL;
+ wdata = NULL;
+ }
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
+ /* Note: we only free the rpc_task after callbacks are done.
+ * See the comment in rpc_free_task() for why
+ */
+ kfree(wdata);
}
EXPORT_SYMBOL_GPL(nfs_writedata_release);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index e064f562b1f..76ddae83daa 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -352,18 +352,18 @@ retry:
if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
return -ENOMEM;
- spin_lock_bh(&proc_inum_lock);
+ spin_lock_irq(&proc_inum_lock);
error = ida_get_new(&proc_inum_ida, &i);
- spin_unlock_bh(&proc_inum_lock);
+ spin_unlock_irq(&proc_inum_lock);
if (error == -EAGAIN)
goto retry;
else if (error)
return error;
if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
- spin_lock_bh(&proc_inum_lock);
+ spin_lock_irq(&proc_inum_lock);
ida_remove(&proc_inum_ida, i);
- spin_unlock_bh(&proc_inum_lock);
+ spin_unlock_irq(&proc_inum_lock);
return -ENOSPC;
}
*inum = PROC_DYNAMIC_FIRST + i;
@@ -372,9 +372,10 @@ retry:
void proc_free_inum(unsigned int inum)
{
- spin_lock_bh(&proc_inum_lock);
+ unsigned long flags;
+ spin_lock_irqsave(&proc_inum_lock, flags);
ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
- spin_unlock_bh(&proc_inum_lock);
+ spin_unlock_irqrestore(&proc_inum_lock, flags);
}
static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 448455b7fd9..ca5ce7f9f80 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1278,7 +1278,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
walk.mm = mm;
pol = get_vma_policy(task, vma, vma->vm_start);
- mpol_to_str(buffer, sizeof(buffer), pol, 0);
+ mpol_to_str(buffer, sizeof(buffer), pol);
mpol_cond_put(pol);
seq_printf(m, "%08lx %s", vma->vm_start, buffer);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index f883e7e7430..7003e5266f2 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -291,9 +291,8 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
kfree(cxt->przs);
}
-static int __devinit ramoops_init_przs(struct device *dev,
- struct ramoops_context *cxt,
- phys_addr_t *paddr, size_t dump_mem_sz)
+static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
+ phys_addr_t *paddr, size_t dump_mem_sz)
{
int err = -ENOMEM;
int i;
@@ -336,10 +335,9 @@ fail_prz:
return err;
}
-static int __devinit ramoops_init_prz(struct device *dev,
- struct ramoops_context *cxt,
- struct persistent_ram_zone **prz,
- phys_addr_t *paddr, size_t sz, u32 sig)
+static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
+ struct persistent_ram_zone **prz,
+ phys_addr_t *paddr, size_t sz, u32 sig)
{
if (!sz)
return 0;
@@ -367,7 +365,7 @@ static int __devinit ramoops_init_prz(struct device *dev,
return 0;
}
-static int __devinit ramoops_probe(struct platform_device *pdev)
+static int ramoops_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct ramoops_platform_data *pdata = pdev->dev.platform_data;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index eecd2a8a84d..0306303be37 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -390,8 +390,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
return 0;
}
-static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz,
- u32 sig, int ecc_size)
+static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
+ int ecc_size)
{
int ret;
@@ -443,9 +443,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
kfree(prz);
}
-struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start,
- size_t size, u32 sig,
- int ecc_size)
+struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
+ u32 sig, int ecc_size)
{
struct persistent_ram_zone *prz;
int ret = -ENOMEM;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 9d863fb501f..f2bc3dfd0b8 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -296,7 +296,7 @@ EXPORT_SYMBOL(seq_read);
* seq_lseek - ->llseek() method for sequential files.
* @file: the file in question
* @offset: new position
- * @origin: 0 for absolute, 1 for relative position
+ * @whence: 0 for absolute, 1 for relative position
*
* Ready-made ->f_op->llseek()
*/
diff --git a/fs/splice.c b/fs/splice.c
index 8890604e3fc..6909d89d0da 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -696,8 +696,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
return -EINVAL;
more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
- if (sd->len < sd->total_len)
+
+ if (sd->len < sd->total_len && pipe->nrbufs > 1)
more |= MSG_SENDPAGE_NOTLAST;
+
return file->f_op->sendpage(file, buf->page, buf->offset,
sd->len, &pos, more);
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index d44fb568abe..e9be396a558 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -307,7 +307,8 @@ static void udf_sb_free_partitions(struct super_block *sb)
{
struct udf_sb_info *sbi = UDF_SB(sb);
int i;
-
+ if (sbi->s_partmaps == NULL)
+ return;
for (i = 0; i < sbi->s_partitions; i++)
udf_free_partition(&sbi->s_partmaps[i]);
kfree(sbi->s_partmaps);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 26673a0b20e..56d1614760c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -175,7 +175,7 @@ xfs_buf_get_maps(
bp->b_map_count = map_count;
if (map_count == 1) {
- bp->b_maps = &bp->b_map;
+ bp->b_maps = &bp->__b_map;
return 0;
}
@@ -193,7 +193,7 @@ static void
xfs_buf_free_maps(
struct xfs_buf *bp)
{
- if (bp->b_maps != &bp->b_map) {
+ if (bp->b_maps != &bp->__b_map) {
kmem_free(bp->b_maps);
bp->b_maps = NULL;
}
@@ -377,8 +377,8 @@ xfs_buf_allocate_memory(
}
use_alloc_page:
- start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT;
- end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1)
+ start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
+ end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
>> PAGE_SHIFT;
page_count = end - start;
error = _xfs_buf_get_pages(bp, page_count, flags);
@@ -640,7 +640,7 @@ _xfs_buf_read(
xfs_buf_flags_t flags)
{
ASSERT(!(flags & XBF_WRITE));
- ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL);
+ ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
@@ -1709,7 +1709,7 @@ xfs_buf_cmp(
struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
xfs_daddr_t diff;
- diff = ap->b_map.bm_bn - bp->b_map.bm_bn;
+ diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
if (diff < 0)
return -1;
if (diff > 0)
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 23f5642480b..433a12ed7b1 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -151,7 +151,7 @@ typedef struct xfs_buf {
struct page **b_pages; /* array of page pointers */
struct page *b_page_array[XB_PAGES]; /* inline pages */
struct xfs_buf_map *b_maps; /* compound buffer map */
- struct xfs_buf_map b_map; /* inline compound buffer map */
+ struct xfs_buf_map __b_map; /* inline compound buffer map */
int b_map_count;
int b_io_length; /* IO size in BBs */
atomic_t b_pin_count; /* pin count */
@@ -330,8 +330,8 @@ void xfs_buf_stale(struct xfs_buf *bp);
* In future, uncached buffers will pass the block number directly to the io
* request function and hence these macros will go away at that point.
*/
-#define XFS_BUF_ADDR(bp) ((bp)->b_map.bm_bn)
-#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_map.bm_bn = (xfs_daddr_t)(bno))
+#define XFS_BUF_ADDR(bp) ((bp)->b_maps[0].bm_bn)
+#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_maps[0].bm_bn = (xfs_daddr_t)(bno))
static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index becf4a97efc..77b09750e92 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -71,7 +71,7 @@ xfs_buf_item_log_debug(
chunk_num = byte >> XFS_BLF_SHIFT;
word_num = chunk_num >> BIT_TO_WORD_SHIFT;
bit_num = chunk_num & (NBWORD - 1);
- wordp = &(bip->bli_format.blf_data_map[word_num]);
+ wordp = &(bip->__bli_format.blf_data_map[word_num]);
bit_set = *wordp & (1 << bit_num);
ASSERT(bit_set);
byte++;
@@ -237,7 +237,7 @@ xfs_buf_item_size(
* cancel flag in it.
*/
trace_xfs_buf_item_size_stale(bip);
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
return bip->bli_format_count;
}
@@ -278,7 +278,7 @@ xfs_buf_item_format_segment(
uint buffer_offset;
/* copy the flags across from the base format item */
- blfp->blf_flags = bip->bli_format.blf_flags;
+ blfp->blf_flags = bip->__bli_format.blf_flags;
/*
* Base size is the actual size of the ondisk structure - it reflects
@@ -287,6 +287,17 @@ xfs_buf_item_format_segment(
*/
base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
(blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
+
+ nvecs = 0;
+ first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
+ if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) {
+ /*
+ * If the map is not be dirty in the transaction, mark
+ * the size as zero and do not advance the vector pointer.
+ */
+ goto out;
+ }
+
vecp->i_addr = blfp;
vecp->i_len = base_size;
vecp->i_type = XLOG_REG_TYPE_BFORMAT;
@@ -301,15 +312,13 @@ xfs_buf_item_format_segment(
*/
trace_xfs_buf_item_format_stale(bip);
ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
- blfp->blf_size = nvecs;
- return vecp;
+ goto out;
}
/*
* Fill in an iovec for each set of contiguous chunks.
*/
- first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
- ASSERT(first_bit != -1);
+
last_bit = first_bit;
nbits = 1;
for (;;) {
@@ -371,7 +380,8 @@ xfs_buf_item_format_segment(
nbits++;
}
}
- bip->bli_format.blf_size = nvecs;
+out:
+ blfp->blf_size = nvecs;
return vecp;
}
@@ -405,7 +415,7 @@ xfs_buf_item_format(
if (bip->bli_flags & XFS_BLI_INODE_BUF) {
if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
xfs_log_item_in_current_chkpt(lip)))
- bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
+ bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
bip->bli_flags &= ~XFS_BLI_INODE_BUF;
}
@@ -485,7 +495,7 @@ xfs_buf_item_unpin(
ASSERT(bip->bli_flags & XFS_BLI_STALE);
ASSERT(xfs_buf_islocked(bp));
ASSERT(XFS_BUF_ISSTALE(bp));
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
trace_xfs_buf_item_unpin_stale(bip);
@@ -601,7 +611,7 @@ xfs_buf_item_unlock(
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
- int aborted;
+ int aborted, clean, i;
uint hold;
/* Clear the buffer's association with this transaction. */
@@ -631,7 +641,7 @@ xfs_buf_item_unlock(
*/
if (bip->bli_flags & XFS_BLI_STALE) {
trace_xfs_buf_item_unlock_stale(bip);
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
if (!aborted) {
atomic_dec(&bip->bli_refcount);
return;
@@ -644,8 +654,15 @@ xfs_buf_item_unlock(
* If the buf item isn't tracking any data, free it, otherwise drop the
* reference we hold to it.
*/
- if (xfs_bitmap_empty(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size))
+ clean = 1;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
+ bip->bli_formats[i].blf_map_size)) {
+ clean = 0;
+ break;
+ }
+ }
+ if (clean)
xfs_buf_item_relse(bp);
else
atomic_dec(&bip->bli_refcount);
@@ -716,7 +733,7 @@ xfs_buf_item_get_format(
bip->bli_format_count = count;
if (count == 1) {
- bip->bli_formats = &bip->bli_format;
+ bip->bli_formats = &bip->__bli_format;
return 0;
}
@@ -731,7 +748,7 @@ STATIC void
xfs_buf_item_free_format(
struct xfs_buf_log_item *bip)
{
- if (bip->bli_formats != &bip->bli_format) {
+ if (bip->bli_formats != &bip->__bli_format) {
kmem_free(bip->bli_formats);
bip->bli_formats = NULL;
}
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 6850f49f4af..16def435944 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -104,7 +104,7 @@ typedef struct xfs_buf_log_item {
#endif
int bli_format_count; /* count of headers */
struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
- struct xfs_buf_log_format bli_format; /* embedded in-log header */
+ struct xfs_buf_log_format __bli_format; /* embedded in-log header */
} xfs_buf_log_item_t;
void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 7536faaa61e..12afe07a91d 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -355,10 +355,12 @@ xfs_dir2_block_addname(
/*
* If need to compact the leaf entries, do it now.
*/
- if (compact)
+ if (compact) {
xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog,
&lfloghigh, &lfloglow);
- else if (btp->stale) {
+ /* recalculate blp post-compaction */
+ blp = xfs_dir2_block_leaf_p(btp);
+ } else if (btp->stale) {
/*
* Set leaf logging boundaries to impossible state.
* For the no-stale case they're set explicitly.
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 5f53e75409b..8a59f854655 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -784,11 +784,11 @@ xfs_qm_scall_getquota(
(XFS_IS_OQUOTA_ENFORCED(mp) &&
(dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
dst->d_id != 0) {
- if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) &&
+ if ((dst->d_bcount > dst->d_blk_softlimit) &&
(dst->d_blk_softlimit > 0)) {
ASSERT(dst->d_btimer != 0);
}
- if (((int) dst->d_icount > (int) dst->d_ino_softlimit) &&
+ if ((dst->d_icount > dst->d_ino_softlimit) &&
(dst->d_ino_softlimit > 0)) {
ASSERT(dst->d_itimer != 0);
}
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 4fc17d479d4..3edf5dbee00 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -93,7 +93,7 @@ _xfs_trans_bjoin(
xfs_buf_item_init(bp, tp->t_mountp);
bip = bp->b_fspriv;
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
if (reset_recur)
bip->bli_recur = 0;
@@ -432,7 +432,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
bip = bp->b_fspriv;
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
trace_xfs_trans_brelse(bip);
@@ -519,7 +519,7 @@ xfs_trans_bhold(xfs_trans_t *tp,
ASSERT(bp->b_transp == tp);
ASSERT(bip != NULL);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_HOLD;
@@ -539,7 +539,7 @@ xfs_trans_bhold_release(xfs_trans_t *tp,
ASSERT(bp->b_transp == tp);
ASSERT(bip != NULL);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT(bip->bli_flags & XFS_BLI_HOLD);
@@ -598,7 +598,7 @@ xfs_trans_log_buf(xfs_trans_t *tp,
bip->bli_flags &= ~XFS_BLI_STALE;
ASSERT(XFS_BUF_ISSTALE(bp));
XFS_BUF_UNSTALE(bp);
- bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL;
+ bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
}
tp->t_flags |= XFS_TRANS_DIRTY;
@@ -643,6 +643,7 @@ xfs_trans_binval(
xfs_buf_t *bp)
{
xfs_buf_log_item_t *bip = bp->b_fspriv;
+ int i;
ASSERT(bp->b_transp == tp);
ASSERT(bip != NULL);
@@ -657,8 +658,8 @@ xfs_trans_binval(
*/
ASSERT(XFS_BUF_ISSTALE(bp));
ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
- ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF));
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
return;
@@ -668,10 +669,12 @@ xfs_trans_binval(
bip->bli_flags |= XFS_BLI_STALE;
bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
- bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
- bip->bli_format.blf_flags |= XFS_BLF_CANCEL;
- memset((char *)(bip->bli_format.blf_data_map), 0,
- (bip->bli_format.blf_map_size * sizeof(uint)));
+ bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
+ bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ memset(bip->bli_formats[i].blf_data_map, 0,
+ (bip->bli_formats[i].blf_map_size * sizeof(uint)));
+ }
bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
tp->t_flags |= XFS_TRANS_DIRTY;
}
@@ -775,5 +778,5 @@ xfs_trans_dquot_buf(
type == XFS_BLF_GDQUOT_BUF);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
- bip->bli_format.blf_flags |= type;
+ bip->__bli_format.blf_flags |= type;
}