From 89ac9b4d3d1a049ae1054f99b1aed81092cd0a82 Mon Sep 17 00:00:00 2001 From: Sougata Santra Date: Thu, 18 Dec 2014 16:17:12 -0800 Subject: hfsplus: fix longname handling Longname is not correctly handled by the hfsplus driver. If an attempt to create a longname (>255) file/directory is made, it succeeds by creating a file/directory with HFSPLUS_MAX_STRLEN and an incorrect catalog key, thus leaving the volume in an inconsistent state. This patch fixes this issue. Lookup is always called first to create a negative entry, so just doing a check in lookup would probably fix this issue, but I chose to propagate the error to the other iops as well. Please NOTE: I have factored out hfsplus_cat_build_key_with_cnid from hfsplus_cat_build_key, to avoid unnecessary branching. Thanks a lot. TEST: ------ dir="TEST_DIR" cdir=`pwd` name255="_123456789_123456789_123456789_123456789_123456789_123456789\ _123456789_123456789_123456789_123456789_123456789_123456789_123456789\ _123456789_123456789_123456789_123456789_123456789_123456789_123456789\ _123456789_123456789_123456789_123456789_123456789_1234" name256="${name255}5" mkdir $dir cd $dir touch $name255 rm -f $name255 touch $name256 ls -la cd $cdir rm -rf $dir RESULT: ------- [sougata@ultrabook tmp]$ cdir=`pwd` [sougata@ultrabook tmp]$ name255="_123456789_123456789_123456789_123456789_123456789_123456789\ > _123456789_123456789_123456789_123456789_123456789_123456789_123456789\ > _123456789_123456789_123456789_123456789_123456789_123456789_123456789\ > _123456789_123456789_123456789_123456789_123456789_1234" [sougata@ultrabook tmp]$ name256="${name255}5" [sougata@ultrabook tmp]$ [sougata@ultrabook tmp]$ mkdir $dir [sougata@ultrabook tmp]$ cd $dir [sougata@ultrabook TEST_DIR]$ touch $name255 [sougata@ultrabook TEST_DIR]$ rm -f $name255 [sougata@ultrabook TEST_DIR]$ touch $name256 [sougata@ultrabook TEST_DIR]$ ls -la ls: cannot access 
_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_1234: No such file or directory total 0 drwxrwxr-x 1 sougata sougata 3 Feb 20 19:56 . drwxrwxrwx 1 root root 6 Feb 20 19:56 .. -????????? ? ? ? ? ? _123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_1234 [sougata@ultrabook TEST_DIR]$ cd $cdir [sougata@ultrabook tmp]$ rm -rf $dir rm: cannot remove `TEST_DIR': Directory not empty -ENAMETOOLONG returned from hfsplus_asc2uni was not propagated to iops. This allowed hfsplus to create files/directories with HFSPLUS_MAX_STRLEN and incorrect keys, leaving the FS in an inconsistent state. This patch fixes this issue. Signed-off-by: Sougata Santra Reviewed-by: Christoph Hellwig Cc: Vyacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/catalog.c | 89 ++++++++++++++++++++++++++++++++++++------------- fs/hfsplus/dir.c | 11 ++++-- fs/hfsplus/hfsplus_fs.h | 4 ++- fs/hfsplus/super.c | 4 ++- 4 files changed, 79 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 32602c667b4..7892e6fddb6 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1, return hfsplus_strcmp(&k1->cat.name, &k2->cat.name); } -void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, - u32 parent, struct qstr *str) +/* Generates key for catalog file/folders record. 
*/ +int hfsplus_cat_build_key(struct super_block *sb, + hfsplus_btree_key *key, u32 parent, struct qstr *str) { - int len; + int len, err; key->cat.parent = cpu_to_be32(parent); - if (str) { - hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN, - str->name, str->len); - len = be16_to_cpu(key->cat.name.length); - } else { - key->cat.name.length = 0; - len = 0; - } + err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN, + str->name, str->len); + if (unlikely(err < 0)) + return err; + + len = be16_to_cpu(key->cat.name.length); key->key_len = cpu_to_be16(6 + 2 * len); + return 0; +} + +/* Generates key for catalog thread record. */ +void hfsplus_cat_build_key_with_cnid(struct super_block *sb, + hfsplus_btree_key *key, u32 parent) +{ + key->cat.parent = cpu_to_be32(parent); + key->cat.name.length = 0; + key->key_len = cpu_to_be16(6); } static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent, @@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb, hfsplus_cat_entry *entry, int type, u32 parentid, struct qstr *str) { + int err; + entry->type = cpu_to_be16(type); entry->thread.reserved = 0; entry->thread.parentID = cpu_to_be32(parentid); - hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN, + err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN, str->name, str->len); + if (unlikely(err < 0)) + return err; + return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2; } @@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, int err; u16 type; - hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid); err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry)); if (err) return err; @@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, if (err) return err; - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); entry_size = 
hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD, dir->i_ino, str); + if (unlikely(entry_size < 0)) { + err = entry_size; + goto err2; + } + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) @@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, if (err) goto err2; - hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + if (unlikely(err)) + goto err1; + entry_size = hfsplus_cat_build_record(&entry, cnid, inode); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err != -ENOENT) { @@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, return 0; err1: - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); if (!hfs_brec_find(&fd, hfs_find_rec_by_key)) hfs_brec_remove(&fd); err2: @@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) if (!str) { int len; - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) off + 2, len); fd.search_key->key_len = cpu_to_be16(6 + len); } else - hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + if (unlikely(err)) + goto out; err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) @@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) if (err) goto out; - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid, dst_fd = src_fd; /* find the old dir entry 
and read the data */ - hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); + err = hfsplus_cat_build_key(sb, src_fd.search_key, + src_dir->i_ino, src_name); + if (unlikely(err)) + goto out; + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; @@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid, type = be16_to_cpu(entry.type); /* create new dir entry with the data from the old entry */ - hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); + err = hfsplus_cat_build_key(sb, dst_fd.search_key, + dst_dir->i_ino, dst_name); + if (unlikely(err)) + goto out; + err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) @@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid, dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; /* finally remove the old entry */ - hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); + err = hfsplus_cat_build_key(sb, src_fd.search_key, + src_dir->i_ino, src_name); + if (unlikely(err)) + goto out; + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; @@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid, src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; /* remove old thread entry */ - hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid); err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; @@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid, goto out; /* create new thread entry */ - hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid); entry_size = hfsplus_fill_cat_thread(sb, &entry, type, dst_dir->i_ino, dst_name); + if (unlikely(entry_size < 0)) { + err = entry_size; + goto out; + } + err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 610a3260bef..435bea231cc 100644 --- 
a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -44,7 +44,10 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); if (err) return ERR_PTR(err); - hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); + err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, + &dentry->d_name); + if (unlikely(err < 0)) + goto fail; again: err = hfs_brec_read(&fd, &entry, sizeof(entry)); if (err) { @@ -97,9 +100,11 @@ again: be32_to_cpu(entry.file.permissions.dev); str.len = sprintf(name, "iNode%d", linkid); str.name = name; - hfsplus_cat_build_key(sb, fd.search_key, + err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb)->hidden_dir->i_ino, &str); + if (unlikely(err < 0)) + goto fail; goto again; } } else if (!dentry->d_fsdata) @@ -145,7 +150,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx) err = -ENOMEM; goto out; } - hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index eb5e059f481..b0441d65fa5 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -443,8 +443,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1, const hfsplus_btree_key *k2); int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1, const hfsplus_btree_key *k2); -void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, +int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, u32 parent, struct qstr *str); +void hfsplus_cat_build_key_with_cnid(struct super_block *sb, + hfsplus_btree_key *key, u32 parent); void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms); int hfsplus_find_cat(struct super_block *sb, u32 cnid, struct hfs_find_data *fd); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 
4cf2024b87d..593af2fdcc2 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -515,7 +515,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = hfs_find_init(sbi->cat_tree, &fd); if (err) goto out_put_root; - hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); + err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); + if (unlikely(err < 0)) + goto out_put_root; if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) -- cgit v1.2.3-70-g09d2 From 47f8f9297d2247d65ee46d8403a73b30f8d0249b Mon Sep 17 00:00:00 2001 From: Pintu Kumar Date: Thu, 18 Dec 2014 16:17:18 -0800 Subject: fs/proc/meminfo.c: include cma info in proc/meminfo This patch includes CMA info (CmaTotal, CmaFree) in /proc/meminfo. Currently, in a CMA enabled system, if somebody wants to know the total CMA size declared, there is no way to tell, other than the dmesg or /var/log/messages logs. With this patch we are showing the CMA info as part of meminfo, so that it can be determined at any point of time. This will be populated only when CMA is enabled. Below is the sample output from an ARM based device with RAM:512MB and CMA:16MB. MemTotal: 471172 kB MemFree: 111712 kB MemAvailable: 271172 kB . . . CmaTotal: 16384 kB CmaFree: 6144 kB This patch also fixes the checkpatch errors below that were found during these changes. 
ERROR: space required after that ',' (ctx:ExV) 199: FILE: fs/proc/meminfo.c:199: + ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) ^ ERROR: space required after that ',' (ctx:ExV) 202: FILE: fs/proc/meminfo.c:202: + ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * ^ ERROR: space required after that ',' (ctx:ExV) 206: FILE: fs/proc/meminfo.c:206: + ,K(totalcma_pages) ^ total: 3 errors, 0 warnings, 2 checks, 236 lines checked Signed-off-by: Pintu Kumar Signed-off-by: Vishnu Pratap Singh Acked-by: Michal Nazarewicz Cc: Rafael Aquini Cc: Jerome Marchand Cc: Marek Szyprowski Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index aa1eee06420..d3ebf2e6185 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -12,6 +12,9 @@ #include #include #include +#ifdef CONFIG_CMA +#include +#endif #include #include #include "internal.h" @@ -137,6 +140,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE "AnonHugePages: %8lu kB\n" +#endif +#ifdef CONFIG_CMA + "CmaTotal: %8lu kB\n" + "CmaFree: %8lu kB\n" #endif , K(i.totalram), @@ -187,11 +194,15 @@ static int meminfo_proc_show(struct seq_file *m, void *v) vmi.used >> 10, vmi.largest_chunk >> 10 #ifdef CONFIG_MEMORY_FAILURE - ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) + , atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE - ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * + , K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * HPAGE_PMD_NR) +#endif +#ifdef CONFIG_CMA + , K(totalcma_pages) + , K(global_page_state(NR_FREE_CMA_PAGES)) #endif ); -- cgit v1.2.3-70-g09d2 From f62f12b3a426c8f65b10011b1ec40ba4277cbf5f Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 18 Dec 2014 16:17:32 -0800 Subject: ocfs2: 
reflink: fix slow unlink for refcounted file When running the multiple-node reflink stress test from the ocfs2 test suite on a 4-node cluster, every unlink() of a refcounted file takes about 700s. The slow unlink is caused by contention on the refcount tree lock, since all nodes are unlinking files that use the same refcount tree. When the file being unlinked has many extents (over 1600 in our test), most of the extents have the refcounted flag set. In ocfs2_commit_truncate(), the following call trace is executed for every extent. This means it needs to get and release the refcount tree lock about 1600 times. And when several nodes are doing this at the same time, the performance will be very low. ocfs2_remove_btree_range() -- ocfs2_lock_refcount_tree() ---- ocfs2_refcount_lock() ------ __ocfs2_cluster_lock() ocfs2_refcount_lock() is costly; moving it to ocfs2_commit_truncate() so that the lock/unlock is done only once improves performance a lot. Signed-off-by: Junxiao Bi Cc: Wengang Reviewed-by: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 28 +++++++++++++++++++++------- fs/ocfs2/alloc.h | 2 +- fs/ocfs2/dir.c | 2 +- fs/ocfs2/file.c | 2 +- 4 files changed, 24 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index a93bf989225..fcae9ef1a32 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5662,7 +5662,7 @@ int ocfs2_remove_btree_range(struct inode *inode, struct ocfs2_extent_tree *et, u32 cpos, u32 phys_cpos, u32 len, int flags, struct ocfs2_cached_dealloc_ctxt *dealloc, - u64 refcount_loc) + u64 refcount_loc, bool refcount_tree_locked) { int ret, credits = 0, extra_blocks = 0; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); @@ -5676,11 +5676,13 @@ int ocfs2_remove_btree_range(struct inode *inode, BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); - ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, - &ref_tree, NULL); - if (ret) { - mlog_errno(ret); - goto 
bail; + if (!refcount_tree_locked) { + ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, + &ref_tree, NULL); + if (ret) { + mlog_errno(ret); + goto bail; + } } ret = ocfs2_prepare_refcount_change_for_del(inode, @@ -7021,6 +7023,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, u64 refcount_loc = le64_to_cpu(di->i_refcount_loc); struct ocfs2_extent_tree et; struct ocfs2_cached_dealloc_ctxt dealloc; + struct ocfs2_refcount_tree *ref_tree = NULL; ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); ocfs2_init_dealloc_ctxt(&dealloc); @@ -7130,9 +7133,18 @@ start: phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno); + if ((flags & OCFS2_EXT_REFCOUNTED) && trunc_len && !ref_tree) { + status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, + &ref_tree, NULL); + if (status) { + mlog_errno(status); + goto bail; + } + } + status = ocfs2_remove_btree_range(inode, &et, trunc_cpos, phys_cpos, trunc_len, flags, &dealloc, - refcount_loc); + refcount_loc, true); if (status < 0) { mlog_errno(status); goto bail; @@ -7147,6 +7159,8 @@ start: goto start; bail: + if (ref_tree) + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); ocfs2_schedule_truncate_log_flush(osb, 1); diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index ca381c58412..fb09b97db16 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -142,7 +142,7 @@ int ocfs2_remove_btree_range(struct inode *inode, struct ocfs2_extent_tree *et, u32 cpos, u32 phys_cpos, u32 len, int flags, struct ocfs2_cached_dealloc_ctxt *dealloc, - u64 refcount_loc); + u64 refcount_loc, bool refcount_tree_locked); int ocfs2_num_free_extents(struct ocfs2_super *osb, struct ocfs2_extent_tree *et); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 79d56dc981b..319e786175a 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -4479,7 +4479,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh) p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0, - 
&dealloc, 0); + &dealloc, 0, false); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 69fb9f75b08..3950693dd0f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1803,7 +1803,7 @@ static int ocfs2_remove_inode_range(struct inode *inode, ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos, phys_cpos, trunc_len, flags, - &dealloc, refcount_loc); + &dealloc, refcount_loc, false); if (ret < 0) { mlog_errno(ret); goto out; -- cgit v1.2.3-70-g09d2 From 1e5895816030eaadb952c89eb9f4054e5c0082c3 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Thu, 18 Dec 2014 16:17:34 -0800 Subject: ocfs2/dlm: fix race between dispatched_work and dlm_lockres_grab_inflight_worker Commit ac4fef4d23ed ("ocfs2/dlm: do not purge lockres that is queued for assert master") may have the following possible race case: dlm_dispatch_assert_master dlm_wq ======================================================================== queue_work(dlm->quedlm_worker, &dlm->dispatched_work); dispatch work, dlm_lockres_drop_inflight_worker *BUG_ON(res->inflight_assert_workers == 0)* dlm_lockres_grab_inflight_worker inflight_assert_workers++ So ensure inflight_assert_workers to be increased first. 
Signed-off-by: Joseph Qi Signed-off-by: Xue jiufei Cc: Joel Becker Reviewed-by: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmmaster.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 3689b359204..a6944b25fd5 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -695,14 +695,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, res->inflight_assert_workers); } -static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - spin_lock(&res->spinlock); - __dlm_lockres_grab_inflight_worker(dlm, res); - spin_unlock(&res->spinlock); -} - static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { @@ -1646,6 +1638,7 @@ send_response: } mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", dlm->node_num, res->lockname.len, res->lockname.name); + spin_lock(&res->spinlock); ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, DLM_ASSERT_MASTER_MLE_CLEANUP); if (ret < 0) { @@ -1653,7 +1646,8 @@ send_response: response = DLM_MASTER_RESP_ERROR; dlm_lockres_put(res); } else - dlm_lockres_grab_inflight_worker(dlm, res); + __dlm_lockres_grab_inflight_worker(dlm, res); + spin_unlock(&res->spinlock); } else { if (res) dlm_lockres_put(res); -- cgit v1.2.3-70-g09d2 From 136f49b9171074872f2a14ad0ab10486d1ba13ca Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 18 Dec 2014 16:17:37 -0800 Subject: ocfs2: fix journal commit deadlock For buffer write, page lock will be got in write_begin and released in write_end, in ocfs2_write_end_nolock(), before it unlock the page in ocfs2_free_write_ctxt(), it calls ocfs2_run_deallocs(), this will ask for the read lock of journal->j_trans_barrier. Holding page lock and ask for journal->j_trans_barrier breaks the locking order. 
This will cause a deadlock with the journal commit threads: ocfs2cmt gets the write lock of journal->j_trans_barrier first, then wakes up kjournald2 to do the commit work, and finally waits until it is done. To commit the journal, kjournald2 needs to flush data first, for which it needs to get the cache page lock. Since some ocfs2 cluster locks are held by the write process, this deadlock may hang the whole cluster. Unlocking pages before ocfs2_run_deallocs() fixes the locking order; also put the unlock before ocfs2_commit_trans() so that the page lock is released before j_trans_barrier, to preserve the unlocking order. Signed-off-by: Junxiao Bi Reviewed-by: Wengang Wang Cc: Reviewed-by: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/aops.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index d9f222987f2..46d93e941f3 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -894,7 +894,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) } } -static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc) { int i; @@ -915,7 +915,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) page_cache_release(wc->w_target_page); } ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); +} +static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +{ + ocfs2_unlock_pages(wc); brelse(wc->w_di_bh); kfree(wc); } @@ -2042,11 +2046,19 @@ out_write_size: ocfs2_update_inode_fsync_trans(handle, inode, 1); ocfs2_journal_dirty(handle, wc->w_di_bh); + /* unlock pages before dealloc since it needs acquiring j_trans_barrier + * lock, or it will cause a deadlock since journal commit threads holds + * this lock and will ask for the page lock when flushing the data. + * put it here to preserve the unlock order. 
+ */ + ocfs2_unlock_pages(wc); + ocfs2_commit_trans(osb, handle); ocfs2_run_deallocs(osb, &wc->w_dealloc); - ocfs2_free_write_ctxt(wc); + brelse(wc->w_di_bh); + kfree(wc); return copied; } -- cgit v1.2.3-70-g09d2