From 8b050d350c7846462a21e9e054c9154ede9b43cf Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 20 Feb 2014 18:08:58 +0800 Subject: Btrfs: fix skipped error handle when log sync failed It is possible that many tasks sync the log tree at the same time, but only one task can do the sync work, the others will wait for it. But those wait tasks didn't get the result of the log sync, and returned 0 when they ended the wait. It caused those tasks skipped the error handle, and the serious problem was they told the users the file sync succeeded but in fact they failed. This patch fixes this problem by introducing a log context structure, we insert it into the a global list. When the sync fails, we will set the error number of every log context in the list, then the waiting tasks get the error number of the log context and handle the error if need. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.h') diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 1d4ae0d15a7..59c1edb31d1 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -22,14 +22,26 @@ /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ #define BTRFS_NO_LOG_SYNC 256 +struct btrfs_log_ctx { + int log_ret; + struct list_head list; +}; + +static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) +{ + ctx->log_ret = 0; + INIT_LIST_HEAD(&ctx->list); +} + int btrfs_sync_log(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, struct btrfs_log_ctx *ctx); int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_recover_log_trees(struct btrfs_root *tree_root); int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct dentry *dentry); + struct btrfs_root *root, struct dentry *dentry, + struct btrfs_log_ctx *ctx); int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, -- cgit v1.2.3-70-g09d2 From d1433debe7f4346cf9fc0dafc71c3137d2a97bc4 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 20 Feb 2014 18:08:59 +0800 Subject: Btrfs: just wait or commit our own log sub-transaction We might commit the log sub-transaction which didn't contain the metadata we logged. It was because we didn't record the log transid and just select the current log sub-transaction to commit, but the right one might be committed by the other task already. Actually, we needn't do anything and it is safe that we go back directly in this case. This patch improves the log sync by the above idea. We record the transid of the log sub-transaction in which we log the metadata, and the transid of the log sub-transaction we have committed. If the committed transid is >= the transid we record when logging the metadata, we just go back. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/disk-io.c | 2 ++ fs/btrfs/tree-log.c | 63 ++++++++++++++++++++++++++++++++++------------------- fs/btrfs/tree-log.h | 2 ++ 4 files changed, 47 insertions(+), 23 deletions(-) (limited to 'fs/btrfs/tree-log.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 906410719ac..b2c0336db69 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1723,6 +1723,9 @@ struct btrfs_root { atomic_t log_commit[2]; atomic_t log_batch; int log_transid; + /* No matter the commit succeeds or not*/ + int log_transid_committed; + /* Just be updated when the commit succeeds. */ int last_log_commit; pid_t log_start_pid; bool log_multiple_pids; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 44f52d280b7..dd52146035b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1209,6 +1209,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, atomic_set(&root->orphan_inodes, 0); atomic_set(&root->refs, 1); root->log_transid = 0; + root->log_transid_committed = -1; root->last_log_commit = 0; if (fs_info) extent_io_tree_init(&root->dirty_log_pages, @@ -1422,6 +1423,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, WARN_ON(root->log_root); root->log_root = log_root; root->log_transid = 0; + root->log_transid_committed = -1; root->last_log_commit = 0; return 0; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index da6da274dce..57d4ca7fd55 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -156,6 +156,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, if (ctx) { index = root->log_transid % 2; list_add_tail(&ctx->list, &root->log_ctxs[index]); + ctx->log_transid = root->log_transid; } mutex_unlock(&root->log_mutex); return 0; @@ -181,6 +182,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, if (ctx) { index = root->log_transid % 2; list_add_tail(&ctx->list, &root->log_ctxs[index]); + ctx->log_transid = root->log_transid; } out: mutex_unlock(&root->log_mutex); @@ -2387,13 +2389,13 @@ static void wait_log_commit(struct btrfs_trans_handle *trans, &wait, TASK_UNINTERRUPTIBLE); mutex_unlock(&root->log_mutex); - if (root->log_transid < transid + 2 && + if (root->log_transid_committed < transid && atomic_read(&root->log_commit[index])) schedule(); finish_wait(&root->log_commit_wait[index], &wait); mutex_lock(&root->log_mutex); - } while (root->log_transid < transid + 2 && + } while (root->log_transid_committed < transid && atomic_read(&root->log_commit[index])); } @@ -2470,18 +2472,24 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, struct blk_plug plug; mutex_lock(&root->log_mutex); - log_transid = root->log_transid; - index1 = root->log_transid % 2; + log_transid = ctx->log_transid; + if (root->log_transid_committed >= log_transid) { + mutex_unlock(&root->log_mutex); + return ctx->log_ret; + } + + index1 = log_transid % 2; if (atomic_read(&root->log_commit[index1])) { - wait_log_commit(trans, root, root->log_transid); + wait_log_commit(trans, root, log_transid); mutex_unlock(&root->log_mutex); return ctx->log_ret; } + ASSERT(log_transid == root->log_transid); atomic_set(&root->log_commit[index1], 1); /* wait for previous tree log sync to complete */ if (atomic_read(&root->log_commit[(index1 + 1) % 2])) - wait_log_commit(trans, root, root->log_transid - 1); + wait_log_commit(trans, root, log_transid - 1); while (1) { int batch = atomic_read(&root->log_batch); @@ -2535,9 +2543,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, */ mutex_unlock(&root->log_mutex); + btrfs_init_log_ctx(&root_log_ctx); + mutex_lock(&log_root_tree->log_mutex); atomic_inc(&log_root_tree->log_batch); atomic_inc(&log_root_tree->log_writers); + + index2 = log_root_tree->log_transid % 2; + list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); + root_log_ctx.log_transid = log_root_tree->log_transid; + mutex_unlock(&log_root_tree->log_mutex); ret = update_log_root(trans, log); @@ -2550,6 +2565,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } if (ret) { + if (!list_empty(&root_log_ctx.list)) + list_del_init(&root_log_ctx.list); + blk_finish_plug(&plug); if (ret != -ENOSPC) { btrfs_abort_transaction(trans, root, ret); @@ -2565,26 +2583,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, goto out; } - index2 = log_root_tree->log_transid % 2; - - btrfs_init_log_ctx(&root_log_ctx); - list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); + if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { + mutex_unlock(&log_root_tree->log_mutex); + ret = root_log_ctx.log_ret; + goto out; + } + index2 = root_log_ctx.log_transid % 2; if (atomic_read(&log_root_tree->log_commit[index2])) { blk_finish_plug(&plug); btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); wait_log_commit(trans, log_root_tree, - log_root_tree->log_transid); + root_log_ctx.log_transid); btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); ret = root_log_ctx.log_ret; goto out; } + ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid); atomic_set(&log_root_tree->log_commit[index2], 1); if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { wait_log_commit(trans, log_root_tree, - log_root_tree->log_transid - 1); + root_log_ctx.log_transid - 1); } wait_for_writer(trans, log_root_tree); @@ -2652,26 +2673,22 @@ out_wake_log_root: */ btrfs_remove_all_log_ctxs(log_root_tree, index2, ret); - /* - * It is dangerous if log_commit is changed before we set - * ->log_ret of log ctx. Because the readers may not get - * the return value. - */ - smp_wmb(); - + mutex_lock(&log_root_tree->log_mutex); + log_root_tree->log_transid_committed++; atomic_set(&log_root_tree->log_commit[index2], 0); - smp_mb(); + mutex_unlock(&log_root_tree->log_mutex); + if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) wake_up(&log_root_tree->log_commit_wait[index2]); out: /* See above. */ btrfs_remove_all_log_ctxs(root, index1, ret); - /* See above. */ - smp_wmb(); + mutex_lock(&root->log_mutex); + root->log_transid_committed++; atomic_set(&root->log_commit[index1], 0); + mutex_unlock(&root->log_mutex); - smp_mb(); if (waitqueue_active(&root->log_commit_wait[index1])) wake_up(&root->log_commit_wait[index1]); return ret; diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 59c1edb31d1..91b145fce33 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -24,12 +24,14 @@ struct btrfs_log_ctx { int log_ret; + int log_transid; struct list_head list; }; static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) { ctx->log_ret = 0; + ctx->log_transid = 0; INIT_LIST_HEAD(&ctx->list); } -- cgit v1.2.3-70-g09d2