From b31268ac793fd300da66b9c28bbf0a200339ab96 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Mar 2011 17:02:00 -0400 Subject: FS: Use stable writes when not doing a bulk flush If we're only doing a single write, and there are no other unstable writes being queued up, we might want to just flip to using a stable write RPC call. Reviewed-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 47a3ad63e0d..4d686ee5324 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -179,8 +179,8 @@ static int wb_priority(struct writeback_control *wbc) if (wbc->for_reclaim) return FLUSH_HIGHPRI | FLUSH_STABLE; if (wbc->for_kupdate || wbc->for_background) - return FLUSH_LOWPRI; - return 0; + return FLUSH_LOWPRI | FLUSH_COND_STABLE; + return FLUSH_COND_STABLE; } /* @@ -863,7 +863,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->args.context = get_nfs_open_context(req->wb_context); data->args.lock_context = req->wb_lock_context; data->args.stable = NFS_UNSTABLE; - if (how & FLUSH_STABLE) { + if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { data->args.stable = NFS_DATA_SYNC; if (!nfs_need_commit(NFS_I(inode))) data->args.stable = NFS_FILE_SYNC; @@ -912,6 +912,12 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) nfs_list_remove_request(req); + if ((desc->pg_ioflags & FLUSH_COND_STABLE) && + (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || + desc->pg_count > wsize)) + desc->pg_ioflags &= ~FLUSH_COND_STABLE; + + nbytes = desc->pg_count; do { size_t len = min(nbytes, wsize); @@ -1002,6 +1008,10 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) if ((!lseg) && list_is_singular(&data->pages)) lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); + if ((desc->pg_ioflags & FLUSH_COND_STABLE) && + (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) + desc->pg_ioflags &= ~FLUSH_COND_STABLE; + /* Set up the argument struct */ ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); out: -- cgit v1.2.3-70-g09d2 From b8413f98f997bb3ed7327e6d7117e7e91ce010c3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Mar 2011 15:37:01 -0400 Subject: NFS: Fix a hang/infinite loop in nfs_wb_page() When one of the two waits in nfs_commit_inode() is interrupted, it returns a non-negative value, which causes nfs_wb_page() to think that the operation was successful causing it to busy-loop rather than exiting. It also causes nfs_file_fsync() to incorrectly report the file as being successfully committed to disk. This patch fixes both problems by ensuring that we return an error if the attempts to wait fail. Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/write.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4d686ee5324..55a8c367123 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1261,13 +1261,17 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { + int ret; + if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) return 1; - if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, - NFS_INO_COMMIT, nfs_wait_bit_killable, - TASK_KILLABLE)) - return 1; - return 0; + if (!may_wait) + return 0; + ret = out_of_line_wait_on_bit_lock(&nfsi->flags, + NFS_INO_COMMIT, + nfs_wait_bit_killable, + TASK_KILLABLE); + return (ret < 0) ? ret : 1; } static void nfs_commit_clear_lock(struct nfs_inode *nfsi) @@ -1443,9 +1447,10 @@ int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); int may_wait = how & FLUSH_SYNC; - int res = 0; + int res; - if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) + res = nfs_commit_set_lock(NFS_I(inode), may_wait); + if (res <= 0) goto out_mark_dirty; spin_lock(&inode->i_lock); res = nfs_scan_commit(inode, &head, 0, 0); @@ -1454,12 +1459,14 @@ int nfs_commit_inode(struct inode *inode, int how) int error = nfs_commit_list(inode, &head, how); if (error < 0) return error; - if (may_wait) - wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, - nfs_wait_bit_killable, - TASK_KILLABLE); - else + if (!may_wait) goto out_mark_dirty; + error = wait_on_bit(&NFS_I(inode)->flags, + NFS_INO_COMMIT, + nfs_wait_bit_killable, + TASK_KILLABLE); + if (error < 0) + return error; } else nfs_commit_clear_lock(NFS_I(inode)); return res; -- cgit v1.2.3-70-g09d2 From 465d52437d5ce8d4eb9da0d3e3818b51cff163a1 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 23 Mar 2011 13:27:44 +0000 Subject: NFSv4.1: don't send COMMIT to ds for data sync writes Based on consensus reached in Feb 2011 interim IETF meeting regarding use of LAYOUTCOMMIT, it has been decided that a NFS_DATA_SYNC return from a WRITE to data server should not initiate a COMMIT. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 55a8c367123..92b4a6614fd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -474,7 +474,10 @@ nfs_clear_request_commit(struct nfs_page *req) static inline int nfs_write_need_commit(struct nfs_write_data *data) { - return data->verf.committed != NFS_FILE_SYNC; + if (data->verf.committed == NFS_DATA_SYNC) + return data->lseg == NULL; + else + return data->verf.committed != NFS_FILE_SYNC; } static inline -- cgit v1.2.3-70-g09d2 From 9ace33cdc6562699a780b4716d9d52c55dd2151a Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 23 Mar 2011 13:27:45 +0000 Subject: NFSv4.1: rearrange nfs_commit_rpcsetup Reorder nfs_commit_rpcsetup, preparing for a pnfs entry point. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 60 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 27 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 92b4a6614fd..bf672faa488 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1293,32 +1293,49 @@ static void nfs_commitdata_release(void *data) nfs_commit_free(wdata); } -/* - * Set up the argument/result storage required for the RPC call. - */ -static int nfs_commit_rpcsetup(struct list_head *head, - struct nfs_write_data *data, - int how) +static int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, + const struct rpc_call_ops *call_ops, + int how) { - struct nfs_page *first = nfs_list_entry(head->next); - struct inode *inode = first->wb_context->path.dentry->d_inode; - int priority = flush_task_priority(how); struct rpc_task *task; + int priority = flush_task_priority(how); struct rpc_message msg = { .rpc_argp = &data->args, .rpc_resp = &data->res, - .rpc_cred = first->wb_context->cred, + .rpc_cred = data->cred, }; struct rpc_task_setup task_setup_data = { .task = &data->task, - .rpc_client = NFS_CLIENT(inode), + .rpc_client = clnt, .rpc_message = &msg, - .callback_ops = &nfs_commit_ops, + .callback_ops = call_ops, .callback_data = data, .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, .priority = priority, }; + /* Set up the initial task struct. */ + NFS_PROTO(data->inode)->commit_setup(data, &msg); + + dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + if (how & FLUSH_SYNC) + rpc_wait_for_completion_task(task); + rpc_put_task(task); + return 0; +} + +/* + * Set up the argument/result storage required for the RPC call. + */ +static void nfs_init_commit(struct nfs_write_data *data, + struct list_head *head) +{ + struct nfs_page *first = nfs_list_entry(head->next); + struct inode *inode = first->wb_context->path.dentry->d_inode; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -1326,7 +1343,8 @@ static int nfs_commit_rpcsetup(struct list_head *head, list_splice_init(head, &data->pages); data->inode = inode; - data->cred = msg.rpc_cred; + data->cred = first->wb_context->cred; + data->mds_ops = &nfs_commit_ops; data->args.fh = NFS_FH(data->inode); /* Note: we always request a commit of the entire inode */ @@ -1337,19 +1355,6 @@ static int nfs_commit_rpcsetup(struct list_head *head, data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); - - /* Set up the initial task struct. */ - NFS_PROTO(inode)->commit_setup(data, &msg); - - dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return PTR_ERR(task); - if (how & FLUSH_SYNC) - rpc_wait_for_completion_task(task); - rpc_put_task(task); - return 0; } /* @@ -1367,7 +1372,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) goto out_bad; /* Set up the argument struct */ - return nfs_commit_rpcsetup(head, data, how); + nfs_init_commit(data, head); + return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); out_bad: while (!list_empty(head)) { req = nfs_list_entry(head->next); -- cgit v1.2.3-70-g09d2 From 64bfeb49bd1c2351a8415f7fe6b25eef208872a5 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 23 Mar 2011 13:27:47 +0000 Subject: NFSv4.1: pull error handling out of nfs_commit_list Create a separate support function for later use by data server commit code. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bf672faa488..dbc801810e7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1357,6 +1357,21 @@ static void nfs_init_commit(struct nfs_write_data *data, nfs_fattr_init(&data->fattr); } +static void nfs_retry_commit(struct list_head *page_list) +{ + struct nfs_page *req; + + while (!list_empty(page_list)) { + req = nfs_list_entry(page_list->next); + nfs_list_remove_request(req); + nfs_mark_request_commit(req); + dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + dec_bdi_stat(req->wb_page->mapping->backing_dev_info, + BDI_RECLAIMABLE); + nfs_clear_page_tag_locked(req); + } +} + /* * Commit dirty pages */ @@ -1364,7 +1379,6 @@ static int nfs_commit_list(struct inode *inode, struct list_head *head, int how) { struct nfs_write_data *data; - struct nfs_page *req; data = nfs_commitdata_alloc(); @@ -1375,15 +1389,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) nfs_init_commit(data, head); return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); out_bad: - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_mark_request_commit(req); - dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(req->wb_page->mapping->backing_dev_info, - BDI_RECLAIMABLE); - nfs_clear_page_tag_locked(req); - } + nfs_retry_commit(head); nfs_commit_clear_lock(NFS_I(inode)); return -ENOMEM; } -- cgit v1.2.3-70-g09d2 From 5917ce8440ba0b3f2adee613b5f1258ac5efff02 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 23 Mar 2011 13:27:48 +0000 Subject: NFSv4.1: pull out code from nfs_commit_release Create a separate support function for later use by data server commit code. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dbc801810e7..f5f005e9db4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1409,10 +1409,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) return; } -static void nfs_commit_release(void *calldata) +static void nfs_commit_release_pages(struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - struct nfs_page *req; + struct nfs_page *req; int status = data->task.tk_status; while (!list_empty(&data->pages)) { @@ -1446,6 +1445,13 @@ static void nfs_commit_release(void *calldata) next: nfs_clear_page_tag_locked(req); } +} + +static void nfs_commit_release(void *calldata) +{ + struct nfs_write_data *data = calldata; + + nfs_commit_release_pages(data); nfs_commit_clear_lock(NFS_I(data->inode)); nfs_commitdata_release(calldata); } -- cgit v1.2.3-70-g09d2 From a861a1e1c398fe34701569fd8ac9225dfe0a9a7e Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 23 Mar 2011 13:27:51 +0000 Subject: NFSv4.1: add generic layer hooks for pnfs COMMIT We create three major hooks for the pnfs code. pnfs_mark_request_commit() is called during writeback_done from nfs_mark_request_commit, which gives the driver an opportunity to claim it wants control over commiting a particular req. pnfs_choose_commit_list() is called from nfs_scan_list to choose which list a given req should be added to, based on where we intend to send it for COMMIT. It is up to the driver to have preallocated list headers for each destination it may need. pnfs_commit_list() is how the driver actually takes control, it is used instead of nfs_commit_list(). In order to pass information between the above functions, we create a union in nfs_page to hold a lseg (which is possible because the req is not on any list while in transition), and add some flags to indicate if we need to use the pnfs code. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 5 ++-- fs/nfs/pnfs.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/write.c | 41 +++++++++++++++++---------- include/linux/nfs_fs.h | 1 + include/linux/nfs_page.h | 6 +++- 5 files changed, 108 insertions(+), 18 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index fd85618149a..87a593c2b05 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -398,6 +398,7 @@ int nfs_scan_list(struct nfs_inode *nfsi, pgoff_t idx_end; int found, i; int res; + struct list_head *list; res = 0; if (npages == 0) @@ -418,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi, idx_start = req->wb_index + 1; if (nfs_set_page_tag_locked(req)) { kref_get(&req->wb_kref); - nfs_list_remove_request(req); radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, tag); - nfs_list_add_request(req, dst); + list = pnfs_choose_commit_list(req, dst); + nfs_list_add_request(req, list); res++; if (res == INT_MAX) goto out; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 6380b9405bc..5370f1b9aa4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -74,6 +74,13 @@ struct pnfs_layoutdriver_type { /* test for nfs page cache coalescing */ int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + /* Returns true if layoutdriver wants to divert this request to + * driver's commit routine. + */ + bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); + struct list_head * (*choose_commit_list) (struct nfs_page *req); + int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); + /* * Return PNFS_ATTEMPTED to indicate the layout code has attempted * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS @@ -169,6 +176,51 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss) return nfss->pnfs_curr_ld != NULL; } +static inline void +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +{ + if (lseg) { + struct pnfs_layoutdriver_type *ld; + + ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld; + if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) { + set_bit(PG_PNFS_COMMIT, &req->wb_flags); + req->wb_commit_lseg = get_lseg(lseg); + } + } +} + +static inline int +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) +{ + if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags)) + return PNFS_NOT_ATTEMPTED; + return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); +} + +static inline struct list_head * +pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) +{ + struct list_head *rv; + + if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { + struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; + + set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); + rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); + /* matched by ref taken when PG_PNFS_COMMIT is set */ + put_lseg(req->wb_commit_lseg); + } else + rv = mds; + return rv; +} + +static inline void pnfs_clear_request_commit(struct nfs_page *req) +{ + if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) + put_lseg(req->wb_commit_lseg); +} + #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -252,6 +304,27 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino) pgio->pg_test = NULL; } +static inline void +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +{ +} + +static inline int +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) +{ + return PNFS_NOT_ATTEMPTED; +} + +static inline struct list_head * +pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) +{ + return mds; +} + +static inline void pnfs_clear_request_commit(struct nfs_page *req) +{ +} + #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f5f005e9db4..6927a18b689 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -441,7 +441,7 @@ nfs_mark_request_dirty(struct nfs_page *req) * Add a request to the inode's commit list. */ static void -nfs_mark_request_commit(struct nfs_page *req) +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); @@ -453,6 +453,7 @@ nfs_mark_request_commit(struct nfs_page *req) NFS_PAGE_TAG_COMMIT); nfsi->ncommit++; spin_unlock(&inode->i_lock); + pnfs_mark_request_commit(req, lseg); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); @@ -481,10 +482,11 @@ int nfs_write_need_commit(struct nfs_write_data *data) } static inline -int nfs_reschedule_unstable_write(struct nfs_page *req) +int nfs_reschedule_unstable_write(struct nfs_page *req, + struct nfs_write_data *data) { if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { - nfs_mark_request_commit(req); + nfs_mark_request_commit(req, data->lseg); return 1; } if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { @@ -495,7 +497,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) } #else static inline void -nfs_mark_request_commit(struct nfs_page *req) +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { } @@ -512,7 +514,8 @@ int nfs_write_need_commit(struct nfs_write_data *data) } static inline -int nfs_reschedule_unstable_write(struct nfs_page *req) +int nfs_reschedule_unstable_write(struct nfs_page *req, + struct nfs_write_data *data) { return 0; } @@ -615,9 +618,11 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, } if (nfs_clear_request_commit(req) && - radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) { NFS_I(inode)->ncommit--; + pnfs_clear_request_commit(req); + } /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { @@ -765,11 +770,12 @@ int nfs_updatepage(struct file *file, struct page *page, return status; } -static void nfs_writepage_release(struct nfs_page *req) +static void nfs_writepage_release(struct nfs_page *req, + struct nfs_write_data *data) { struct page *page = req->wb_page; - if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) nfs_inode_remove_request(req); nfs_clear_page_tag_locked(req); nfs_end_page_writeback(page); @@ -1087,7 +1093,7 @@ static void nfs_writeback_release_partial(void *calldata) out: if (atomic_dec_and_test(&req->wb_complete)) - nfs_writepage_release(req); + nfs_writepage_release(req, data); nfs_writedata_release(calldata); } @@ -1154,7 +1160,7 @@ static void nfs_writeback_release_full(void *calldata) if (nfs_write_need_commit(data)) { memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - nfs_mark_request_commit(req); + nfs_mark_request_commit(req, data->lseg); dprintk(" marked for commit\n"); goto next; } @@ -1357,14 +1363,15 @@ static void nfs_init_commit(struct nfs_write_data *data, nfs_fattr_init(&data->fattr); } -static void nfs_retry_commit(struct list_head *page_list) +static void nfs_retry_commit(struct list_head *page_list, + struct pnfs_layout_segment *lseg) { struct nfs_page *req; while (!list_empty(page_list)) { req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); - nfs_mark_request_commit(req); + nfs_mark_request_commit(req, lseg); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dec_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); @@ -1389,7 +1396,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) nfs_init_commit(data, head); return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); out_bad: - nfs_retry_commit(head); + nfs_retry_commit(head, NULL); nfs_commit_clear_lock(NFS_I(inode)); return -ENOMEM; } @@ -1477,7 +1484,11 @@ int nfs_commit_inode(struct inode *inode, int how) res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&inode->i_lock); if (res) { - int error = nfs_commit_list(inode, &head, how); + int error; + + error = pnfs_commit_list(inode, &head, how); + if (error == PNFS_NOT_ATTEMPTED) + error = nfs_commit_list(inode, &head, how); if (error < 0) return error; if (!may_wait) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4179c368844..eddda6ce7c4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -226,6 +226,7 @@ struct nfs_inode { #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ +#define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 92d54c81f51..8023e4e2513 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -33,11 +33,15 @@ enum { PG_CLEAN, PG_NEED_COMMIT, PG_NEED_RESCHED, + PG_PNFS_COMMIT, }; struct nfs_inode; struct nfs_page { - struct list_head wb_list; /* Defines state of page: */ + union { + struct list_head wb_list; /* Defines state of page: */ + struct pnfs_layout_segment *wb_commit_lseg; /* Used when PG_PNFS_COMMIT set */ + }; struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ struct nfs_lock_context *wb_lock_context; /* lock context info */ -- cgit v1.2.3-70-g09d2 From 988b6dceb0ae6d642587c8594529b94f6be0c5ea Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 23 Mar 2011 13:27:52 +0000 Subject: NFSv4.1: remove GETATTR from ds commits Any COMMIT compound directed to a data server needs to have the GETATTR calls suppressed. We here, make sure the field we are testing (data->lseg) is set and refcounted correctly. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++++-- fs/nfs/nfs4xdr.c | 8 +++++--- fs/nfs/write.c | 7 +++++-- 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bcc29bdc9be..5d61cccc8d4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3217,8 +3217,12 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->inode); - - data->args.bitmask = server->cache_consistency_bitmask; + + if (data->lseg) { + data->args.bitmask = NULL; + data->res.fattr = NULL; + } else + data->args.bitmask = server->cache_consistency_bitmask; if (!data->write_done_cb) data->write_done_cb = nfs4_commit_done_cb; data->res.server = server; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0cf560f7788..07cdf925c52 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2294,7 +2294,8 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_commit(xdr, args, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + if (args->bitmask) + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -5723,8 +5724,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_commit(xdr, res); if (status) goto out; - decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + if (res->fattr) + decode_getfattr(xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6927a18b689..cae5d160d83 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1295,6 +1295,7 @@ static void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; + put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); nfs_commit_free(wdata); } @@ -1338,7 +1339,8 @@ static int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *cln * Set up the argument/result storage required for the RPC call. */ static void nfs_init_commit(struct nfs_write_data *data, - struct list_head *head) + struct list_head *head, + struct pnfs_layout_segment *lseg) { struct nfs_page *first = nfs_list_entry(head->next); struct inode *inode = first->wb_context->path.dentry->d_inode; @@ -1350,6 +1352,7 @@ static void nfs_init_commit(struct nfs_write_data *data, data->inode = inode; data->cred = first->wb_context->cred; + data->lseg = lseg; /* reference transferred */ data->mds_ops = &nfs_commit_ops; data->args.fh = NFS_FH(data->inode); @@ -1393,7 +1396,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) goto out_bad; /* Set up the argument struct */ - nfs_init_commit(data, head); + nfs_init_commit(data, head, NULL); return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); out_bad: nfs_retry_commit(head, NULL); -- cgit v1.2.3-70-g09d2 From e0c2b3801828aadb65dec9f67f7c6b7a675ad007 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 23 Mar 2011 13:27:53 +0000 Subject: NFSv4.1: filelayout driver specific code for COMMIT Implement all the hooks created in the previous patches. This requires exporting quite a few functions and adding a few structure fields. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 + fs/nfs/internal.h | 14 +++ fs/nfs/nfs4filelayout.c | 235 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/pnfs.c | 1 + fs/nfs/write.c | 21 +++-- include/linux/nfs_fs.h | 1 + include/linux/nfs_xdr.h | 1 + 7 files changed, 267 insertions(+), 7 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 477a2e512b3..229e586b1a2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1470,6 +1470,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) nfsi->delegation_state = 0; init_rwsem(&nfsi->rwsem); nfsi->layout = NULL; + atomic_set(&nfsi->commits_outstanding, 0); #endif } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d1ddc23c404..70870506221 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -276,11 +276,25 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, extern void nfs_read_prepare(struct rpc_task *task, void *calldata); /* write.c */ +extern void nfs_commit_free(struct nfs_write_data *p); extern int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops, int how); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); +extern int nfs_initiate_commit(struct nfs_write_data *data, + struct rpc_clnt *clnt, + const struct rpc_call_ops *call_ops, + int how); +extern void nfs_init_commit(struct nfs_write_data *data, + struct list_head *head, + struct pnfs_layout_segment *lseg); +void nfs_retry_commit(struct list_head *page_list, + struct pnfs_layout_segment *lseg); +void nfs_commit_clear_lock(struct nfs_inode *nfsi); +void nfs_commitdata_release(void *data); +void nfs_commit_release_pages(struct nfs_write_data *data); + #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, struct page *, struct page *); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 03ff80c67c6..97e75a22af7 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -213,6 +213,37 @@ static int filelayout_write_done_cb(struct rpc_task *task, return 0; } +/* Fake up some data that will cause nfs_commit_release to retry the writes. */ +static void prepare_to_resend_writes(struct nfs_write_data *data) +{ + struct nfs_page *first = nfs_list_entry(data->pages.next); + + data->task.tk_status = 0; + memcpy(data->verf.verifier, first->wb_verf.verifier, + sizeof(first->wb_verf.verifier)); + data->verf.verifier[0]++; /* ensure verifier mismatch */ +} + +static int filelayout_commit_done_cb(struct rpc_task *task, + struct nfs_write_data *data) +{ + int reset = 0; + + if (filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, &reset) == -EAGAIN) { + dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", + __func__, data->ds_clp, data->ds_clp->cl_session); + if (reset) { + prepare_to_resend_writes(data); + filelayout_set_lo_fail(data->lseg); + } else + nfs_restart_rpc(task, data->ds_clp); + return -EAGAIN; + } + + return 0; +} + static void filelayout_write_prepare(struct rpc_task *task, void *data) { struct nfs_write_data *wdata = (struct nfs_write_data *)data; @@ -240,6 +271,16 @@ static void filelayout_write_release(void *data) wdata->mds_ops->rpc_release(data); } +static void filelayout_commit_release(void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + nfs_commit_release_pages(wdata); + if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) + nfs_commit_clear_lock(NFS_I(wdata->inode)); + nfs_commitdata_release(wdata); +} + struct rpc_call_ops filelayout_read_call_ops = { .rpc_call_prepare = filelayout_read_prepare, .rpc_call_done = filelayout_read_call_done, @@ -252,6 +293,12 @@ struct rpc_call_ops filelayout_write_call_ops = { .rpc_release = filelayout_write_release, }; +struct rpc_call_ops filelayout_commit_call_ops = { + .rpc_call_prepare = filelayout_write_prepare, + .rpc_call_done = filelayout_write_call_done, + .rpc_release = filelayout_commit_release, +}; + static enum pnfs_try_status filelayout_read_pagelist(struct nfs_read_data *data) { @@ -574,6 +621,191 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return (p_stripe == r_stripe); } +static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) +{ + return !FILELAYOUT_LSEG(lseg)->commit_through_mds; +} + +static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) +{ + if (fl->stripe_type == STRIPE_SPARSE) + return nfs4_fl_calc_ds_index(&fl->generic_hdr, j); + else + return j; +} + +struct list_head *filelayout_choose_commit_list(struct nfs_page *req) +{ + struct pnfs_layout_segment *lseg = req->wb_commit_lseg; + struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); + u32 i, j; + struct list_head *list; + + /* Note that we are calling nfs4_fl_calc_j_index on each page + * that ends up being committed to a data server. An attractive + * alternative is to add a field to nfs_write_data and nfs_page + * to store the value calculated in filelayout_write_pagelist + * and just use that here. + */ + j = nfs4_fl_calc_j_index(lseg, + (loff_t)req->wb_index << PAGE_CACHE_SHIFT); + i = select_bucket_index(fl, j); + list = &fl->commit_buckets[i]; + if (list_empty(list)) { + /* Non-empty buckets hold a reference on the lseg */ + get_lseg(lseg); + } + return list; +} + +static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) +{ + struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); + + if (flseg->stripe_type == STRIPE_SPARSE) + return i; + else + return nfs4_fl_calc_ds_index(lseg, i); +} + +static struct nfs_fh * +select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) +{ + struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); + + if (flseg->stripe_type == STRIPE_SPARSE) { + if (flseg->num_fh == 1) + i = 0; + else if (flseg->num_fh == 0) + /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ + return NULL; + } + return flseg->fh_array[i]; +} + +static int filelayout_initiate_commit(struct nfs_write_data *data, int how) +{ + struct pnfs_layout_segment *lseg = data->lseg; + struct nfs4_pnfs_ds *ds; + u32 idx; + struct nfs_fh *fh; + + idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); + ds = nfs4_fl_prepare_ds(lseg, idx); + if (!ds) { + printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); + prepare_to_resend_writes(data); + data->mds_ops->rpc_release(data); + return -EAGAIN; + } + dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); + data->write_done_cb = filelayout_commit_done_cb; + data->ds_clp = ds->ds_clp; + fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); + if (fh) + data->args.fh = fh; + return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient, + &filelayout_commit_call_ops, how); +} + +/* + * This is only useful while we are using whole file layouts. + */ +static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) +{ + struct pnfs_layout_segment *lseg, *rv = NULL; + + spin_lock(&inode->i_lock); + list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) + if (lseg->pls_range.iomode == IOMODE_RW) + rv = get_lseg(lseg); + spin_unlock(&inode->i_lock); + return rv; +} + +static int alloc_ds_commits(struct inode *inode, struct list_head *list) +{ + struct pnfs_layout_segment *lseg; + struct nfs4_filelayout_segment *fl; + struct nfs_write_data *data; + int i, j; + + /* Won't need this when non-whole file layout segments are supported + * instead we will use a pnfs_layout_hdr structure */ + lseg = find_only_write_lseg(inode); + if (!lseg) + return 0; + fl = FILELAYOUT_LSEG(lseg); + for (i = 0; i < fl->number_of_buckets; i++) { + if (list_empty(&fl->commit_buckets[i])) + continue; + data = nfs_commitdata_alloc(); + if (!data) + goto out_bad; + data->ds_commit_index = i; + data->lseg = lseg; + list_add(&data->pages, list); + } + put_lseg(lseg); + return 0; + +out_bad: + for (j = i; j < fl->number_of_buckets; j++) { + if (list_empty(&fl->commit_buckets[i])) + continue; + nfs_retry_commit(&fl->commit_buckets[i], lseg); + put_lseg(lseg); /* associated with emptying bucket */ + } + put_lseg(lseg); + /* Caller will clean up entries put on list */ + return -ENOMEM; +} + +/* This follows nfs_commit_list pretty closely */ +static int +filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, + int how) +{ + struct nfs_write_data *data, *tmp; + LIST_HEAD(list); + + if (!list_empty(mds_pages)) { + data = nfs_commitdata_alloc(); + if (!data) + goto out_bad; + data->lseg = NULL; + list_add(&data->pages, &list); + } + + if (alloc_ds_commits(inode, &list)) + goto out_bad; + + list_for_each_entry_safe(data, tmp, &list, pages) { + list_del_init(&data->pages); + atomic_inc(&NFS_I(inode)->commits_outstanding); + if (!data->lseg) { + nfs_init_commit(data, mds_pages, NULL); + nfs_initiate_commit(data, NFS_CLIENT(inode), + data->mds_ops, how); + } else { + nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg); + filelayout_initiate_commit(data, how); + } + } + return 0; + out_bad: + list_for_each_entry_safe(data, tmp, &list, pages) { + nfs_retry_commit(&data->pages, data->lseg); + list_del_init(&data->pages); + nfs_commit_free(data); + } + nfs_retry_commit(mds_pages, NULL); + nfs_commit_clear_lock(NFS_I(inode)); + return -ENOMEM; +} + static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", @@ -581,6 +813,9 @@ static struct pnfs_layoutdriver_type filelayout_type = { .alloc_lseg = filelayout_alloc_lseg, .free_lseg = filelayout_free_lseg, .pg_test = filelayout_pg_test, + .mark_pnfs_commit = filelayout_mark_pnfs_commit, + .choose_commit_list = filelayout_choose_commit_list, + .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, }; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f38813a0a29..c67565965f2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -259,6 +259,7 @@ put_lseg(struct pnfs_layout_segment *lseg) pnfs_free_lseg_list(&free_me); } } +EXPORT_SYMBOL_GPL(put_lseg); static bool should_free_lseg(u32 lseg_iomode, u32 recall_iomode) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index cae5d160d83..e7aeda0663c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -59,6 +59,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void) } return p; } +EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); void nfs_commit_free(struct nfs_write_data *p) { @@ -66,6 +67,7 @@ void nfs_commit_free(struct nfs_write_data *p) kfree(p->pagevec); mempool_free(p, nfs_commit_mempool); } +EXPORT_SYMBOL_GPL(nfs_commit_free); struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) { @@ -1283,15 +1285,15 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) return (ret < 0) ? ret : 1; } -static void nfs_commit_clear_lock(struct nfs_inode *nfsi) +void nfs_commit_clear_lock(struct nfs_inode *nfsi) { clear_bit(NFS_INO_COMMIT, &nfsi->flags); smp_mb__after_clear_bit(); wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); } +EXPORT_SYMBOL_GPL(nfs_commit_clear_lock); - -static void nfs_commitdata_release(void *data) +void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; @@ -1299,8 +1301,9 @@ static void nfs_commitdata_release(void *data) put_nfs_open_context(wdata->args.context); nfs_commit_free(wdata); } +EXPORT_SYMBOL_GPL(nfs_commitdata_release); -static int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, +int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops, int how) { @@ -1334,11 +1337,12 @@ static int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *cln rpc_put_task(task); return 0; } +EXPORT_SYMBOL_GPL(nfs_initiate_commit); /* * Set up the argument/result storage required for the RPC call. */ -static void nfs_init_commit(struct nfs_write_data *data, +void nfs_init_commit(struct nfs_write_data *data, struct list_head *head, struct pnfs_layout_segment *lseg) { @@ -1365,8 +1369,9 @@ static void nfs_init_commit(struct nfs_write_data *data, data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); } +EXPORT_SYMBOL_GPL(nfs_init_commit); -static void nfs_retry_commit(struct list_head *page_list, +void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg) { struct nfs_page *req; @@ -1381,6 +1386,7 @@ static void nfs_retry_commit(struct list_head *page_list, nfs_clear_page_tag_locked(req); } } +EXPORT_SYMBOL_GPL(nfs_retry_commit); /* * Commit dirty pages @@ -1419,7 +1425,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) return; } -static void nfs_commit_release_pages(struct nfs_write_data *data) +void nfs_commit_release_pages(struct nfs_write_data *data) { struct nfs_page *req; int status = data->task.tk_status; @@ -1456,6 +1462,7 @@ static void nfs_commit_release_pages(struct nfs_write_data *data) nfs_clear_page_tag_locked(req); } } +EXPORT_SYMBOL_GPL(nfs_commit_release_pages); static void nfs_commit_release(void *calldata) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index eddda6ce7c4..807e07c86b2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -198,6 +198,7 @@ struct nfs_inode { /* pNFS layout information */ struct pnfs_layout_hdr *layout; + atomic_t commits_outstanding; #endif /* CONFIG_NFS_V4*/ #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2c2c67d2eb4..ac0c0e51786 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1040,6 +1040,7 @@ struct nfs_write_data { struct nfs_writeres res; /* result struct */ struct pnfs_layout_segment *lseg; struct nfs_client *ds_clp; /* pNFS data server */ + int ds_commit_index; const struct rpc_call_ops *mds_ops; int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); #ifdef CONFIG_NFS_V4 -- cgit v1.2.3-70-g09d2 From 863a3c6c686d5773f7192a4818769e15db12ce08 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 23 Mar 2011 13:27:54 +0000 Subject: NFSv4.1: layoutcommit The filelayout driver sends LAYOUTCOMMIT only when COMMIT goes to the data server (as opposed to the MDS) and the data server WRITE is not NFS_FILE_SYNC. Only whole file layout support means that there is only one IOMODE_RW layout segment. Signed-off-by: Andy Adamson Signed-off-by: Alexandros Batsakis Signed-off-by: Boaz Harrosh Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Mingyang Guo Signed-off-by: Tao Guo Signed-off-by: Zhang Jingwang Tested-by: Boaz Harrosh Signed-off-by: Benny Halevy Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 3 ++ fs/nfs/nfs4_fs.h | 2 + fs/nfs/nfs4filelayout.c | 18 +++++++ fs/nfs/nfs4proc.c | 94 +++++++++++++++++++++++++++++++++++ fs/nfs/nfs4xdr.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/pnfs.c | 94 +++++++++++++++++++++++++++++++++++ fs/nfs/pnfs.h | 9 +++- fs/nfs/write.c | 15 +++++- include/linux/nfs4.h | 1 + include/linux/nfs_fs.h | 1 + include/linux/nfs_xdr.h | 23 +++++++++ 11 files changed, 387 insertions(+), 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d85a534b15c..85cb95de5df 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -326,6 +326,9 @@ nfs_file_fsync(struct file *file, int datasync) ret = xchg(&ctx->error, 0); if (!ret && status < 0) ret = status; + if (!ret && !datasync) + /* application has asked for meta-data sync */ + ret = pnfs_layoutcommit_inode(inode, 1); return ret; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c64be1cff08..1e612d159b7 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -262,6 +262,8 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *); extern int nfs4_init_session(struct nfs_server *server); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); +extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, + int sync); static inline bool is_ds_only_client(struct nfs_client *clp) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 97e75a22af7..fc1a0e9c127 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -153,6 +153,23 @@ static int filelayout_read_done_cb(struct rpc_task *task, return 0; } +/* + * We reference the rpc_cred of the first WRITE that triggers the need for + * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. + * rfc5661 is not clear about which credential should be used. + */ +static void +filelayout_set_layoutcommit(struct nfs_write_data *wdata) +{ + if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || + wdata->res.verf->committed == NFS_FILE_SYNC) + return; + + pnfs_set_layoutcommit(wdata); + dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, + (unsigned long) wdata->lseg->pls_end_pos); +} + /* * Call ops for the async read/write cases * In the case of dense layouts, the offset needs to be reset to its @@ -210,6 +227,7 @@ static int filelayout_write_done_cb(struct rpc_task *task, return -EAGAIN; } + filelayout_set_layoutcommit(data); return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5d61cccc8d4..6f2f40239d1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5616,6 +5616,100 @@ int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) } EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); +static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutcommit_data *data = calldata; + struct nfs_server *server = NFS_SERVER(data->args.inode); + + if (nfs4_setup_sequence(server, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} + +static void +nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutcommit_data *data = calldata; + struct nfs_server *server = NFS_SERVER(data->args.inode); + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return; + + switch (task->tk_status) { /* Just ignore these failures */ + case NFS4ERR_DELEG_REVOKED: /* layout was recalled */ + case NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */ + case NFS4ERR_BADLAYOUT: /* no layout */ + case NFS4ERR_GRACE: /* loca_recalim always false */ + task->tk_status = 0; + } + + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + nfs_restart_rpc(task, server->nfs_client); + return; + } + + if (task->tk_status == 0) + nfs_post_op_update_inode_force_wcc(data->args.inode, + data->res.fattr); +} + +static void nfs4_layoutcommit_release(void *calldata) +{ + struct nfs4_layoutcommit_data *data = calldata; + + /* Matched by references in pnfs_set_layoutcommit */ + put_lseg(data->lseg); + put_rpccred(data->cred); + kfree(data); +} + +static const struct rpc_call_ops nfs4_layoutcommit_ops = { + .rpc_call_prepare = nfs4_layoutcommit_prepare, + .rpc_call_done = nfs4_layoutcommit_done, + .rpc_release = nfs4_layoutcommit_release, +}; + +int +nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, int sync) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, + }; + struct rpc_task_setup task_setup_data = { + .task = &data->task, + .rpc_client = NFS_CLIENT(data->args.inode), + .rpc_message = &msg, + .callback_ops = &nfs4_layoutcommit_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; + struct rpc_task *task; + int status = 0; + + dprintk("NFS: %4d initiating layoutcommit call. sync %d " + "lbw: %llu inode %lu\n", + data->task.tk_pid, sync, + data->args.lastbytewritten, + data->args.inode->i_ino); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + if (!sync) + goto out; + status = nfs4_wait_for_completion_rpc_task(task); + if (status != 0) + goto out; + status = task->tk_status; +out: + dprintk("%s: status %d\n", __func__, status); + rpc_put_task(task); + return status; +} #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 07cdf925c52..207d399c8de 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -324,6 +324,18 @@ static int nfs4_stat_to_errno(int); #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ decode_stateid_maxsz + \ XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) +#define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \ + 2 /* offset */ + \ + 2 /* length */ + \ + 1 /* reclaim */ + \ + encode_stateid_maxsz + \ + 1 /* new offset (true) */ + \ + 2 /* last byte written */ + \ + 1 /* nt_timechanged (false) */ + \ + 1 /* layoutupdate4 layout type */ + \ + 1 /* NULL filelayout layoutupdate4 payload */) +#define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) + #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -727,6 +739,17 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_layoutget_maxsz) +#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz +\ + encode_putfh_maxsz + \ + encode_layoutcommit_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_layoutcommit_maxsz + \ + decode_getattr_maxsz) + const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1816,6 +1839,34 @@ encode_layoutget(struct xdr_stream *xdr, hdr->nops++; hdr->replen += decode_layoutget_maxsz; } + +static int +encode_layoutcommit(struct xdr_stream *xdr, + const struct nfs4_layoutcommit_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, + NFS_SERVER(args->inode)->pnfs_curr_ld->id); + + p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); + /* Only whole file layouts */ + p = xdr_encode_hyper(p, 0); /* offset */ + p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */ + *p++ = cpu_to_be32(0); /* reclaim */ + p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(1); /* newoffset = TRUE */ + p = xdr_encode_hyper(p, args->lastbytewritten); + *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ + *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ + *p++ = cpu_to_be32(0); /* no file layout payload */ + + hdr->nops++; + hdr->replen += decode_layoutcommit_maxsz; + return 0; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2607,6 +2658,26 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, encode_layoutget(xdr, args, &hdr); encode_nops(&hdr); } + +/* + * Encode LAYOUTCOMMIT request + */ +static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_layoutcommit_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, NFS_FH(args->inode), &hdr); + encode_layoutcommit(xdr, args, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -5007,6 +5078,35 @@ out_overflow: print_overflow_msg(__func__, xdr); return -EIO; } + +static int decode_layoutcommit(struct xdr_stream *xdr, + struct rpc_rqst *req, + struct nfs4_layoutcommit_res *res) +{ + __be32 *p; + __u32 sizechanged; + int status; + + status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + sizechanged = be32_to_cpup(p); + + if (sizechanged) { + /* throw away new size */ + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + } + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -6068,6 +6168,34 @@ static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode LAYOUTCOMMIT response + */ +static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_layoutcommit_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_layoutcommit(xdr, rqstp, res); + if (status) + goto out; + decode_getfattr(xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} #endif /* CONFIG_NFS_V4_1 */ /** @@ -6269,6 +6397,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), PROC(LAYOUTGET, enc_layoutget, dec_layoutget), + PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c67565965f2..2a08ca0dddc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -946,3 +946,97 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); return trypnfs; } + +/* + * Currently there is only one (whole file) write lseg. + */ +static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode) +{ + struct pnfs_layout_segment *lseg, *rv = NULL; + + list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) + if (lseg->pls_range.iomode == IOMODE_RW) + rv = lseg; + return rv; +} + +void +pnfs_set_layoutcommit(struct nfs_write_data *wdata) +{ + struct nfs_inode *nfsi = NFS_I(wdata->inode); + loff_t end_pos = wdata->args.offset + wdata->res.count; + + spin_lock(&nfsi->vfs_inode.i_lock); + if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { + /* references matched in nfs4_layoutcommit_release */ + get_lseg(wdata->lseg); + wdata->lseg->pls_lc_cred = + get_rpccred(wdata->args.context->state->owner->so_cred); + mark_inode_dirty_sync(wdata->inode); + dprintk("%s: Set layoutcommit for inode %lu ", + __func__, wdata->inode->i_ino); + } + if (end_pos > wdata->lseg->pls_end_pos) + wdata->lseg->pls_end_pos = end_pos; + spin_unlock(&nfsi->vfs_inode.i_lock); +} +EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); + +int +pnfs_layoutcommit_inode(struct inode *inode, int sync) +{ + struct nfs4_layoutcommit_data *data; + struct nfs_inode *nfsi = NFS_I(inode); + struct pnfs_layout_segment *lseg; + struct rpc_cred *cred; + loff_t end_pos; + int status = 0; + + dprintk("--> %s inode %lu\n", __func__, inode->i_ino); + + /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ + data = kzalloc(sizeof(*data), GFP_NOFS); + spin_lock(&inode->i_lock); + + if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { + spin_unlock(&inode->i_lock); + kfree(data); + goto out; + } + /* + * Currently only one (whole file) write lseg which is referenced + * in pnfs_set_layoutcommit and will be found. + */ + lseg = pnfs_list_write_lseg(inode); + + end_pos = lseg->pls_end_pos; + cred = lseg->pls_lc_cred; + lseg->pls_end_pos = 0; + lseg->pls_lc_cred = NULL; + + if (!data) { + put_lseg(lseg); + spin_unlock(&inode->i_lock); + put_rpccred(cred); + status = -ENOMEM; + goto out; + } else { + memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, + sizeof(nfsi->layout->plh_stateid.data)); + } + spin_unlock(&inode->i_lock); + + data->args.inode = inode; + data->lseg = lseg; + data->cred = cred; + nfs_fattr_init(&data->fattr); + data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; + data->res.fattr = &data->fattr; + data->args.lastbytewritten = end_pos - 1; + data->res.server = NFS_SERVER(inode); + + status = nfs4_proc_layoutcommit(data, sync); +out: + dprintk("<-- %s status %d\n", __func__, status); + return status; +} diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 5370f1b9aa4..0806c77862b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -43,6 +43,8 @@ struct pnfs_layout_segment { atomic_t pls_refcount; unsigned long pls_flags; struct pnfs_layout_hdr *pls_layout; + struct rpc_cred *pls_lc_cred; /* LAYOUTCOMMIT credential */ + loff_t pls_end_pos; /* LAYOUTCOMMIT write end */ }; enum pnfs_try_status { @@ -152,7 +154,8 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier); - +void pnfs_set_layoutcommit(struct nfs_write_data *wdata); +int pnfs_layoutcommit_inode(struct inode *inode, int sync); static inline int lo_fail_bit(u32 iomode) { @@ -325,6 +328,10 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) { } +static inline int pnfs_layoutcommit_inode(struct inode *inode, int sync) +{ + return 0; +} #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e7aeda0663c..a03c11f9081 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1562,7 +1562,20 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { - return nfs_commit_unstable_pages(inode, wbc); + int ret; + + ret = nfs_commit_unstable_pages(inode, wbc); + if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { + int status, sync = wbc->sync_mode; + + if (wbc->nonblocking || wbc->for_background) + sync = 0; + + status = pnfs_layoutcommit_inode(inode, sync); + if (status < 0) + return status; + } + return ret; } /* diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 134716e5e35..bf22cfaaeee 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -560,6 +560,7 @@ enum { NFSPROC4_CLNT_RECLAIM_COMPLETE, NFSPROC4_CLNT_LAYOUTGET, NFSPROC4_CLNT_GETDEVICEINFO, + NFSPROC4_CLNT_LAYOUTCOMMIT, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 807e07c86b2..1b93b9c60e5 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -228,6 +228,7 @@ struct nfs_inode { #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */ +#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ac0c0e51786..84f3585c572 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -236,6 +236,29 @@ struct nfs4_getdeviceinfo_res { struct nfs4_sequence_res seq_res; }; +struct nfs4_layoutcommit_args { + nfs4_stateid stateid; + __u64 lastbytewritten; + struct inode *inode; + const u32 *bitmask; + struct nfs4_sequence_args seq_args; +}; + +struct nfs4_layoutcommit_res { + struct nfs_fattr *fattr; + const struct nfs_server *server; + struct nfs4_sequence_res seq_res; +}; + +struct nfs4_layoutcommit_data { + struct rpc_task task; + struct nfs_fattr fattr; + struct pnfs_layout_segment *lseg; + struct rpc_cred *cred; + struct nfs4_layoutcommit_args args; + struct nfs4_layoutcommit_res res; +}; + /* * Arguments to the open call. */ -- cgit v1.2.3-70-g09d2 From ef31153786bc1e4304e6b9422cc8b9efef455611 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Sat, 12 Mar 2011 02:58:10 -0500 Subject: NFSv4.1 convert layoutcommit sync to boolean Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 +- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 4 ++-- fs/nfs/pnfs.c | 2 +- fs/nfs/pnfs.h | 4 ++-- fs/nfs/write.c | 8 +++++--- 6 files changed, 12 insertions(+), 10 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 85cb95de5df..3ac5bd695e5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -328,7 +328,7 @@ nfs_file_fsync(struct file *file, int datasync) ret = status; if (!ret && !datasync) /* application has asked for meta-data sync */ - ret = pnfs_layoutcommit_inode(inode, 1); + ret = pnfs_layoutcommit_inode(inode, true); return ret; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1e612d159b7..4414fd7e7d5 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -263,7 +263,7 @@ extern int nfs4_init_session(struct nfs_server *server); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, - int sync); + bool sync); static inline bool is_ds_only_client(struct nfs_client *clp) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6f2f40239d1..43045fa4471 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5671,7 +5671,7 @@ static const struct rpc_call_ops nfs4_layoutcommit_ops = { }; int -nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, int sync) +nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT], @@ -5699,7 +5699,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, int sync) task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - if (!sync) + if (sync == false) goto out; status = nfs4_wait_for_completion_rpc_task(task); if (status != 0) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ac711257479..22c2ddbef42 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -991,7 +991,7 @@ EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); * if WRITEs to a data server return NFS_DATA_SYNC. */ int -pnfs_layoutcommit_inode(struct inode *inode, int sync) +pnfs_layoutcommit_inode(struct inode *inode, bool sync) { struct nfs4_layoutcommit_data *data; struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0806c77862b..33b9ae90c6f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -155,7 +155,7 @@ void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier); void pnfs_set_layoutcommit(struct nfs_write_data *wdata); -int pnfs_layoutcommit_inode(struct inode *inode, int sync); +int pnfs_layoutcommit_inode(struct inode *inode, bool sync); static inline int lo_fail_bit(u32 iomode) { @@ -328,7 +328,7 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) { } -static inline int pnfs_layoutcommit_inode(struct inode *inode, int sync) +static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a03c11f9081..85d75254328 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1566,10 +1566,12 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) ret = nfs_commit_unstable_pages(inode, wbc); if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { - int status, sync = wbc->sync_mode; + int status; + bool sync = true; - if (wbc->nonblocking || wbc->for_background) - sync = 0; + if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking || + wbc->for_background) + sync = false; status = pnfs_layoutcommit_inode(inode, sync); if (status < 0) -- cgit v1.2.3-70-g09d2 From 4d65c520fb4abed970069d18c119cfe85624f46d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 Mar 2011 14:15:11 -0400 Subject: NFS: Fix a hang in the writeback path Now that the inode scalability patches have been merged, it is no longer safe to call igrab() under the inode->i_lock. Now that we no longer call nfs_clear_request() until the nfs_page is being freed, we know that we are always holding a reference to the nfs_open_context, which again holds a reference to the path, and so the inode cannot be freed until the last nfs_page has been removed from the radix tree and freed. We can therefore skip the igrab()/iput() altogether. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 4 ++-- fs/nfs/write.c | 13 +++---------- include/linux/nfs_page.h | 1 - 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 87a593c2b05..c80add6e221 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -135,14 +135,14 @@ void nfs_clear_page_tag_locked(struct nfs_page *req) nfs_unlock_request(req); } -/** +/* * nfs_clear_request - Free up all resources allocated to the request * @req: * * Release page and open context resources associated with a read/write * request after it has completed. */ -void nfs_clear_request(struct nfs_page *req) +static void nfs_clear_request(struct nfs_page *req) { struct page *page = req->wb_page; struct nfs_open_context *ctx = req->wb_context; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 85d75254328..af0c6279a4a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -389,11 +389,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) spin_lock(&inode->i_lock); error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); BUG_ON(error); - if (!nfsi->npages) { - igrab(inode); - if (nfs_have_delegation(inode, FMODE_WRITE)) - nfsi->change_attr++; - } + if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) + nfsi->change_attr++; set_bit(PG_MAPPED, &req->wb_flags); SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); @@ -423,11 +420,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) clear_bit(PG_MAPPED, &req->wb_flags); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; - if (!nfsi->npages) { - spin_unlock(&inode->i_lock); - iput(inode); - } else - spin_unlock(&inode->i_lock); + spin_unlock(&inode->i_lock); nfs_release_request(req); } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 8023e4e2513..91af2e49fa3 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -78,7 +78,6 @@ extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, struct page *page, unsigned int offset, unsigned int count); -extern void nfs_clear_request(struct nfs_page *req); extern void nfs_release_request(struct nfs_page *req); -- cgit v1.2.3-70-g09d2