From b31268ac793fd300da66b9c28bbf0a200339ab96 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Mar 2011 17:02:00 -0400 Subject: NFS: Use stable writes when not doing a bulk flush If we're only doing a single write, and there are no other unstable writes being queued up, we might want to just flip to using a stable write RPC call. Reviewed-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 23e79441066..fd85618149a 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -223,6 +223,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_count = 0; desc->pg_bsize = bsize; desc->pg_base = 0; + desc->pg_moreio = 0; desc->pg_inode = inode; desc->pg_doio = doio; desc->pg_ioflags = io_flags; @@ -335,9 +336,11 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { while (!nfs_pageio_do_add_request(desc, req)) { + desc->pg_moreio = 1; nfs_pageio_doio(desc); if (desc->pg_error < 0) return 0; + desc->pg_moreio = 0; } return 1; } -- cgit v1.2.3-70-g09d2 From a861a1e1c398fe34701569fd8ac9225dfe0a9a7e Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 23 Mar 2011 13:27:51 +0000 Subject: NFSv4.1: add generic layer hooks for pnfs COMMIT We create three major hooks for the pnfs code. pnfs_mark_request_commit() is called during writeback_done from nfs_mark_request_commit, which gives the driver an opportunity to claim it wants control over committing a particular req. pnfs_choose_commit_list() is called from nfs_scan_list to choose which list a given req should be added to, based on where we intend to send it for COMMIT. It is up to the driver to have preallocated list headers for each destination it may need. pnfs_commit_list() is how the driver actually takes control, it is used instead of nfs_commit_list(). 
In order to pass information between the above functions, we create a union in nfs_page to hold a lseg (which is possible because the req is not on any list while in transition), and add some flags to indicate if we need to use the pnfs code. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 5 ++-- fs/nfs/pnfs.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/write.c | 41 +++++++++++++++++---------- include/linux/nfs_fs.h | 1 + include/linux/nfs_page.h | 6 +++- 5 files changed, 108 insertions(+), 18 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index fd85618149a..87a593c2b05 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -398,6 +398,7 @@ int nfs_scan_list(struct nfs_inode *nfsi, pgoff_t idx_end; int found, i; int res; + struct list_head *list; res = 0; if (npages == 0) @@ -418,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi, idx_start = req->wb_index + 1; if (nfs_set_page_tag_locked(req)) { kref_get(&req->wb_kref); - nfs_list_remove_request(req); radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, tag); - nfs_list_add_request(req, dst); + list = pnfs_choose_commit_list(req, dst); + nfs_list_add_request(req, list); res++; if (res == INT_MAX) goto out; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 6380b9405bc..5370f1b9aa4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -74,6 +74,13 @@ struct pnfs_layoutdriver_type { /* test for nfs page cache coalescing */ int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + /* Returns true if layoutdriver wants to divert this request to + * driver's commit routine. 
+ */ + bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); + struct list_head * (*choose_commit_list) (struct nfs_page *req); + int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); + /* * Return PNFS_ATTEMPTED to indicate the layout code has attempted * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS @@ -169,6 +176,51 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss) return nfss->pnfs_curr_ld != NULL; } +static inline void +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +{ + if (lseg) { + struct pnfs_layoutdriver_type *ld; + + ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld; + if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) { + set_bit(PG_PNFS_COMMIT, &req->wb_flags); + req->wb_commit_lseg = get_lseg(lseg); + } + } +} + +static inline int +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) +{ + if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags)) + return PNFS_NOT_ATTEMPTED; + return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); +} + +static inline struct list_head * +pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) +{ + struct list_head *rv; + + if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { + struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; + + set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); + rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); + /* matched by ref taken when PG_PNFS_COMMIT is set */ + put_lseg(req->wb_commit_lseg); + } else + rv = mds; + return rv; +} + +static inline void pnfs_clear_request_commit(struct nfs_page *req) +{ + if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) + put_lseg(req->wb_commit_lseg); +} + #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -252,6 +304,27 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode 
*ino) pgio->pg_test = NULL; } +static inline void +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +{ +} + +static inline int +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) +{ + return PNFS_NOT_ATTEMPTED; +} + +static inline struct list_head * +pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) +{ + return mds; +} + +static inline void pnfs_clear_request_commit(struct nfs_page *req) +{ +} + #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f5f005e9db4..6927a18b689 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -441,7 +441,7 @@ nfs_mark_request_dirty(struct nfs_page *req) * Add a request to the inode's commit list. */ static void -nfs_mark_request_commit(struct nfs_page *req) +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); @@ -453,6 +453,7 @@ nfs_mark_request_commit(struct nfs_page *req) NFS_PAGE_TAG_COMMIT); nfsi->ncommit++; spin_unlock(&inode->i_lock); + pnfs_mark_request_commit(req, lseg); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); @@ -481,10 +482,11 @@ int nfs_write_need_commit(struct nfs_write_data *data) } static inline -int nfs_reschedule_unstable_write(struct nfs_page *req) +int nfs_reschedule_unstable_write(struct nfs_page *req, + struct nfs_write_data *data) { if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { - nfs_mark_request_commit(req); + nfs_mark_request_commit(req, data->lseg); return 1; } if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { @@ -495,7 +497,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) } #else static inline void -nfs_mark_request_commit(struct nfs_page *req) +nfs_mark_request_commit(struct nfs_page *req, 
struct pnfs_layout_segment *lseg) { } @@ -512,7 +514,8 @@ int nfs_write_need_commit(struct nfs_write_data *data) } static inline -int nfs_reschedule_unstable_write(struct nfs_page *req) +int nfs_reschedule_unstable_write(struct nfs_page *req, + struct nfs_write_data *data) { return 0; } @@ -615,9 +618,11 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, } if (nfs_clear_request_commit(req) && - radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) { NFS_I(inode)->ncommit--; + pnfs_clear_request_commit(req); + } /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { @@ -765,11 +770,12 @@ int nfs_updatepage(struct file *file, struct page *page, return status; } -static void nfs_writepage_release(struct nfs_page *req) +static void nfs_writepage_release(struct nfs_page *req, + struct nfs_write_data *data) { struct page *page = req->wb_page; - if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) nfs_inode_remove_request(req); nfs_clear_page_tag_locked(req); nfs_end_page_writeback(page); @@ -1087,7 +1093,7 @@ static void nfs_writeback_release_partial(void *calldata) out: if (atomic_dec_and_test(&req->wb_complete)) - nfs_writepage_release(req); + nfs_writepage_release(req, data); nfs_writedata_release(calldata); } @@ -1154,7 +1160,7 @@ static void nfs_writeback_release_full(void *calldata) if (nfs_write_need_commit(data)) { memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - nfs_mark_request_commit(req); + nfs_mark_request_commit(req, data->lseg); dprintk(" marked for commit\n"); goto next; } @@ -1357,14 +1363,15 @@ static void nfs_init_commit(struct nfs_write_data *data, nfs_fattr_init(&data->fattr); } -static void nfs_retry_commit(struct list_head *page_list) +static void 
nfs_retry_commit(struct list_head *page_list, + struct pnfs_layout_segment *lseg) { struct nfs_page *req; while (!list_empty(page_list)) { req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); - nfs_mark_request_commit(req); + nfs_mark_request_commit(req, lseg); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dec_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); @@ -1389,7 +1396,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) nfs_init_commit(data, head); return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); out_bad: - nfs_retry_commit(head); + nfs_retry_commit(head, NULL); nfs_commit_clear_lock(NFS_I(inode)); return -ENOMEM; } @@ -1477,7 +1484,11 @@ int nfs_commit_inode(struct inode *inode, int how) res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&inode->i_lock); if (res) { - int error = nfs_commit_list(inode, &head, how); + int error; + + error = pnfs_commit_list(inode, &head, how); + if (error == PNFS_NOT_ATTEMPTED) + error = nfs_commit_list(inode, &head, how); if (error < 0) return error; if (!may_wait) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4179c368844..eddda6ce7c4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -226,6 +226,7 @@ struct nfs_inode { #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ +#define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 92d54c81f51..8023e4e2513 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -33,11 +33,15 @@ enum { PG_CLEAN, PG_NEED_COMMIT, PG_NEED_RESCHED, + PG_PNFS_COMMIT, }; struct nfs_inode; struct nfs_page { - struct list_head wb_list; /* Defines state of page: */ + 
union { + struct list_head wb_list; /* Defines state of page: */ + struct pnfs_layout_segment *wb_commit_lseg; /* Used when PG_PNFS_COMMIT set */ + }; struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ struct nfs_lock_context *wb_lock_context; /* lock context info */ -- cgit v1.2.3-70-g09d2 From 4d65c520fb4abed970069d18c119cfe85624f46d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 Mar 2011 14:15:11 -0400 Subject: NFS: Fix a hang in the writeback path Now that the inode scalability patches have been merged, it is no longer safe to call igrab() under the inode->i_lock. Now that we no longer call nfs_clear_request() until the nfs_page is being freed, we know that we are always holding a reference to the nfs_open_context, which again holds a reference to the path, and so the inode cannot be freed until the last nfs_page has been removed from the radix tree and freed. We can therefore skip the igrab()/iput() altogether. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 4 ++-- fs/nfs/write.c | 13 +++---------- include/linux/nfs_page.h | 1 - 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 87a593c2b05..c80add6e221 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -135,14 +135,14 @@ void nfs_clear_page_tag_locked(struct nfs_page *req) nfs_unlock_request(req); } -/** +/* * nfs_clear_request - Free up all resources allocated to the request * @req: * * Release page and open context resources associated with a read/write * request after it has completed. 
*/ -void nfs_clear_request(struct nfs_page *req) +static void nfs_clear_request(struct nfs_page *req) { struct page *page = req->wb_page; struct nfs_open_context *ctx = req->wb_context; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 85d75254328..af0c6279a4a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -389,11 +389,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) spin_lock(&inode->i_lock); error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); BUG_ON(error); - if (!nfsi->npages) { - igrab(inode); - if (nfs_have_delegation(inode, FMODE_WRITE)) - nfsi->change_attr++; - } + if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) + nfsi->change_attr++; set_bit(PG_MAPPED, &req->wb_flags); SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); @@ -423,11 +420,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) clear_bit(PG_MAPPED, &req->wb_flags); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; - if (!nfsi->npages) { - spin_unlock(&inode->i_lock); - iput(inode); - } else - spin_unlock(&inode->i_lock); + spin_unlock(&inode->i_lock); nfs_release_request(req); } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 8023e4e2513..91af2e49fa3 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -78,7 +78,6 @@ extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, struct page *page, unsigned int offset, unsigned int count); -extern void nfs_clear_request(struct nfs_page *req); extern void nfs_release_request(struct nfs_page *req); -- cgit v1.2.3-70-g09d2