summaryrefslogtreecommitdiffstats
path: root/fs/nfs/nfs4filelayout.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2012-04-14 13:18:27 +0200
committerIngo Molnar <mingo@kernel.org>2012-04-14 13:19:04 +0200
commit6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e (patch)
tree021cc9f6b477146fcebe6f3be4752abfa2ba18a9 /fs/nfs/nfs4filelayout.c
parent682968e0c425c60f0dde37977e5beb2b12ddc4cc (diff)
parenta385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (diff)
Merge branch 'perf/core' into perf/uprobes
Merge in latest upstream (and the latest perf development tree), to prepare for tooling changes, and also to pick up v3.4 MM changes that the uprobes code needs to take care of. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs/nfs/nfs4filelayout.c')
-rw-r--r--fs/nfs/nfs4filelayout.c271
1 files changed, 214 insertions, 57 deletions
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 71ec08617e2..5acfd9ea8a3 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -33,7 +33,10 @@
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/sunrpc/metrics.h>
+
#include "internal.h"
+#include "delegation.h"
#include "nfs4filelayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -84,12 +87,27 @@ static int filelayout_async_handle_error(struct rpc_task *task,
struct nfs_client *clp,
int *reset)
{
+ struct nfs_server *mds_server = NFS_SERVER(state->inode);
+ struct nfs_client *mds_client = mds_server->nfs_client;
+
if (task->tk_status >= 0)
return 0;
-
*reset = 0;
switch (task->tk_status) {
+ /* MDS state errors */
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ nfs_remove_bad_delegation(state->inode);
+ case -NFS4ERR_OPENMODE:
+ nfs4_schedule_stateid_recovery(mds_server, state);
+ goto wait_on_recovery;
+ case -NFS4ERR_EXPIRED:
+ nfs4_schedule_stateid_recovery(mds_server, state);
+ nfs4_schedule_lease_recovery(mds_client);
+ goto wait_on_recovery;
+ /* DS session errors */
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
@@ -115,8 +133,14 @@ static int filelayout_async_handle_error(struct rpc_task *task,
*reset = 1;
break;
}
+out:
task->tk_status = 0;
return -EAGAIN;
+wait_on_recovery:
+ rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
+ if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
+ rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
+ goto out;
}
/* NFS_PROTO call done callback routines */
@@ -173,7 +197,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
&rdata->args.seq_args, &rdata->res.seq_res,
- 0, task))
+ task))
return;
rpc_call_start(task);
@@ -189,10 +213,18 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
rdata->mds_ops->rpc_call_done(task, data);
}
+static void filelayout_read_count_stats(struct rpc_task *task, void *data)
+{
+ struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+
+ rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics);
+}
+
static void filelayout_read_release(void *data)
{
struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ put_lseg(rdata->lseg);
rdata->mds_ops->rpc_release(data);
}
@@ -254,7 +286,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
&wdata->args.seq_args, &wdata->res.seq_res,
- 0, task))
+ task))
return;
rpc_call_start(task);
@@ -268,10 +300,18 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
wdata->mds_ops->rpc_call_done(task, data);
}
+static void filelayout_write_count_stats(struct rpc_task *task, void *data)
+{
+ struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+ rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics);
+}
+
static void filelayout_write_release(void *data)
{
struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ put_lseg(wdata->lseg);
wdata->mds_ops->rpc_release(data);
}
@@ -282,24 +322,28 @@ static void filelayout_commit_release(void *data)
nfs_commit_release_pages(wdata);
if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
nfs_commit_clear_lock(NFS_I(wdata->inode));
+ put_lseg(wdata->lseg);
nfs_commitdata_release(wdata);
}
-struct rpc_call_ops filelayout_read_call_ops = {
+static const struct rpc_call_ops filelayout_read_call_ops = {
.rpc_call_prepare = filelayout_read_prepare,
.rpc_call_done = filelayout_read_call_done,
+ .rpc_count_stats = filelayout_read_count_stats,
.rpc_release = filelayout_read_release,
};
-struct rpc_call_ops filelayout_write_call_ops = {
+static const struct rpc_call_ops filelayout_write_call_ops = {
.rpc_call_prepare = filelayout_write_prepare,
.rpc_call_done = filelayout_write_call_done,
+ .rpc_count_stats = filelayout_write_count_stats,
.rpc_release = filelayout_write_release,
};
-struct rpc_call_ops filelayout_commit_call_ops = {
+static const struct rpc_call_ops filelayout_commit_call_ops = {
.rpc_call_prepare = filelayout_write_prepare,
.rpc_call_done = filelayout_write_call_done,
+ .rpc_count_stats = filelayout_write_count_stats,
.rpc_release = filelayout_commit_release,
};
@@ -367,7 +411,8 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
idx = nfs4_fl_calc_ds_index(lseg, j);
ds = nfs4_fl_prepare_ds(lseg, idx);
if (!ds) {
- printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+ printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
+ __func__);
set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
@@ -575,7 +620,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
goto out_err_free;
fl->fh_array[i]->size = be32_to_cpup(p++);
if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
- printk(KERN_ERR "Too big fh %d received %d\n",
+ printk(KERN_ERR "NFS: Too big fh %d received %d\n",
i, fl->fh_array[i]->size);
goto out_err_free;
}
@@ -640,14 +685,16 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
int size = (fl->stripe_type == STRIPE_SPARSE) ?
fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
- fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags);
+ fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
if (!fl->commit_buckets) {
filelayout_free_lseg(&fl->generic_hdr);
return NULL;
}
fl->number_of_buckets = size;
- for (i = 0; i < size; i++)
- INIT_LIST_HEAD(&fl->commit_buckets[i]);
+ for (i = 0; i < size; i++) {
+ INIT_LIST_HEAD(&fl->commit_buckets[i].written);
+ INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
+ }
}
return &fl->generic_hdr;
}
@@ -679,7 +726,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
return (p_stripe == r_stripe);
}
-void
+static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
@@ -696,7 +743,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
nfs_pageio_reset_read_mds(pgio);
}
-void
+static void
filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
@@ -725,11 +772,6 @@ static const struct nfs_pageio_ops filelayout_pg_write_ops = {
.pg_doio = pnfs_generic_pg_writepages,
};
-static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
-{
- return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
-}
-
static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
{
if (fl->stripe_type == STRIPE_SPARSE)
@@ -738,13 +780,48 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
return j;
}
-struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
+/* The generic layer is about to remove the req from the commit list.
+ * If this will make the bucket empty, it will need to put the lseg reference.
+ */
+static void
+filelayout_clear_request_commit(struct nfs_page *req)
+{
+ struct pnfs_layout_segment *freeme = NULL;
+ struct inode *inode = req->wb_context->dentry->d_inode;
+
+ spin_lock(&inode->i_lock);
+ if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
+ goto out;
+ if (list_is_singular(&req->wb_list)) {
+ struct pnfs_layout_segment *lseg;
+
+ /* From here we can find the bucket, but for the moment,
+ * since there is only one relevant lseg...
+ */
+ list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
+ if (lseg->pls_range.iomode == IOMODE_RW) {
+ freeme = lseg;
+ break;
+ }
+ }
+ }
+out:
+ nfs_request_remove_commit_list(req);
+ spin_unlock(&inode->i_lock);
+ put_lseg(freeme);
+}
+
+static struct list_head *
+filelayout_choose_commit_list(struct nfs_page *req,
+ struct pnfs_layout_segment *lseg)
{
- struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
u32 i, j;
struct list_head *list;
+ if (fl->commit_through_mds)
+ return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
+
/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server. An attractive
* alternative is to add a field to nfs_write_data and nfs_page
@@ -754,14 +831,30 @@ struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
j = nfs4_fl_calc_j_index(lseg,
(loff_t)req->wb_index << PAGE_CACHE_SHIFT);
i = select_bucket_index(fl, j);
- list = &fl->commit_buckets[i];
+ list = &fl->commit_buckets[i].written;
if (list_empty(list)) {
- /* Non-empty buckets hold a reference on the lseg */
+ /* Non-empty buckets hold a reference on the lseg. That ref
+ * is normally transferred to the COMMIT call and released
+ * there. It could also be released if the last req is pulled
+ * off due to a rewrite, in which case it will be done in
+ * filelayout_remove_commit_req
+ */
get_lseg(lseg);
}
+ set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
return list;
}
+static void
+filelayout_mark_request_commit(struct nfs_page *req,
+ struct pnfs_layout_segment *lseg)
+{
+ struct list_head *list;
+
+ list = filelayout_choose_commit_list(req, lseg);
+ nfs_request_add_commit_list(req, list);
+}
+
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
@@ -797,11 +890,12 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
ds = nfs4_fl_prepare_ds(lseg, idx);
if (!ds) {
- printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+ printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
+ __func__);
set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
prepare_to_resend_writes(data);
- data->mds_ops->rpc_release(data);
+ filelayout_commit_release(data);
return -EAGAIN;
}
dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
@@ -817,24 +911,87 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
/*
* This is only useful while we are using whole file layouts.
*/
-static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
+static struct pnfs_layout_segment *
+find_only_write_lseg_locked(struct inode *inode)
{
- struct pnfs_layout_segment *lseg, *rv = NULL;
+ struct pnfs_layout_segment *lseg;
- spin_lock(&inode->i_lock);
list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
if (lseg->pls_range.iomode == IOMODE_RW)
- rv = get_lseg(lseg);
+ return lseg;
+ return NULL;
+}
+
+static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
+{
+ struct pnfs_layout_segment *rv;
+
+ spin_lock(&inode->i_lock);
+ rv = find_only_write_lseg_locked(inode);
+ if (rv)
+ get_lseg(rv);
spin_unlock(&inode->i_lock);
return rv;
}
-static int alloc_ds_commits(struct inode *inode, struct list_head *list)
+static int
+filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
+ spinlock_t *lock)
+{
+ struct list_head *src = &bucket->written;
+ struct list_head *dst = &bucket->committing;
+ struct nfs_page *req, *tmp;
+ int ret = 0;
+
+ list_for_each_entry_safe(req, tmp, src, wb_list) {
+ if (!nfs_lock_request(req))
+ continue;
+ if (cond_resched_lock(lock))
+ list_safe_reset_next(req, tmp, wb_list);
+ nfs_request_remove_commit_list(req);
+ clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+ nfs_list_add_request(req, dst);
+ ret++;
+ if (ret == max)
+ break;
+ }
+ return ret;
+}
+
+/* Move reqs from written to committing lists, returning count of number moved.
+ * Note called with i_lock held.
+ */
+static int filelayout_scan_commit_lists(struct inode *inode, int max,
+ spinlock_t *lock)
+{
+ struct pnfs_layout_segment *lseg;
+ struct nfs4_filelayout_segment *fl;
+ int i, rv = 0, cnt;
+
+ lseg = find_only_write_lseg_locked(inode);
+ if (!lseg)
+ goto out_done;
+ fl = FILELAYOUT_LSEG(lseg);
+ if (fl->commit_through_mds)
+ goto out_done;
+ for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
+ cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
+ max, lock);
+ max -= cnt;
+ rv += cnt;
+ }
+out_done:
+ return rv;
+}
+
+static unsigned int
+alloc_ds_commits(struct inode *inode, struct list_head *list)
{
struct pnfs_layout_segment *lseg;
struct nfs4_filelayout_segment *fl;
struct nfs_write_data *data;
int i, j;
+ unsigned int nreq = 0;
/* Won't need this when non-whole file layout segments are supported
* instead we will use a pnfs_layout_hdr structure */
@@ -843,28 +1000,27 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list)
return 0;
fl = FILELAYOUT_LSEG(lseg);
for (i = 0; i < fl->number_of_buckets; i++) {
- if (list_empty(&fl->commit_buckets[i]))
+ if (list_empty(&fl->commit_buckets[i].committing))
continue;
data = nfs_commitdata_alloc();
if (!data)
- goto out_bad;
+ break;
data->ds_commit_index = i;
data->lseg = lseg;
list_add(&data->pages, list);
+ nreq++;
}
- put_lseg(lseg);
- return 0;
-out_bad:
+ /* Clean up on error */
for (j = i; j < fl->number_of_buckets; j++) {
- if (list_empty(&fl->commit_buckets[i]))
+ if (list_empty(&fl->commit_buckets[i].committing))
continue;
- nfs_retry_commit(&fl->commit_buckets[i], lseg);
+ nfs_retry_commit(&fl->commit_buckets[i].committing, lseg);
put_lseg(lseg); /* associated with emptying bucket */
}
put_lseg(lseg);
/* Caller will clean up entries put on list */
- return -ENOMEM;
+ return nreq;
}
/* This follows nfs_commit_list pretty closely */
@@ -874,40 +1030,40 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
{
struct nfs_write_data *data, *tmp;
LIST_HEAD(list);
+ unsigned int nreq = 0;
if (!list_empty(mds_pages)) {
data = nfs_commitdata_alloc();
- if (!data)
- goto out_bad;
- data->lseg = NULL;
- list_add(&data->pages, &list);
+ if (data != NULL) {
+ data->lseg = NULL;
+ list_add(&data->pages, &list);
+ nreq++;
+ } else
+ nfs_retry_commit(mds_pages, NULL);
}
- if (alloc_ds_commits(inode, &list))
- goto out_bad;
+ nreq += alloc_ds_commits(inode, &list);
+
+ if (nreq == 0) {
+ nfs_commit_clear_lock(NFS_I(inode));
+ goto out;
+ }
+
+ atomic_add(nreq, &NFS_I(inode)->commits_outstanding);
list_for_each_entry_safe(data, tmp, &list, pages) {
list_del_init(&data->pages);
- atomic_inc(&NFS_I(inode)->commits_outstanding);
if (!data->lseg) {
nfs_init_commit(data, mds_pages, NULL);
nfs_initiate_commit(data, NFS_CLIENT(inode),
data->mds_ops, how);
} else {
- nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg);
+ nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg);
filelayout_initiate_commit(data, how);
}
}
- return 0;
- out_bad:
- list_for_each_entry_safe(data, tmp, &list, pages) {
- nfs_retry_commit(&data->pages, data->lseg);
- list_del_init(&data->pages);
- nfs_commit_free(data);
- }
- nfs_retry_commit(mds_pages, NULL);
- nfs_commit_clear_lock(NFS_I(inode));
- return -ENOMEM;
+out:
+ return PNFS_ATTEMPTED;
}
static void
@@ -924,8 +1080,9 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.free_lseg = filelayout_free_lseg,
.pg_read_ops = &filelayout_pg_read_ops,
.pg_write_ops = &filelayout_pg_write_ops,
- .mark_pnfs_commit = filelayout_mark_pnfs_commit,
- .choose_commit_list = filelayout_choose_commit_list,
+ .mark_request_commit = filelayout_mark_request_commit,
+ .clear_request_commit = filelayout_clear_request_commit,
+ .scan_commit_lists = filelayout_scan_commit_lists,
.commit_pagelist = filelayout_commit_pagelist,
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,