From 78fe0f41d9937ee62817912ac8d627e06243c269 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 31 May 2011 19:05:47 -0400 Subject: NFS: use scope from exchange_id to skip reclaim Reclaim can be skipped if the "eir_server_scope" from the exchange_id proc differs from previous calls. Also, in the future server_scope will be useful for determining whether client trunking is available. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/client.c') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b3dc2b88b65..006f8ff0a3c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -293,6 +293,7 @@ static void nfs_free_client(struct nfs_client *clp) nfs4_deviceid_purge_client(clp); kfree(clp->cl_hostname); + kfree(clp->server_scope); kfree(clp); dprintk("<-- nfs_free_client()\n"); -- cgit v1.2.3-70-g09d2 From 6382a44138e7aa40bf52170e7afc014443a24806 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 1 Jun 2011 16:44:44 -0400 Subject: NFS: move pnfs layouts to nfs_server structure Layouts should be tracked per nfs_server (aka superblock) instead of per struct nfs_client, which may have multiple FSIDs associated with it. 
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 57 ++++++++++++++++++++++++++++++----------------- fs/nfs/client.c | 4 +--- fs/nfs/pnfs.c | 13 ++++++++--- include/linux/nfs_fs_sb.h | 2 +- 4 files changed, 48 insertions(+), 28 deletions(-) (limited to 'fs/nfs/client.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index d4d1954e9bb..74780f9f852 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf static u32 initiate_file_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; struct inode *ino; bool found = false; @@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp, LIST_HEAD(free_me_list); spin_lock(&clp->cl_lock); - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { - if (nfs_compare_fh(&args->cbl_fh, - &NFS_I(lo->plh_inode)->fh)) - continue; - ino = igrab(lo->plh_inode); - if (!ino) - continue; - found = true; - /* Without this, layout can be freed as soon - * as we release cl_lock. - */ - get_layout_hdr(lo); - break; + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (nfs_compare_fh(&args->cbl_fh, + &NFS_I(lo->plh_inode)->fh)) + continue; + ino = igrab(lo->plh_inode); + if (!ino) + continue; + found = true; + /* Without this, layout can be freed as soon + * as we release cl_lock. 
+ */ + get_layout_hdr(lo); + break; + } + if (found) + break; } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); + if (!found) return NFS4ERR_NOMATCHING_LAYOUT; @@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, static u32 initiate_bulk_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; struct inode *ino; u32 rv = NFS4ERR_NOMATCHING_LAYOUT; @@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, }; spin_lock(&clp->cl_lock); - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { if ((args->cbl_recall_type == RETURN_FSID) && - memcmp(&NFS_SERVER(lo->plh_inode)->fsid, - &args->cbl_fsid, sizeof(struct nfs_fsid))) - continue; - if (!igrab(lo->plh_inode)) + memcmp(&server->fsid, &args->cbl_fsid, + sizeof(struct nfs_fsid))) continue; - get_layout_hdr(lo); - BUG_ON(!list_empty(&lo->plh_bulk_recall)); - list_add(&lo->plh_bulk_recall, &recall_list); + + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (!igrab(lo->plh_inode)) + continue; + get_layout_hdr(lo); + BUG_ON(!list_empty(&lo->plh_bulk_recall)); + list_add(&lo->plh_bulk_recall, &recall_list); + } } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); + list_for_each_entry_safe(lo, tmp, &recall_list, plh_bulk_recall) { ino = lo->plh_inode; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 006f8ff0a3c..5452ada5946 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ cred = rpc_lookup_machine_cred(); if (!IS_ERR(cred)) clp->cl_machine_cred = cred; -#if defined(CONFIG_NFS_V4_1) - INIT_LIST_HEAD(&clp->cl_layouts); -#endif nfs_fscache_get_client_cookie(clp); return clp; @@ -1063,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void) INIT_LIST_HEAD(&server->client_link); 
INIT_LIST_HEAD(&server->master_link); INIT_LIST_HEAD(&server->delegations); + INIT_LIST_HEAD(&server->layouts); atomic_set(&server->active, 0); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 29c0ca7fc34..ff820077237 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -448,11 +448,17 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) void pnfs_destroy_all_layouts(struct nfs_client *clp) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; LIST_HEAD(tmp_list); spin_lock(&clp->cl_lock); - list_splice_init(&clp->cl_layouts, &tmp_list); + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!list_empty(&server->layouts)) + list_splice_init(&server->layouts, &tmp_list); + } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); while (!list_empty(&tmp_list)) { @@ -920,7 +926,8 @@ pnfs_update_layout(struct inode *ino, }; unsigned pg_offset; struct nfs_inode *nfsi = NFS_I(ino); - struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; + struct nfs_server *server = NFS_SERVER(ino); + struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; bool first = false; @@ -964,7 +971,7 @@ pnfs_update_layout(struct inode *ino, */ spin_lock(&clp->cl_lock); BUG_ON(!list_empty(&lo->plh_layouts)); - list_add_tail(&lo->plh_layouts, &clp->cl_layouts); + list_add_tail(&lo->plh_layouts, &server->layouts); spin_unlock(&clp->cl_lock); } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index f23b1883155..4faeac8f448 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -78,7 +78,6 @@ struct nfs_client { /* The flags used for obtaining the clientid during EXCHANGE_ID */ u32 cl_exchange_flags; struct nfs4_session *cl_session; /* sharred session */ - struct list_head cl_layouts; #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE @@ -152,6 +151,7 @@ struct nfs_server { struct rb_root openowner_id; struct rb_root lockowner_id; #endif + struct list_head layouts; struct 
list_head delegations; void (*destroy)(struct nfs_server *); -- cgit v1.2.3-70-g09d2 From 94b134ac8e9965309e70684b504c53bca36338b4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 19:26:49 -0400 Subject: NFS: Convert nfs4_set_ds_client to EXPORT_SYMBOL_GPL This is not part of an external ABI... Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/client.c') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5452ada5946..19ea7d9c75e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1463,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, dprintk("<-- %s %p\n", __func__, clp); return clp; } -EXPORT_SYMBOL(nfs4_set_ds_client); +EXPORT_SYMBOL_GPL(nfs4_set_ds_client); /* * Session has been established, and the client marked ready. -- cgit v1.2.3-70-g09d2 From 738fd0f360359aecc7fcd7604bbe9e854d81fb1f Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sat, 30 Jul 2011 20:52:36 -0400 Subject: pnfs: add set-clear layoutdriver interface To allow layout driver to issue getdevicelist at mount time, and clean up at umount time. 
[fixup non NFS_V4_1 set_pnfs_layoutdriver definition] [pnfs: pass mntfh down the init_pnfs path] Signed-off-by: Benny Halevy Signed-off-by: Benny Halevy Signed-off-by: Jim Rees Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 8 +++++--- fs/nfs/pnfs.c | 15 +++++++++++++-- fs/nfs/pnfs.h | 8 ++++++-- 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'fs/nfs/client.c') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 19ea7d9c75e..a9b18483cb2 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -904,7 +904,9 @@ error: /* * Load up the server record from information gained in an fsinfo record */ -static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) +static void nfs_server_set_fsinfo(struct nfs_server *server, + struct nfs_fh *mntfh, + struct nfs_fsinfo *fsinfo) { unsigned long max_rpc_payload; @@ -934,7 +936,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * if (server->wsize > NFS_MAX_FILE_IO_SIZE) server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - set_pnfs_layoutdriver(server, fsinfo->layouttype); + set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype); server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); @@ -980,7 +982,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str if (error < 0) goto out_error; - nfs_server_set_fsinfo(server, &fsinfo); + nfs_server_set_fsinfo(server, mntfh, &fsinfo); /* Get some general file system info */ if (server->namelen == 0) { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a7e5f17f777..3a47f7ce1e9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -76,8 +76,11 @@ find_pnfs_driver(u32 id) void unset_pnfs_layoutdriver(struct nfs_server *nfss) { - if (nfss->pnfs_curr_ld) + if (nfss->pnfs_curr_ld) { + if (nfss->pnfs_curr_ld->clear_layoutdriver) + nfss->pnfs_curr_ld->clear_layoutdriver(nfss); module_put(nfss->pnfs_curr_ld->owner); + } 
nfss->pnfs_curr_ld = NULL; } @@ -88,7 +91,8 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss) * @id layout type. Zero (illegal layout type) indicates pNFS not in use. */ void -set_pnfs_layoutdriver(struct nfs_server *server, u32 id) +set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, + u32 id) { struct pnfs_layoutdriver_type *ld_type = NULL; @@ -115,6 +119,13 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id) goto out_no_driver; } server->pnfs_curr_ld = ld_type; + if (ld_type->set_layoutdriver + && ld_type->set_layoutdriver(server, mntfh)) { + printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n", + __func__, id); + module_put(ld_type->owner); + goto out_no_driver; + } dprintk("%s: pNFS module for %u set\n", __func__, id); return; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 7074394944a..bddd8b997e1 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -80,6 +80,9 @@ struct pnfs_layoutdriver_type { struct module *owner; unsigned flags; + int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); + int (*clear_layoutdriver) (struct nfs_server *); + struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags); void (*free_layout_hdr) (struct pnfs_layout_hdr *); @@ -167,7 +170,7 @@ void put_lseg(struct pnfs_layout_segment *lseg); bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); -void set_pnfs_layoutdriver(struct nfs_server *, u32 id); +void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); @@ -374,7 +377,8 @@ pnfs_roc_drain(struct inode *ino, u32 *barrier) return false; } -static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id) +static inline 
void set_pnfs_layoutdriver(struct nfs_server *s, + const struct nfs_fh *mntfh, u32 id) { } -- cgit v1.2.3-70-g09d2 From dae100c2b1b9463996aab9162f2258145c43f7df Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Sat, 30 Jul 2011 20:52:37 -0400 Subject: pnfs: ask for layout_blksize and save it in nfs_server Block layout needs it to determine IO size. Signed-off-by: Fred Isaman Signed-off-by: Tao Guo Signed-off-by: Benny Halevy Signed-off-by: Benny Halevy Signed-off-by: Jim Rees Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 + fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 5 ++- fs/nfs/nfs4xdr.c | 99 +++++++++++++++++++++++++++++++++++++++-------- include/linux/nfs_fs_sb.h | 3 +- include/linux/nfs_xdr.h | 3 +- 6 files changed, 91 insertions(+), 22 deletions(-) (limited to 'fs/nfs/client.c') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a9b18483cb2..de00a373f08 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -936,6 +936,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, if (server->wsize > NFS_MAX_FILE_IO_SIZE) server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->pnfs_blksize = fsinfo->blksize; set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype); server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1909ee8be35..1ec1a85fa71 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -318,7 +318,7 @@ extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; extern const u32 nfs4_fattr_bitmap[2]; extern const u32 nfs4_statfs_bitmap[2]; extern const u32 nfs4_pathconf_bitmap[2]; -extern const u32 nfs4_fsinfo_bitmap[2]; +extern const u32 nfs4_fsinfo_bitmap[3]; extern const u32 nfs4_fs_locations_bitmap[2]; /* nfs4renewd.c */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index af32d3df054..e86de799dd1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -140,12 +140,13 @@ const u32 
nfs4_pathconf_bitmap[2] = { 0 }; -const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE +const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_LEASE_TIME, FATTR4_WORD1_TIME_DELTA - | FATTR4_WORD1_FS_LAYOUT_TYPES + | FATTR4_WORD1_FS_LAYOUT_TYPES, + FATTR4_WORD2_LAYOUT_BLKSIZE }; const u32 nfs4_fs_locations_bitmap[2] = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5f769f8d05b..026166993d1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -113,7 +113,11 @@ static int nfs4_stat_to_errno(int); #define encode_restorefh_maxsz (op_encode_hdr_maxsz) #define decode_restorefh_maxsz (op_decode_hdr_maxsz) #define encode_fsinfo_maxsz (encode_getattr_maxsz) -#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 15) +/* The 5 accounts for the PNFS attributes, and assumes that at most three + * layout types will be returned. + */ +#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz + 4 + 8 + 5) #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) #define decode_renew_maxsz (op_decode_hdr_maxsz) #define encode_setclientid_maxsz \ @@ -1123,6 +1127,35 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm hdr->replen += decode_getattr_maxsz; } +static void +encode_getattr_three(struct xdr_stream *xdr, + uint32_t bm0, uint32_t bm1, uint32_t bm2, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_GETATTR); + if (bm2) { + p = reserve_space(xdr, 16); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(bm0); + *p++ = cpu_to_be32(bm1); + *p = cpu_to_be32(bm2); + } else if (bm1) { + p = reserve_space(xdr, 12); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(bm0); + *p = cpu_to_be32(bm1); + } else { + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(1); + *p = cpu_to_be32(bm0); + } + hdr->nops++; + hdr->replen += decode_getattr_maxsz; +} + static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, 
struct compound_hdr *hdr) { encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], @@ -1131,8 +1164,11 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], - bitmask[1] & nfs4_fsinfo_bitmap[1], hdr); + encode_getattr_three(xdr, + bitmask[0] & nfs4_fsinfo_bitmap[0], + bitmask[1] & nfs4_fsinfo_bitmap[1], + bitmask[2] & nfs4_fsinfo_bitmap[2], + hdr); } static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) @@ -2643,7 +2679,7 @@ static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, struct compound_hdr hdr = { .nops = 0, }; - const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; + const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME }; encode_compound_hdr(xdr, req, &hdr); encode_setclientid_confirm(xdr, arg, &hdr); @@ -2787,7 +2823,7 @@ static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->la_seq_args), }; - const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; + const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME }; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->la_seq_args, &hdr); @@ -3068,14 +3104,17 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) goto out_overflow; bmlen = be32_to_cpup(p); - bitmap[0] = bitmap[1] = 0; + bitmap[0] = bitmap[1] = bitmap[2] = 0; p = xdr_inline_decode(xdr, (bmlen << 2)); if (unlikely(!p)) goto out_overflow; if (bmlen > 0) { bitmap[0] = be32_to_cpup(p++); - if (bmlen > 1) - bitmap[1] = be32_to_cpup(p); + if (bmlen > 1) { + bitmap[1] = be32_to_cpup(p++); + if (bmlen > 2) + bitmap[2] = be32_to_cpup(p); + } } return 0; out_overflow: @@ -3107,8 +3146,9 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3 return ret; 
bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; } else - bitmask[0] = bitmask[1] = 0; - dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]); + bitmask[0] = bitmask[1] = bitmask[2] = 0; + dprintk("%s: bitmask=%08x:%08x:%08x\n", __func__, + bitmask[0], bitmask[1], bitmask[2]); return 0; } @@ -4162,7 +4202,7 @@ out_overflow: static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) { __be32 *savep; - uint32_t attrlen, bitmap[2] = {0}; + uint32_t attrlen, bitmap[3] = {0}; int status; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -4188,7 +4228,7 @@ xdr_error: static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) { __be32 *savep; - uint32_t attrlen, bitmap[2] = {0}; + uint32_t attrlen, bitmap[3] = {0}; int status; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -4220,7 +4260,7 @@ xdr_error: static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) { __be32 *savep; - uint32_t attrlen, bitmap[2] = {0}; + uint32_t attrlen, bitmap[3] = {0}; int status; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -4360,7 +4400,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat { __be32 *savep; uint32_t attrlen, - bitmap[2] = {0}; + bitmap[3] = {0}; int status; status = decode_op_hdr(xdr, OP_GETATTR); @@ -4446,10 +4486,32 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, return status; } +/* + * The preferred block size for layout directed io + */ +static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *res) +{ + __be32 *p; + + dprintk("%s: bitmap is %x\n", __func__, bitmap[2]); + *res = 0; + if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) { + print_overflow_msg(__func__, xdr); + return -EIO; + } + *res = be32_to_cpup(p); + bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE; + } + return 0; +} + static int decode_fsinfo(struct xdr_stream 
*xdr, struct nfs_fsinfo *fsinfo) { __be32 *savep; - uint32_t attrlen, bitmap[2]; + uint32_t attrlen, bitmap[3]; int status; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -4477,6 +4539,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); if (status != 0) goto xdr_error; + status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize); + if (status) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: @@ -4896,7 +4961,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, { __be32 *savep; uint32_t attrlen, - bitmap[2] = {0}; + bitmap[3] = {0}; struct kvec *iov = req->rq_rcv_buf.head; int status; @@ -6852,7 +6917,7 @@ out: int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus) { - uint32_t bitmap[2] = {0}; + uint32_t bitmap[3] = {0}; uint32_t len; __be32 *p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4faeac8f448..b2ea8b82d2c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -132,7 +132,7 @@ struct nfs_server { #endif #ifdef CONFIG_NFS_V4 - u32 attr_bitmask[2];/* V4 bitmask representing the set + u32 attr_bitmask[3];/* V4 bitmask representing the set of attributes supported on this filesystem */ u32 cache_consistency_bitmask[2]; @@ -145,6 +145,7 @@ struct nfs_server { filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; + u32 pnfs_blksize; /* layout_blksize attr */ /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 21f333eae3c..94f27e56df9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -122,6 +122,7 @@ struct nfs_fsinfo { struct timespec time_delta; /* server time granularity */ __u32 lease_time; 
/* in seconds */ __u32 layouttype; /* supported pnfs layout driver */ + __u32 blksize; /* preferred pnfs io block size */ }; struct nfs_fsstat { @@ -954,7 +955,7 @@ struct nfs4_server_caps_arg { }; struct nfs4_server_caps_res { - u32 attr_bitmask[2]; + u32 attr_bitmask[3]; u32 acl_bitmask; u32 has_links; u32 has_symlinks; -- cgit v1.2.3-70-g09d2 From fe0a9b740881d181e3c96c1f6f6043e252692ffe Mon Sep 17 00:00:00 2001 From: Jim Rees Date: Sat, 30 Jul 2011 20:52:42 -0400 Subject: pnfsblock: add device operations Signed-off-by: Jim Rees Signed-off-by: Fred Isaman Signed-off-by: Benny Halevy Signed-off-by: Benny Halevy [upcall bugfixes] Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/Makefile | 2 +- fs/nfs/blocklayout/blocklayout.c | 42 ++++++++ fs/nfs/blocklayout/blocklayout.h | 40 ++++++++ fs/nfs/blocklayout/blocklayoutdev.c | 191 ++++++++++++++++++++++++++++++++++++ fs/nfs/client.c | 2 +- include/linux/nfs.h | 2 + 6 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 fs/nfs/blocklayout/blocklayoutdev.c (limited to 'fs/nfs/client.c') diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile index 5cfadf6ebc9..5bf3409084d 100644 --- a/fs/nfs/blocklayout/Makefile +++ b/fs/nfs/blocklayout/Makefile @@ -2,4 +2,4 @@ # Makefile for the pNFS block layout driver kernel module # obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o -blocklayoutdriver-objs := blocklayout.o extents.o +blocklayoutdriver-objs := blocklayout.o extents.o blocklayoutdev.o diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 8dde3723482..c8387844104 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -31,6 +31,8 @@ */ #include #include +#include +#include #include "blocklayout.h" @@ -40,6 +42,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andy Adamson "); MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); +struct dentry *bl_device_pipe; +wait_queue_head_t bl_wq; + static enum 
pnfs_try_status bl_read_pagelist(struct nfs_read_data *rdata) { @@ -176,13 +181,49 @@ static struct pnfs_layoutdriver_type blocklayout_type = { .pg_write_ops = &bl_pg_write_ops, }; +static const struct rpc_pipe_ops bl_upcall_ops = { + .upcall = bl_pipe_upcall, + .downcall = bl_pipe_downcall, + .destroy_msg = bl_pipe_destroy_msg, +}; + static int __init nfs4blocklayout_init(void) { + struct vfsmount *mnt; + struct path path; int ret; dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); ret = pnfs_register_layoutdriver(&blocklayout_type); + if (ret) + goto out; + + init_waitqueue_head(&bl_wq); + + mnt = rpc_get_mount(); + if (IS_ERR(mnt)) { + ret = PTR_ERR(mnt); + goto out_remove; + } + + ret = vfs_path_lookup(mnt->mnt_root, + mnt, + NFS_PIPE_DIRNAME, 0, &path); + if (ret) + goto out_remove; + + bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL, + &bl_upcall_ops, 0); + if (IS_ERR(bl_device_pipe)) { + ret = PTR_ERR(bl_device_pipe); + goto out_remove; + } +out: + return ret; + +out_remove: + pnfs_unregister_layoutdriver(&blocklayout_type); return ret; } @@ -192,6 +233,7 @@ static void __exit nfs4blocklayout_exit(void) __func__); pnfs_unregister_layoutdriver(&blocklayout_type); + rpc_unlink(bl_device_pipe); } MODULE_ALIAS("nfs-layouttype4-3"); diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 98e2f60c214..dd25f1b3fe1 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -34,8 +34,16 @@ #include #include +#include + #include "../pnfs.h" +struct pnfs_block_dev { + struct list_head bm_node; + struct nfs4_deviceid bm_mdevid; /* associated devid */ + struct block_device *bm_mdev; /* meta device itself */ +}; + enum exstate4 { PNFS_BLOCK_READWRITE_DATA = 0, PNFS_BLOCK_READ_DATA = 1, @@ -88,5 +96,37 @@ static inline struct pnfs_block_layout *BLK_LO2EXT(struct pnfs_layout_hdr *lo) return container_of(lo, struct pnfs_block_layout, bl_layout); } +struct bl_dev_msg { + int status; + 
uint32_t major, minor; +}; + +struct bl_msg_hdr { + u8 type; + u16 totallen; /* length of entire message, including hdr itself */ +}; + +extern struct dentry *bl_device_pipe; +extern wait_queue_head_t bl_wq; + +#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ +#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices */ +#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ +#define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ +#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ + +/* blocklayoutdev.c */ +ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *, + char __user *, size_t); +ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); +void bl_pipe_destroy_msg(struct rpc_pipe_msg *); +struct block_device *nfs4_blkdev_get(dev_t dev); +int nfs4_blkdev_put(struct block_device *bdev); +struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, + struct pnfs_device *dev, + struct list_head *sdlist); +int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, + struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); + void bl_put_extent(struct pnfs_block_extent *be); #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c new file mode 100644 index 00000000000..7e1377fcfdc --- /dev/null +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -0,0 +1,191 @@ +/* + * linux/fs/nfs/blocklayout/blocklayoutdev.c + * + * Device operations for the pnfs nfs4 block layout driver. + * + * Copyright (c) 2006 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + * Fred Isaman + * + * permission is granted to use, copy, create derivative works and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the university of michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. 
if + * the above copyright notice or any other identification of the + * university of michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the + * university of michigan as to its fitness for any purpose, and without + * warranty by the university of michigan of any kind, either express + * or implied, including without limitation the implied warranties of + * merchantability and fitness for a particular purpose. the regents + * of the university of michigan shall not be liable for any damages, + * including special, indirect, incidental, or consequential damages, + * with respect to any claim arising out or in connection with the use + * of the software, even if it has been or is hereafter advised of the + * possibility of such damages. + */ +#include +#include /* __bread */ + +#include +#include +#include + +#include "blocklayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +/* Open a block_device by device number. */ +struct block_device *nfs4_blkdev_get(dev_t dev) +{ + struct block_device *bd; + + dprintk("%s enter\n", __func__); + bd = blkdev_get_by_dev(dev, FMODE_READ, NULL); + if (IS_ERR(bd)) + goto fail; + return bd; +fail: + dprintk("%s failed to open device : %ld\n", + __func__, PTR_ERR(bd)); + return NULL; +} + +/* + * Release the block device + */ +int nfs4_blkdev_put(struct block_device *bdev) +{ + dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev), + MINOR(bdev->bd_dev)); + return blkdev_put(bdev, FMODE_READ); +} + +/* + * Shouldn't there be a rpc_generic_upcall() to do this for us? 
+ */ +ssize_t bl_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, + char __user *dst, size_t buflen) +{ + char *data = (char *)msg->data + msg->copied; + size_t mlen = min(msg->len - msg->copied, buflen); + unsigned long left; + + left = copy_to_user(dst, data, mlen); + if (left == mlen) { + msg->errno = -EFAULT; + return -EFAULT; + } + + mlen -= left; + msg->copied += mlen; + msg->errno = 0; + return mlen; +} + +static struct bl_dev_msg bl_mount_reply; + +ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, + size_t mlen) +{ + if (mlen != sizeof (struct bl_dev_msg)) + return -EINVAL; + + if (copy_from_user(&bl_mount_reply, src, mlen) != 0) + return -EFAULT; + + wake_up(&bl_wq); + + return mlen; +} + +void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) +{ + if (msg->errno >= 0) + return; + wake_up(&bl_wq); +} + +/* + * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf. + */ +struct pnfs_block_dev * +nfs4_blk_decode_device(struct nfs_server *server, + struct pnfs_device *dev, + struct list_head *sdlist) +{ + struct pnfs_block_dev *rv = NULL; + struct block_device *bd = NULL; + struct rpc_pipe_msg msg; + struct bl_msg_hdr bl_msg = { + .type = BL_DEVICE_MOUNT, + .totallen = dev->mincount, + }; + uint8_t *dataptr; + DECLARE_WAITQUEUE(wq, current); + struct bl_dev_msg *reply = &bl_mount_reply; + + dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); + dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, + dev->mincount); + + memset(&msg, 0, sizeof(msg)); + msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); + if (!msg.data) { + rv = ERR_PTR(-ENOMEM); + goto out; + } + + memcpy(msg.data, &bl_msg, sizeof(bl_msg)); + dataptr = (uint8_t *) msg.data; + memcpy(&dataptr[sizeof(bl_msg)], dev->area, dev->mincount); + msg.len = sizeof(bl_msg) + dev->mincount; + + dprintk("%s CALLING USERSPACE DAEMON\n", __func__); + add_wait_queue(&bl_wq, &wq); + if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { + 
remove_wait_queue(&bl_wq, &wq); + goto out; + } + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&bl_wq, &wq); + + if (reply->status != BL_DEVICE_REQUEST_PROC) { + dprintk("%s failed to open device: %d\n", + __func__, reply->status); + rv = ERR_PTR(-EINVAL); + goto out; + } + + bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); + if (IS_ERR(bd)) { + dprintk("%s failed to open device : %ld\n", + __func__, PTR_ERR(bd)); + goto out; + } + + rv = kzalloc(sizeof(*rv), GFP_NOFS); + if (!rv) { + rv = ERR_PTR(-ENOMEM); + goto out; + } + + rv->bm_mdev = bd; + memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); + dprintk("%s Created device %s with bd_block_size %u\n", + __func__, + bd->bd_disk->disk_name, + bd->bd_block_size); + +out: + kfree(msg.data); + return rv; +} diff --git a/fs/nfs/client.c b/fs/nfs/client.c index de00a373f08..5833fbbf59b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -105,7 +105,7 @@ struct rpc_program nfs_program = { .nrvers = ARRAY_SIZE(nfs_version), .version = nfs_version, .stats = &nfs_rpcstat, - .pipe_dir_name = "/nfs", + .pipe_dir_name = NFS_PIPE_DIRNAME, }; struct rpc_stat nfs_rpcstat = { diff --git a/include/linux/nfs.h b/include/linux/nfs.h index f387919bbc5..8c6ee44914c 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -29,6 +29,8 @@ #define NFS_MNT_VERSION 1 #define NFS_MNT3_VERSION 3 +#define NFS_PIPE_DIRNAME "/nfs" + /* * NFS stats. The good thing with these values is that NFSv3 errors are * a superset of NFSv2 errors (with the exception of NFSERR_WFLUSH which -- cgit v1.2.3-70-g09d2