diff options
Diffstat (limited to 'fs')
112 files changed, 7579 insertions, 1837 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index d78e950402c..a97263be6a9 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -236,10 +236,12 @@ source "fs/nfsd/Kconfig" config LOCKD tristate + depends on FILE_LOCKING config LOCKD_V4 bool depends on NFSD_V3 || NFS_V3 + depends on FILE_LOCKING default y config EXPORTFS diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 1dd96d4406c..47d4a01c539 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -52,6 +52,19 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { .d_delete = anon_inodefs_delete_dentry, }; +/* + * nop .set_page_dirty method so that people can use .page_mkwrite on + * anon inodes. + */ +static int anon_set_page_dirty(struct page *page) +{ + return 0; +}; + +static const struct address_space_operations anon_aops = { + .set_page_dirty = anon_set_page_dirty, +}; + /** * anon_inode_getfd - creates a new file instance by hooking it up to an * anonymous inode, and a dentry that describe the "class" @@ -151,6 +164,8 @@ static struct inode *anon_inode_mkinode(void) inode->i_fop = &anon_inode_fops; + inode->i_mapping->a_ops = &anon_aops; + /* * Mark the inode dirty from the very beginning, * that way it will never be moved to the dirty diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 40381df3486..9fa212b014a 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1340,8 +1340,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus, prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; prstatus->pr_sigpend = p->pending.signal.sig[0]; prstatus->pr_sighold = p->blocked.sig[0]; + rcu_read_lock(); + prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent)); + rcu_read_unlock(); prstatus->pr_pid = task_pid_vnr(p); - prstatus->pr_ppid = task_pid_vnr(p->real_parent); prstatus->pr_pgrp = task_pgrp_vnr(p); prstatus->pr_sid = task_session_vnr(p); if (thread_group_leader(p)) { @@ -1382,8 +1384,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_psargs[i] = ' '; psinfo->pr_psargs[len] = 0; + rcu_read_lock(); + psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent)); + rcu_read_unlock(); psinfo->pr_pid = task_pid_vnr(p); - psinfo->pr_ppid = task_pid_vnr(p->real_parent); psinfo->pr_pgrp = task_pgrp_vnr(p); psinfo->pr_sid = task_session_vnr(p); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index fdb66faa24f..20fbeced472 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1387,8 +1387,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus, prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; prstatus->pr_sigpend = p->pending.signal.sig[0]; prstatus->pr_sighold = p->blocked.sig[0]; + rcu_read_lock(); + prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent)); + rcu_read_unlock(); prstatus->pr_pid = task_pid_vnr(p); - prstatus->pr_ppid = task_pid_vnr(p->real_parent); prstatus->pr_pgrp = task_pgrp_vnr(p); prstatus->pr_sid = task_session_vnr(p); if (thread_group_leader(p)) { @@ -1432,8 +1434,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_psargs[i] = ' '; psinfo->pr_psargs[len] = 0; + rcu_read_lock(); + psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent)); + rcu_read_unlock(); psinfo->pr_pid = task_pid_vnr(p); - psinfo->pr_ppid = task_pid_vnr(p->real_parent); psinfo->pr_pgrp = task_pgrp_vnr(p); psinfo->pr_sid = task_session_vnr(p); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c5ded5ff72b..c135202c38b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -94,7 +94,6 @@ #include <linux/atm_tcp.h> #include <linux/sonet.h> #include <linux/atm_suni.h> -#include <linux/mtd/mtd.h> #include <linux/usb.h> #include <linux/usbdevice_fs.h> @@ -1405,46 +1404,6 @@ static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg) #define HIDPGETCONNLIST _IOR('H', 210, int) #define HIDPGETCONNINFO _IOR('H', 211, int) -struct mtd_oob_buf32 { - u_int32_t start; - u_int32_t length; - compat_caddr_t ptr; /* unsigned char* */ -}; - -#define MEMWRITEOOB32 _IOWR('M',3,struct mtd_oob_buf32) -#define MEMREADOOB32 _IOWR('M',4,struct mtd_oob_buf32) - -static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - struct mtd_oob_buf __user *buf = compat_alloc_user_space(sizeof(*buf)); - struct mtd_oob_buf32 __user *buf32 = compat_ptr(arg); - u32 data; - char __user *datap; - unsigned int real_cmd; - int err; - - real_cmd = (cmd == MEMREADOOB32) ? - MEMREADOOB : MEMWRITEOOB; - - if (copy_in_user(&buf->start, &buf32->start, - 2 * sizeof(u32)) || - get_user(data, &buf32->ptr)) - return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &buf->ptr)) - return -EFAULT; - - err = sys_ioctl(fd, real_cmd, (unsigned long) buf); - - if (!err) { - if (copy_in_user(&buf32->start, &buf->start, - 2 * sizeof(u32))) - err = -EFAULT; - } - - return err; -} - #ifdef CONFIG_BLOCK struct raw32_config_request { @@ -2426,15 +2385,6 @@ COMPATIBLE_IOCTL(USBDEVFS_SUBMITURB32) COMPATIBLE_IOCTL(USBDEVFS_REAPURB32) COMPATIBLE_IOCTL(USBDEVFS_REAPURBNDELAY32) COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT) -/* MTD */ -COMPATIBLE_IOCTL(MEMGETINFO) -COMPATIBLE_IOCTL(MEMERASE) -COMPATIBLE_IOCTL(MEMLOCK) -COMPATIBLE_IOCTL(MEMUNLOCK) -COMPATIBLE_IOCTL(MEMGETREGIONCOUNT) -COMPATIBLE_IOCTL(MEMGETREGIONINFO) -COMPATIBLE_IOCTL(MEMGETBADBLOCK) -COMPATIBLE_IOCTL(MEMSETBADBLOCK) /* NBD */ ULONG_IOCTL(NBD_SET_SOCK) ULONG_IOCTL(NBD_SET_BLKSIZE) @@ -2544,8 +2494,6 @@ COMPATIBLE_IOCTL(JSIOCGBUTTONS) COMPATIBLE_IOCTL(JSIOCGNAME(0)) /* now things that need handlers */ -HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) -HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) #ifdef CONFIG_NET HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32) HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5458e80fc55..085c5c06342 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -98,7 +98,7 @@ struct epoll_filefd { struct nested_call_node { struct list_head llink; void *cookie; - int cpu; + void *ctx; }; /* @@ -317,17 +317,17 @@ static void ep_nested_calls_init(struct nested_calls *ncalls) * @nproc: Nested call core function pointer. * @priv: Opaque data to be passed to the @nproc callback. * @cookie: Cookie to be used to identify this nested call. + * @ctx: This instance context. * * Returns: Returns the code returned by the @nproc callback, or -1 if * the maximum recursion limit has been exceeded. */ static int ep_call_nested(struct nested_calls *ncalls, int max_nests, int (*nproc)(void *, void *, int), void *priv, - void *cookie) + void *cookie, void *ctx) { int error, call_nests = 0; unsigned long flags; - int this_cpu = get_cpu(); struct list_head *lsthead = &ncalls->tasks_call_list; struct nested_call_node *tncur; struct nested_call_node tnode; @@ -340,7 +340,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests, * very much limited. */ list_for_each_entry(tncur, lsthead, llink) { - if (tncur->cpu == this_cpu && + if (tncur->ctx == ctx && (tncur->cookie == cookie || ++call_nests > max_nests)) { /* * Ops ... loop detected or maximum nest level reached. @@ -352,7 +352,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests, } /* Add the current task and cookie to the list */ - tnode.cpu = this_cpu; + tnode.ctx = ctx; tnode.cookie = cookie; list_add(&tnode.llink, lsthead); @@ -364,10 +364,9 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests, /* Remove the current task from the list */ spin_lock_irqsave(&ncalls->lock, flags); list_del(&tnode.llink); - out_unlock: +out_unlock: spin_unlock_irqrestore(&ncalls->lock, flags); - put_cpu(); return error; } @@ -408,8 +407,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) */ static void ep_poll_safewake(wait_queue_head_t *wq) { + int this_cpu = get_cpu(); + ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, - ep_poll_wakeup_proc, NULL, wq); + ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); + + put_cpu(); } /* @@ -663,7 +666,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) * could re-enter here. */ pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS, - ep_poll_readyevents_proc, ep, ep); + ep_poll_readyevents_proc, ep, ep, current); return pollflags != -1 ? pollflags : 0; } diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 003500498c2..6cde970b0a1 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -450,7 +450,7 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child) /* Releases the page */ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, - struct page *page, struct inode *inode) + struct page *page, struct inode *inode, int update_times) { loff_t pos = page_offset(page) + (char *) de - (char *) page_address(page); @@ -465,7 +465,8 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, ext2_set_de_type(de, inode); err = ext2_commit_chunk(page, pos, len); ext2_put_page(page); - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + if (update_times) + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); } diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index f2e5811936d..d988a718aed 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -111,7 +111,7 @@ extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *, extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); extern int ext2_empty_dir (struct inode *); extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); -extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *); +extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int); /* ialloc.c */ extern struct inode * ext2_new_inode (struct inode *, int); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 90ea17998a7..6524ecaebb7 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -320,7 +320,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, if (!new_de) goto out_dir; inode_inc_link_count(old_inode); - ext2_set_link(new_dir, new_de, new_page, old_inode); + ext2_set_link(new_dir, new_de, new_page, old_inode, 1); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) drop_nlink(new_inode); @@ -352,7 +352,8 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, inode_dec_link_count(old_inode); if (dir_de) { - ext2_set_link(old_inode, dir_de, dir_page, new_dir); + if (old_dir != new_dir) + ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0); inode_dec_link_count(old_dir); } return 0; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index b0248c6d5d4..05dea8132fc 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -820,7 +820,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, while (count < maxblocks && count <= blocks_to_boundary) { ext3_fsblk_t blk; - if (!verify_chain(chain, partial)) { + if (!verify_chain(chain, chain + depth - 1)) { /* * Indirect block might be removed by * truncate while we were reading it. @@ -2374,7 +2374,7 @@ void ext3_truncate(struct inode *inode) struct page *page; if (!ext3_can_truncate(inode)) - return; + goto out_notrans; if (inode->i_size == 0 && ext3_should_writeback_data(inode)) ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; @@ -2390,7 +2390,7 @@ void ext3_truncate(struct inode *inode) page = grab_cache_page(mapping, inode->i_size >> PAGE_CACHE_SHIFT); if (!page) - return; + goto out_notrans; } handle = start_transaction(inode); @@ -2401,7 +2401,7 @@ void ext3_truncate(struct inode *inode) unlock_page(page); page_cache_release(page); } - return; /* AKPM: return what? */ + goto out_notrans; } last_block = (inode->i_size + blocksize-1) @@ -2525,6 +2525,14 @@ out_stop: ext3_orphan_del(handle, inode); ext3_journal_stop(handle); + return; +out_notrans: + /* + * Delete the inode from orphan list so that it doesn't stay there + * forever and trigger assertion on umount. + */ + if (inode->i_nlink) + ext3_orphan_del(NULL, inode); } static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, @@ -3122,12 +3130,6 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) rc = inode_setattr(inode, attr); - /* If inode_setattr's call to ext3_truncate failed to get a - * transaction handle at all, we need to clean up the in-core - * orphan list manually. */ - if (inode->i_nlink) - ext3_orphan_del(NULL, inode); - if (!rc && (ia_valid & ATTR_MODE)) rc = ext3_acl_chmod(inode); diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 8a0b26340b5..8359e7b3dc8 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -990,7 +990,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) ext3_warning(sb, __func__, - "CONFIG_LBD not enabled\n"); + "CONFIG_LBDAF not enabled\n"); return -EINVAL; } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 26aa64dee6a..601e881e610 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1812,7 +1812,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) printk(KERN_ERR "EXT3-fs: filesystem on %s:" " too large to mount safely\n", sb->s_id); if (sizeof(sector_t) < 8) - printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not " + printk(KERN_WARNING "EXT3-fs: CONFIG_LBDAF not " "enabled\n"); goto failed_mount; } diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 8a34710ecf4..8867b2a1e5f 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ - ext4_jbd2.o migrate.o mballoc.o block_validity.o + ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cc7d5edc38c..17b9998680e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -352,6 +352,7 @@ struct ext4_new_group_data { /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) +#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) /* * ioctl commands in 32 bit emulation @@ -447,6 +448,15 @@ struct ext4_inode { __le32 i_version_hi; /* high 32 bits for 64-bit version */ }; +struct move_extent { + __u32 reserved; /* should be zero */ + __u32 donor_fd; /* donor file descriptor */ + __u64 orig_start; /* logical start offset in block for orig */ + __u64 donor_start; /* logical start offset in block for donor */ + __u64 len; /* block length to be moved */ + __u64 moved_len; /* moved block length */ +}; +#define MAX_DEFRAG_SIZE ((1UL<<31) - 1) #define EXT4_EPOCH_BITS 2 #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) @@ -674,7 +684,6 @@ struct ext4_inode_info { #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ -#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */ #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ @@ -696,17 +705,10 @@ struct ext4_inode_info { #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ -/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ -#ifndef _LINUX_EXT2_FS_H #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt #define set_opt(o, opt) o |= EXT4_MOUNT_##opt #define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ EXT4_MOUNT_##opt) -#else -#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD -#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT -#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS -#endif #define ext4_set_bit ext2_set_bit #define ext4_set_bit_atomic ext2_set_bit_atomic @@ -824,6 +826,13 @@ struct ext4_super_block { }; #ifdef __KERNEL__ + +/* + * run-time mount flags + */ +#define EXT4_MF_MNTDIR_SAMPLED 0x0001 +#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ + /* * fourth extended-fs super-block data in memory */ @@ -842,7 +851,8 @@ struct ext4_sb_info { struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ struct buffer_head **s_group_desc; - unsigned long s_mount_opt; + unsigned int s_mount_opt; + unsigned int s_mount_flags; ext4_fsblk_t s_sb_block; uid_t s_resuid; gid_t s_resgid; @@ -853,6 +863,7 @@ struct ext4_sb_info { int s_inode_size; int s_first_ino; unsigned int s_inode_readahead_blks; + unsigned int s_inode_goal; spinlock_t s_next_gen_lock; u32 s_next_generation; u32 s_hash_seed[4]; @@ -1305,7 +1316,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo); /* ialloc.c */ -extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); +extern struct inode *ext4_new_inode(handle_t *, struct inode *, int, + const struct qstr *qstr, __u32 goal); extern void ext4_free_inode(handle_t *, struct inode *); extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); extern unsigned long ext4_count_free_inodes(struct super_block *); @@ -1329,7 +1341,7 @@ extern void ext4_discard_preallocations(struct inode *); extern int __init init_ext4_mballoc(void); extern void exit_ext4_mballoc(void); extern void ext4_mb_free_blocks(handle_t *, struct inode *, - unsigned long, unsigned long, int, unsigned long *); + ext4_fsblk_t, unsigned long, int, unsigned long *); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern void ext4_mb_update_group_info(struct ext4_group_info *grp, @@ -1647,6 +1659,11 @@ extern int ext4_get_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, int flags); extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); +/* move_extent.c */ +extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, + __u64 start_orig, __u64 start_donor, + __u64 len, __u64 *moved_len); + /* * Add new method to test wether block and inode bitmaps are properly diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index f0c3ec85bd4..20a84105a10 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -221,12 +221,16 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) } extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); +extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, struct ext4_ext_path *path); +extern int ext4_can_extents_be_merged(struct inode *inode, + struct ext4_extent *ex1, + struct ext4_extent *ex2); extern int ext4_ext_try_to_merge(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2593f748c3a..50322a09bd0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -49,7 +49,7 @@ * ext_pblock: * combine low and high parts of physical block number into ext4_fsblk_t */ -static ext4_fsblk_t ext_pblock(struct ext4_extent *ex) +ext4_fsblk_t ext_pblock(struct ext4_extent *ex) { ext4_fsblk_t block; @@ -1417,7 +1417,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, return err; } -static int +int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 588af8c7724..3f1873fef1c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -21,6 +21,8 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> +#include <linux/mount.h> +#include <linux/path.h> #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" @@ -145,6 +147,38 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } +static int ext4_file_open(struct inode * inode, struct file * filp) +{ + struct super_block *sb = inode->i_sb; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct vfsmount *mnt = filp->f_path.mnt; + struct path path; + char buf[64], *cp; + + if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) && + !(sb->s_flags & MS_RDONLY))) { + sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED; + /* + * Sample where the filesystem has been mounted and + * store it in the superblock for sysadmin convenience + * when trying to sort through large numbers of block + * devices or filesystem images. + */ + memset(buf, 0, sizeof(buf)); + path.mnt = mnt->mnt_parent; + path.dentry = mnt->mnt_mountpoint; + path_get(&path); + cp = d_path(&path, buf, sizeof(buf)); + path_put(&path); + if (!IS_ERR(cp)) { + memcpy(sbi->s_es->s_last_mounted, cp, + sizeof(sbi->s_es->s_last_mounted)); + sb->s_dirt = 1; + } + } + return generic_file_open(inode, filp); +} + const struct file_operations ext4_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -156,7 +190,7 @@ const struct file_operations ext4_file_operations = { .compat_ioctl = ext4_compat_ioctl, #endif .mmap = ext4_file_mmap, - .open = generic_file_open, + .open = ext4_file_open, .release = ext4_release_file, .fsync = ext4_sync_file, .splice_read = generic_file_splice_read, diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 5afe4370840..83cf6415f59 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -28,10 +28,12 @@ #include <linux/writeback.h> #include <linux/jbd2.h> #include <linux/blkdev.h> -#include <linux/marker.h> + #include "ext4.h" #include "ext4_jbd2.h" +#include <trace/events/ext4.h> + /* * akpm: A new design for ext4_sync_file(). * @@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) J_ASSERT(ext4_journal_current_handle() == NULL); - trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", - inode->i_sb->s_id, datasync, inode->i_ino, - dentry->d_parent->d_inode->i_ino); + trace_ext4_sync_file(file, dentry, datasync); /* * data=writeback: diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 3743bd849bc..2f645732e3b 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -23,11 +23,14 @@ #include <linux/bitops.h> #include <linux/blkdev.h> #include <asm/byteorder.h> + #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" +#include <trace/events/ext4.h> + /* * ialloc.c contains the inodes allocation and deallocation routines */ @@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) ino = inode->i_ino; ext4_debug("freeing inode %lu\n", ino); - trace_mark(ext4_free_inode, - "dev %s ino %lu mode %d uid %lu gid %lu bocks %llu", - sb->s_id, inode->i_ino, inode->i_mode, - (unsigned long) inode->i_uid, (unsigned long) inode->i_gid, - (unsigned long long) inode->i_blocks); + trace_ext4_free_inode(inode); /* * Note: we must free any quota before locking the superblock, @@ -471,7 +470,8 @@ void get_orlov_stats(struct super_block *sb, ext4_group_t g, */ static int find_group_orlov(struct super_block *sb, struct inode *parent, - ext4_group_t *group, int mode) + ext4_group_t *group, int mode, + const struct qstr *qstr) { ext4_group_t parent_group = EXT4_I(parent)->i_block_group; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -486,6 +486,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, struct ext4_group_desc *desc; struct orlov_stats stats; int flex_size = ext4_flex_bg_size(sbi); + struct dx_hash_info hinfo; ngroups = real_ngroups; if (flex_size > 1) { @@ -507,7 +508,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, int best_ndir = inodes_per_group; int ret = -1; - get_random_bytes(&grp, sizeof(grp)); + if (qstr) { + hinfo.hash_version = DX_HASH_HALF_MD4; + hinfo.seed = sbi->s_hash_seed; + ext4fs_dirhash(qstr->name, qstr->len, &hinfo); + grp = hinfo.hash; + } else + get_random_bytes(&grp, sizeof(grp)); parent_group = (unsigned)grp % ngroups; for (i = 0; i < ngroups; i++) { g = (parent_group + i) % ngroups; @@ -650,7 +657,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, *group = parent_group + flex_size; if (*group > ngroups) *group = 0; - return find_group_orlov(sb, parent, group, mode); + return find_group_orlov(sb, parent, group, mode, 0); } /* @@ -791,7 +798,8 @@ err_ret: * For other inodes, search forward from the parent directory's block * group to find a free inode. */ -struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) +struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, + const struct qstr *qstr, __u32 goal) { struct super_block *sb; struct buffer_head *inode_bitmap_bh = NULL; @@ -815,14 +823,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) sb = dir->i_sb; ngroups = ext4_get_groups_count(sb); - trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, - dir->i_ino, mode); + trace_ext4_request_inode(dir, mode); inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); ei = EXT4_I(inode); sbi = EXT4_SB(sb); + if (!goal) + goal = sbi->s_inode_goal; + + if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) { + group = (goal - 1) / EXT4_INODES_PER_GROUP(sb); + ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb); + ret2 = 0; + goto got_group; + } + if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { ret2 = find_group_flex(sb, dir, &group); if (ret2 == -1) { @@ -841,7 +858,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) if (test_opt(sb, OLDALLOC)) ret2 = find_group_dir(sb, dir, &group); else - ret2 = find_group_orlov(sb, dir, &group, mode); + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); } else ret2 = find_group_other(sb, dir, &group, mode); @@ -851,7 +868,7 @@ got_group: if (ret2 == -1) goto out; - for (i = 0; i < ngroups; i++) { + for (i = 0; i < ngroups; i++, ino = 0) { err = -EIO; gdp = ext4_get_group_desc(sb, group, &group_desc_bh); @@ -863,8 +880,6 @@ got_group: if (!inode_bitmap_bh) goto fail; - ino = 0; - repeat_in_this_group: ino = ext4_find_next_zero_bit((unsigned long *) inode_bitmap_bh->b_data, @@ -1047,8 +1062,7 @@ got: } ext4_debug("allocating inode %lu\n", inode->i_ino); - trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d", - sb->s_id, inode->i_ino, dir->i_ino, mode); + trace_ext4_allocate_inode(inode, dir, mode); goto really_out; fail: ext4_std_error(sb, err); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 875db944b22..7c17ae275af 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -37,11 +37,14 @@ #include <linux/namei.h> #include <linux/uio.h> #include <linux/bio.h> + #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" #include "ext4_extents.h" +#include <trace/events/ext4.h> + #define MPAGE_DA_EXTENT_TAIL 0x01 static inline int ext4_begin_ordered_truncate(struct inode *inode, @@ -78,7 +81,7 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) * If the handle isn't valid we're not journaling so there's nothing to do. */ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t blocknr) + struct buffer_head *bh, ext4_fsblk_t blocknr) { int err; @@ -90,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, BUFFER_TRACE(bh, "enter"); jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " - "data mode %lx\n", + "data mode %x\n", bh, is_metadata, inode->i_mode, test_opt(inode->i_sb, DATA_FLAGS)); @@ -329,8 +332,8 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) */ static int ext4_block_to_path(struct inode *inode, - ext4_lblk_t i_block, - ext4_lblk_t offsets[4], int *boundary) + ext4_lblk_t i_block, + ext4_lblk_t offsets[4], int *boundary) { int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); @@ -362,9 +365,9 @@ static int ext4_block_to_path(struct inode *inode, final = ptrs; } else { ext4_warning(inode->i_sb, "ext4_block_to_path", - "block %lu > max in inode %lu", - i_block + direct_blocks + - indirect_blocks + double_blocks, inode->i_ino); + "block %lu > max in inode %lu", + i_block + direct_blocks + + indirect_blocks + double_blocks, inode->i_ino); } if (boundary) *boundary = final - 1 - (i_block & (ptrs - 1)); @@ -379,25 +382,25 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, while (bref < p+max) { blk = le32_to_cpu(*bref++); - if (blk && - unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), + if (blk && + unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), blk, 1))) { ext4_error(inode->i_sb, function, "invalid block reference %u " "in inode #%lu", blk, inode->i_ino); - return -EIO; - } - } - return 0; + return -EIO; + } + } + return 0; } #define ext4_check_indirect_blockref(inode, bh) \ - __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ + __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ EXT4_ADDR_PER_BLOCK((inode)->i_sb)) #define ext4_check_inode_blockref(inode) \ - __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ + __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ EXT4_NDIR_BLOCKS) /** @@ -447,7 +450,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, bh = sb_getblk(sb, le32_to_cpu(p->key)); if (unlikely(!bh)) goto failure; - + if (!bh_uptodate_or_lock(bh)) { if (bh_submit_read(bh) < 0) { put_bh(bh); @@ -459,7 +462,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, goto failure; } } - + add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); /* Reader: end */ if (!p->key) @@ -552,7 +555,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) * returns it. */ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, - Indirect *partial) + Indirect *partial) { /* * XXX need to get goal block from mballoc's data structures @@ -574,7 +577,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, * direct and indirect blocks. */ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, - int blocks_to_boundary) + int blocks_to_boundary) { unsigned int count = 0; @@ -610,9 +613,9 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, * direct blocks */ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, ext4_fsblk_t goal, - int indirect_blks, int blks, - ext4_fsblk_t new_blocks[4], int *err) + ext4_lblk_t iblock, ext4_fsblk_t goal, + int indirect_blks, int blks, + ext4_fsblk_t new_blocks[4], int *err) { struct ext4_allocation_request ar; int target, i; @@ -683,10 +686,10 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, } if (!*err) { if (target == blks) { - /* - * save the new block number - * for the first direct block - */ + /* + * save the new block number + * for the first direct block + */ new_blocks[index] = current_block; } blk_allocated += ar.len; @@ -728,9 +731,9 @@ failed_out: * as described above and return 0. */ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, int indirect_blks, - int *blks, ext4_fsblk_t goal, - ext4_lblk_t *offsets, Indirect *branch) + ext4_lblk_t iblock, int indirect_blks, + int *blks, ext4_fsblk_t goal, + ext4_lblk_t *offsets, Indirect *branch) { int blocksize = inode->i_sb->s_blocksize; int i, n = 0; @@ -777,7 +780,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, * the chain to point to the new allocated * data blocks numbers */ - for (i=1; i < num; i++) + for (i = 1; i < num; i++) *(branch[n].p + i) = cpu_to_le32(++current_block); } BUFFER_TRACE(bh, "marking uptodate"); @@ -820,7 +823,8 @@ failed: * chain to new block and return 0. */ static int ext4_splice_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t block, Indirect *where, int num, int blks) + ext4_lblk_t block, Indirect *where, int num, + int blks) { int i; int err = 0; @@ -852,10 +856,6 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, } /* We are done with atomic stuff, now do the rest of housekeeping */ - - inode->i_ctime = ext4_current_time(inode); - ext4_mark_inode_dirty(handle, inode); - /* had we spliced it onto indirect block? */ if (where->bh) { /* @@ -874,8 +874,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, } else { /* * OK, we spliced it into the inode itself on a direct block. - * Inode was dirtied above. */ + ext4_mark_inode_dirty(handle, inode); jbd_debug(5, "splicing direct\n"); } return err; @@ -921,9 +921,9 @@ err_out: * blocks. */ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, unsigned int maxblocks, - struct buffer_head *bh_result, - int flags) + ext4_lblk_t iblock, unsigned int maxblocks, + struct buffer_head *bh_result, + int flags) { int err = -EIO; ext4_lblk_t offsets[4]; @@ -939,7 +939,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); depth = ext4_block_to_path(inode, iblock, offsets, - &blocks_to_boundary); + &blocks_to_boundary); if (depth == 0) goto out; @@ -987,8 +987,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, * Block out ext4_truncate while we alter the tree */ err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, - &count, goal, - offsets + (partial - chain), partial); + &count, goal, + offsets + (partial - chain), partial); /* * The ext4_splice_branch call will free and forget any buffers @@ -999,8 +999,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, */ if (!err) err = ext4_splice_branch(handle, inode, iblock, - partial, indirect_blks, count); - else + partial, indirect_blks, count); + else goto cleanup; set_buffer_new(bh_result); @@ -1172,7 +1172,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, up_read((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && buffer_mapped(bh)) { - int ret = check_block_validity(inode, block, + int ret = check_block_validity(inode, block, bh->b_blocknr, retval); if (ret != 0) return ret; @@ -1254,7 +1254,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, up_write((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && buffer_mapped(bh)) { - int ret = check_block_validity(inode, block, + int ret = check_block_validity(inode, block, bh->b_blocknr, retval); if (ret != 0) return ret; @@ -1405,8 +1405,7 @@ static int walk_page_buffers(handle_t *handle, for (bh = head, block_start = 0; ret == 0 && (bh != head || !block_start); - block_start = block_end, bh = next) - { + block_start = block_end, bh = next) { next = bh->b_this_page; block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { @@ -1447,7 +1446,7 @@ static int walk_page_buffers(handle_t *handle, * write. */ static int do_journal_get_write_access(handle_t *handle, - struct buffer_head *bh) + struct buffer_head *bh) { if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; @@ -1455,27 +1454,24 @@ static int do_journal_get_write_access(handle_t *handle, } static int ext4_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; int ret, needed_blocks; handle_t *handle; int retries = 0; struct page *page; - pgoff_t index; + pgoff_t index; unsigned from, to; - trace_mark(ext4_write_begin, - "dev %s ino %lu pos %llu len %u flags %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, flags); + trace_ext4_write_begin(inode, pos, len, flags); /* * Reserve one block more for addition to orphan list in case * we allocate blocks but write fails for some reason */ needed_blocks = ext4_writepage_trans_blocks(inode) + 1; - index = pos >> PAGE_CACHE_SHIFT; + index = pos >> PAGE_CACHE_SHIFT; from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -1523,7 +1519,7 @@ retry: ext4_journal_stop(handle); if (pos + len > inode->i_size) { vmtruncate(inode, inode->i_size); - /* + /* * If vmtruncate failed early the inode might * still be on the orphan list; we need to * make sure the inode is removed from the @@ -1550,9 +1546,9 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) } static int ext4_generic_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { int i_size_changed = 0; struct inode *inode = mapping->host; @@ -1603,18 +1599,15 @@ static int ext4_generic_write_end(struct file *file, * buffers are managed internally. */ static int ext4_ordered_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext4_journal_current_handle(); struct inode *inode = mapping->host; int ret = 0, ret2; - trace_mark(ext4_ordered_write_end, - "dev %s ino %lu pos %llu len %u copied %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, copied); + trace_ext4_ordered_write_end(inode, pos, len, copied); ret = ext4_jbd2_file_inode(handle, inode); if (ret == 0) { @@ -1636,7 +1629,7 @@ static int ext4_ordered_write_end(struct file *file, if (pos + len > inode->i_size) { vmtruncate(inode, inode->i_size); - /* + /* * If vmtruncate failed early the inode might still be * on the orphan list; we need to make sure the inode * is removed from the orphan list in that case. @@ -1650,18 +1643,15 @@ static int ext4_ordered_write_end(struct file *file, } static int ext4_writeback_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext4_journal_current_handle(); struct inode *inode = mapping->host; int ret = 0, ret2; - trace_mark(ext4_writeback_write_end, - "dev %s ino %lu pos %llu len %u copied %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, copied); + trace_ext4_writeback_write_end(inode, pos, len, copied); ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); copied = ret2; @@ -1681,7 +1671,7 @@ static int ext4_writeback_write_end(struct file *file, if (pos + len > inode->i_size) { vmtruncate(inode, inode->i_size); - /* + /* * If vmtruncate failed early the inode might still be * on the orphan list; we need to make sure the inode * is removed from the orphan list in that case. @@ -1694,9 +1684,9 @@ static int ext4_writeback_write_end(struct file *file, } static int ext4_journalled_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext4_journal_current_handle(); struct inode *inode = mapping->host; @@ -1705,10 +1695,7 @@ static int ext4_journalled_write_end(struct file *file, unsigned from, to; loff_t new_i_size; - trace_mark(ext4_journalled_write_end, - "dev %s ino %lu pos %llu len %u copied %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, copied); + trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -1747,7 +1734,7 @@ static int ext4_journalled_write_end(struct file *file, ret = ret2; if (pos + len > inode->i_size) { vmtruncate(inode, inode->i_size); - /* + /* * If vmtruncate failed early the inode might still be * on the orphan list; we need to make sure the inode * is removed from the orphan list in that case. @@ -1854,7 +1841,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) } static void ext4_da_page_release_reservation(struct page *page, - unsigned long offset) + unsigned long offset) { int to_release = 0; struct buffer_head *head, *bh; @@ -2554,9 +2541,7 @@ static int ext4_da_writepage(struct page *page, struct buffer_head *page_bufs; struct inode *inode = page->mapping->host; - trace_mark(ext4_da_writepage, - "dev %s ino %lu page_index %lu", - inode->i_sb->s_id, inode->i_ino, page->index); + trace_ext4_da_writepage(inode, page); size = i_size_read(inode); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; @@ -2667,19 +2652,7 @@ static int ext4_da_writepages(struct address_space *mapping, int needed_blocks, ret = 0, nr_to_writebump = 0; struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); - trace_mark(ext4_da_writepages, - "dev %s ino %lu nr_t_write %ld " - "pages_skipped %ld range_start %llu " - "range_end %llu nonblocking %d " - "for_kupdate %d for_reclaim %d " - "for_writepages %d range_cyclic %d", - inode->i_sb->s_id, inode->i_ino, - wbc->nr_to_write, wbc->pages_skipped, - (unsigned long long) wbc->range_start, - (unsigned long long) wbc->range_end, - wbc->nonblocking, wbc->for_kupdate, - wbc->for_reclaim, wbc->for_writepages, - wbc->range_cyclic); + trace_ext4_da_writepages(inode, wbc); /* * No pages to write? This is mainly a kludge to avoid starting @@ -2693,13 +2666,13 @@ static int ext4_da_writepages(struct address_space *mapping, * If the filesystem has aborted, it is read-only, so return * right away instead of dumping stack traces later on that * will obscure the real source of the problem. We test - * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because + * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because * the latter could be true if the filesystem is mounted * read-only, and in that case, ext4_da_writepages should * *never* be called, so if that ever happens, we would want * the stack trace. */ - if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT)) + if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) return -EROFS; /* @@ -2845,14 +2818,7 @@ out_writepages: if (!no_nrwrite_index_update) wbc->no_nrwrite_index_update = 0; wbc->nr_to_write -= nr_to_writebump; - trace_mark(ext4_da_writepage_result, - "dev %s ino %lu ret %d pages_written %d " - "pages_skipped %ld congestion %d " - "more_io %d no_nrwrite_index_update %d", - inode->i_sb->s_id, inode->i_ino, ret, - pages_written, wbc->pages_skipped, - wbc->encountered_congestion, wbc->more_io, - wbc->no_nrwrite_index_update); + trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); return ret; } @@ -2884,8 +2850,8 @@ static int ext4_nonda_switch(struct super_block *sb) } static int ext4_da_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { int ret, retries = 0; struct page *page; @@ -2904,11 +2870,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, len, flags, pagep, fsdata); } *fsdata = (void *)0; - - trace_mark(ext4_da_write_begin, - "dev %s ino %lu pos %llu len %u flags %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, flags); + trace_ext4_da_write_begin(inode, pos, len, flags); retry: /* * With delayed allocation, we don't log the i_disksize update @@ -2959,7 +2921,7 @@ out: * when write to the end of file but not require block allocation */ static int ext4_da_should_update_i_disksize(struct page *page, - unsigned long offset) + unsigned long offset) { struct buffer_head *bh; struct inode *inode = page->mapping->host; @@ -2978,9 +2940,9 @@ static int ext4_da_should_update_i_disksize(struct page *page, } static int ext4_da_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { struct inode *inode = mapping->host; int ret = 0, ret2; @@ -3001,10 +2963,7 @@ static int ext4_da_write_end(struct file *file, } } - trace_mark(ext4_da_write_end, - "dev %s ino %lu pos %llu len %u copied %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, copied); + trace_ext4_da_write_end(inode, pos, len, copied); start = pos & (PAGE_CACHE_SIZE - 1); end = start + copied - 1; @@ -3081,7 +3040,7 @@ int ext4_alloc_da_blocks(struct inode *inode) * not strictly speaking necessary (and for users of * laptop_mode, not even desirable). However, to do otherwise * would require replicating code paths in: - * + * * ext4_da_writepages() -> * write_cache_pages() ---> (via passed in callback function) * __mpage_da_writepage() --> @@ -3101,7 +3060,7 @@ int ext4_alloc_da_blocks(struct inode *inode) * write out the pages, but rather only collect contiguous * logical block extents, call the multi-block allocator, and * then update the buffer heads with the block allocations. - * + * * For now, though, we'll cheat by calling filemap_flush(), * which will map the blocks, and start the I/O, but not * actually wait for the I/O to complete. @@ -3237,7 +3196,7 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) * */ static int __ext4_normal_writepage(struct page *page, - struct writeback_control *wbc) + struct writeback_control *wbc) { struct inode *inode = page->mapping->host; @@ -3249,15 +3208,13 @@ static int __ext4_normal_writepage(struct page *page, } static int ext4_normal_writepage(struct page *page, - struct writeback_control *wbc) + struct writeback_control *wbc) { struct inode *inode = page->mapping->host; loff_t size = i_size_read(inode); loff_t len; - trace_mark(ext4_normal_writepage, - "dev %s ino %lu page_index %lu", - inode->i_sb->s_id, inode->i_ino, page->index); + trace_ext4_normal_writepage(inode, page); J_ASSERT(PageLocked(page)); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; @@ -3287,7 +3244,7 @@ static int ext4_normal_writepage(struct page *page, } static int __ext4_journalled_writepage(struct page *page, - struct writeback_control *wbc) + struct writeback_control *wbc) { struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; @@ -3337,15 +3294,13 @@ out: } static int ext4_journalled_writepage(struct page *page, - struct writeback_control *wbc) + struct writeback_control *wbc) { struct inode *inode = page->mapping->host; loff_t size = i_size_read(inode); loff_t len; - trace_mark(ext4_journalled_writepage, - "dev %s ino %lu page_index %lu", - inode->i_sb->s_id, inode->i_ino, page->index); + trace_ext4_journalled_writepage(inode, page); J_ASSERT(PageLocked(page)); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; @@ -3442,8 +3397,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) * VFS code falls back into buffered path in that case so we are safe. */ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -3763,7 +3718,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q) * (no partially truncated stuff there). */ static Indirect *ext4_find_shared(struct inode *inode, int depth, - ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top) + ext4_lblk_t offsets[4], Indirect chain[4], + __le32 *top) { Indirect *partial, *p; int k, err; @@ -3819,8 +3775,10 @@ no_top: * than `count' because there can be holes in there. */ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t block_to_free, - unsigned long count, __le32 *first, __le32 *last) + struct buffer_head *bh, + ext4_fsblk_t block_to_free, + unsigned long count, __le32 *first, + __le32 *last) { __le32 *p; if (try_to_extend_transaction(handle, inode)) { @@ -3837,10 +3795,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, } /* - * Any buffers which are on the journal will be in memory. We find - * them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget() - * on them. We've already detached each block from the file, so - * bforget() in jbd2_journal_forget() should be safe. + * Any buffers which are on the journal will be in memory. We + * find them on the hash table so jbd2_journal_revoke() will + * run jbd2_journal_forget() on them. We've already detached + * each block from the file, so bforget() in + * jbd2_journal_forget() should be safe. * * AKPM: turn on bforget in jbd2_journal_forget()!!! */ @@ -4212,7 +4171,7 @@ void ext4_truncate(struct inode *inode) (__le32*)partial->bh->b_data+addr_per_block, (chain+n-1) - partial); BUFFER_TRACE(partial->bh, "call brelse"); - brelse (partial->bh); + brelse(partial->bh); partial--; } do_indirects: @@ -4453,8 +4412,9 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei) if (flags & S_DIRSYNC) ei->i_flags |= EXT4_DIRSYNC_FL; } + static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, - struct ext4_inode_info *ei) + struct ext4_inode_info *ei) { blkcnt_t i_blocks ; struct inode *inode = &(ei->vfs_inode); @@ -4569,7 +4529,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) - ei->i_state |= EXT4_STATE_XATTR; + ei->i_state |= EXT4_STATE_XATTR; } } else ei->i_extra_isize = 0; @@ -4588,7 +4548,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = 0; if (ei->i_file_acl && - ((ei->i_file_acl < + ((ei->i_file_acl < (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + EXT4_SB(sb)->s_gdb_count)) || (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { @@ -4603,15 +4563,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) !ext4_inode_is_fast_symlink(inode))) /* Validate extent which is part of inode */ ret = ext4_ext_check_inode(inode); - } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || (S_ISLNK(inode->i_mode) && !ext4_inode_is_fast_symlink(inode))) { - /* Validate block references which are part of inode */ + /* Validate block references which are part of inode */ ret = ext4_check_inode_blockref(inode); } if (ret) { - brelse(bh); - goto bad_inode; + brelse(bh); + goto bad_inode; } if (S_ISREG(inode->i_mode)) { @@ -4642,7 +4602,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) } else { brelse(bh); ret = -EIO; - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, __func__, "bogus i_mode (%o) for inode=%lu", inode->i_mode, inode->i_ino); goto bad_inode; @@ -4795,8 +4755,9 @@ static int ext4_do_update_inode(handle_t *handle, cpu_to_le32(new_encode_dev(inode->i_rdev)); raw_inode->i_block[2] = 0; } - } else for (block = 0; block < EXT4_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; + } else + for (block = 0; block < EXT4_N_BLOCKS; block++) + raw_inode->i_block[block] = ei->i_data[block]; raw_inode->i_disk_version = cpu_to_le32(inode->i_version); if (ei->i_extra_isize) { @@ -5150,7 +5111,7 @@ int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) * Give this, we know that the caller already has write access to iloc->bh. */ int ext4_mark_iloc_dirty(handle_t *handle, - struct inode *inode, struct ext4_iloc *iloc) + struct inode *inode, struct ext4_iloc *iloc) { int err = 0; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 91e75f7a9e7..bb415408fdb 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -14,6 +14,7 @@ #include <linux/compat.h> #include <linux/smp_lock.h> #include <linux/mount.h> +#include <linux/file.h> #include <asm/uaccess.h> #include "ext4_jbd2.h" #include "ext4.h" @@ -213,6 +214,41 @@ setversion_out: return err; } + + case EXT4_IOC_MOVE_EXT: { + struct move_extent me; + struct file *donor_filp; + int err; + + if (copy_from_user(&me, + (struct move_extent __user *)arg, sizeof(me))) + return -EFAULT; + + donor_filp = fget(me.donor_fd); + if (!donor_filp) + return -EBADF; + + if (!capable(CAP_DAC_OVERRIDE)) { + if ((current->real_cred->fsuid != inode->i_uid) || + !(inode->i_mode & S_IRUSR) || + !(donor_filp->f_dentry->d_inode->i_mode & + S_IRUSR)) { + fput(donor_filp); + return -EACCES; + } + } + + err = ext4_move_extents(filp, donor_filp, me.orig_start, + me.donor_start, me.len, &me.moved_len); + fput(donor_filp); + + if (!err) + if (copy_to_user((struct move_extent *)arg, + &me, sizeof(me))) + return -EFAULT; + return err; + } + case EXT4_IOC_GROUP_ADD: { struct ext4_new_group_data input; struct super_block *sb = inode->i_sb; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ed8482e22c0..519a0a686d9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -22,6 +22,8 @@ */ #include "mballoc.h" +#include <trace/events/ext4.h> + /* * MUSTDO: * - test ext4_ext_search_left() and ext4_ext_search_right() @@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, ext4_group_t group); static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); - - static inline void *mb_correct_addr_and_bit(int *bit, void *addr) { #if BITS_PER_LONG == 64 @@ -2859,9 +2859,8 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) + entry->start_blk + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", - sb->s_id, (unsigned long long) discard_block, - entry->count); + trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, + entry->count); sb_issue_discard(sb, discard_block, entry->count); kmem_cache_free(ext4_free_ext_cachep, entry); @@ -3629,10 +3628,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) mb_debug("new inode pa %p: %llu/%u for %u\n", pa, pa->pa_pstart, pa->pa_len, pa->pa_lstart); - trace_mark(ext4_mb_new_inode_pa, - "dev %s ino %lu pstart %llu len %u lstart %u", - sb->s_id, ac->ac_inode->i_ino, - pa->pa_pstart, pa->pa_len, pa->pa_lstart); + trace_ext4_mb_new_inode_pa(ac, pa); ext4_mb_use_inode_pa(ac, pa); atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); @@ -3691,9 +3687,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) pa->pa_type = MB_GROUP_PA; mb_debug("new group pa %p: %llu/%u for %u\n", pa, - pa->pa_pstart, pa->pa_len, pa->pa_lstart); - trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u", - sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart); + pa->pa_pstart, pa->pa_len, pa->pa_lstart); + trace_ext4_mb_new_group_pa(ac, pa); ext4_mb_use_group_pa(ac, pa); atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); @@ -3783,10 +3778,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, ext4_mb_store_history(ac); } - trace_mark(ext4_mb_release_inode_pa, - "dev %s ino %lu block %llu count %u", - sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit, - next - bit); + trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, + next - bit); mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); bit = next + 1; } @@ -3820,8 +3813,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, if (ac) ac->ac_op = EXT4_MB_HISTORY_DISCARD; - trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d", - sb->s_id, pa->pa_pstart, pa->pa_len); + trace_ext4_mb_release_group_pa(ac, pa); BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); BUG_ON(group != e4b->bd_group && pa->pa_len != 0); @@ -3889,6 +3881,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, INIT_LIST_HEAD(&list); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (ac) + ac->ac_sb = sb; repeat: ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, @@ -3987,12 +3981,15 @@ void ext4_discard_preallocations(struct inode *inode) } mb_debug("discard preallocation for inode %lu\n", inode->i_ino); - trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id, - inode->i_ino); + trace_ext4_discard_preallocations(inode); INIT_LIST_HEAD(&list); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (ac) { + ac->ac_sb = sb; + ac->ac_inode = inode; + } repeat: /* first, collect all pa's in the inode */ spin_lock(&ei->i_prealloc_lock); @@ -4276,6 +4273,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, INIT_LIST_HEAD(&discard_list); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (ac) + ac->ac_sb = sb; spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], @@ -4445,8 +4444,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) int ret; int freed = 0; - trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", - sb->s_id, needed); + trace_ext4_mb_discard_preallocations(sb, needed); for (i = 0; i < ngroups && needed > 0; i++) { ret = ext4_mb_discard_group_preallocations(sb, i, needed); freed += ret; @@ -4475,17 +4473,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, sb = ar->inode->i_sb; sbi = EXT4_SB(sb); - trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu " - "lblk %llu goal %llu lleft %llu lright %llu " - "pleft %llu pright %llu ", - sb->s_id, ar->flags, ar->len, - ar->inode ? ar->inode->i_ino : 0, - (unsigned long long) ar->logical, - (unsigned long long) ar->goal, - (unsigned long long) ar->lleft, - (unsigned long long) ar->lright, - (unsigned long long) ar->pleft, - (unsigned long long) ar->pright); + trace_ext4_request_blocks(ar); /* * For delayed allocation, we could skip the ENOSPC and @@ -4521,7 +4509,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, } ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (!ac) { + if (ac) { + ac->ac_sb = sb; + ac->ac_inode = ar->inode; + } else { ar->len = 0; *errp = -ENOMEM; goto out1; @@ -4594,18 +4585,7 @@ out3: reserv_blks); } - trace_mark(ext4_allocate_blocks, - "dev %s block %llu flags %u len %u ino %lu " - "logical %llu goal %llu lleft %llu lright %llu " - "pleft %llu pright %llu ", - sb->s_id, (unsigned long long) block, - ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0, - (unsigned long long) ar->logical, - (unsigned long long) ar->goal, - (unsigned long long) ar->lleft, - (unsigned long long) ar->lright, - (unsigned long long) ar->pleft, - (unsigned long long) ar->pright); + trace_ext4_allocate_blocks(ar, (unsigned long long)block); return block; } @@ -4709,7 +4689,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, * Main entry point into mballoc to free blocks */ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, - unsigned long block, unsigned long count, + ext4_fsblk_t block, unsigned long count, int metadata, unsigned long *freed) { struct buffer_head *bitmap_bh = NULL; @@ -4735,15 +4715,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, block + count > ext4_blocks_count(es)) { ext4_error(sb, __func__, "Freeing blocks not in datazone - " - "block = %lu, count = %lu", block, count); + "block = %llu, count = %lu", block, count); goto error_return; } - ext4_debug("freeing block %lu\n", block); - trace_mark(ext4_free_blocks, - "dev %s block %llu count %lu metadata %d ino %lu", - sb->s_id, (unsigned long long) block, count, metadata, - inode ? inode->i_ino : 0); + ext4_debug("freeing block %llu\n", block); + trace_ext4_free_blocks(inode, block, count, metadata); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); if (ac) { @@ -4784,7 +4761,7 @@ do_more: ext4_error(sb, __func__, "Freeing blocks in system zone - " - "Block = %lu, count = %lu", block, count); + "Block = %llu, count = %lu", block, count); /* err = 0. ext4_std_error should be a no op */ goto error_return; } diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 75e34f69215..c96bb19f58f 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -19,7 +19,6 @@ #include <linux/seq_file.h> #include <linux/version.h> #include <linux/blkdev.h> -#include <linux/marker.h> #include <linux/mutex.h> #include "ext4_jbd2.h" #include "ext4.h" diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index fe64d9f7985..313a50b3974 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode) struct inode *tmp_inode = NULL; struct list_blocks_struct lb; unsigned long max_entries; + __u32 goal; /* * If the filesystem does not support extents, or the inode @@ -483,9 +484,10 @@ int ext4_ext_migrate(struct inode *inode) retval = PTR_ERR(handle); return retval; } - tmp_inode = ext4_new_inode(handle, - inode->i_sb->s_root->d_inode, - S_IFREG); + goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * + EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; + tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, + S_IFREG, 0, goal); if (IS_ERR(tmp_inode)) { retval = -ENOMEM; ext4_journal_stop(handle); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c new file mode 100644 index 00000000000..bbf2dd9404d --- /dev/null +++ b/fs/ext4/move_extent.c @@ -0,0 +1,1320 @@ +/* + * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd. + * Written by Takashi Sato <t-sato@yk.jp.nec.com> + * Akira Fujita <a-fujita@rs.jp.nec.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/fs.h> +#include <linux/quotaops.h> +#include "ext4_jbd2.h" +#include "ext4_extents.h" +#include "ext4.h" + +#define get_ext_path(path, inode, block, ret) \ + do { \ + path = ext4_ext_find_extent(inode, block, path); \ + if (IS_ERR(path)) { \ + ret = PTR_ERR(path); \ + path = NULL; \ + } \ + } while (0) + +/** + * copy_extent_status - Copy the extent's initialization status + * + * @src: an extent for getting initialize status + * @dest: an extent to be set the status + */ +static void +copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) +{ + if (ext4_ext_is_uninitialized(src)) + ext4_ext_mark_uninitialized(dest); + else + dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest)); +} + +/** + * mext_next_extent - Search for the next extent and set it to "extent" + * + * @inode: inode which is searched + * @path: this will obtain data for the next extent + * @extent: pointer to the next extent we have just gotten + * + * Search the next extent in the array of ext4_ext_path structure (@path) + * and set it to ext4_extent structure (@extent). In addition, the member of + * @path (->p_ext) also points the next extent. Return 0 on success, 1 if + * ext4_ext_path structure refers to the last extent, or a negative error + * value on failure. + */ +static int +mext_next_extent(struct inode *inode, struct ext4_ext_path *path, + struct ext4_extent **extent) +{ + int ppos, leaf_ppos = path->p_depth; + + ppos = leaf_ppos; + if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { + /* leaf block */ + *extent = ++path[ppos].p_ext; + return 0; + } + + while (--ppos >= 0) { + if (EXT_LAST_INDEX(path[ppos].p_hdr) > + path[ppos].p_idx) { + int cur_ppos = ppos; + + /* index block */ + path[ppos].p_idx++; + path[ppos].p_block = idx_pblock(path[ppos].p_idx); + if (path[ppos+1].p_bh) + brelse(path[ppos+1].p_bh); + path[ppos+1].p_bh = + sb_bread(inode->i_sb, path[ppos].p_block); + if (!path[ppos+1].p_bh) + return -EIO; + path[ppos+1].p_hdr = + ext_block_hdr(path[ppos+1].p_bh); + + /* Halfway index block */ + while (++cur_ppos < leaf_ppos) { + path[cur_ppos].p_idx = + EXT_FIRST_INDEX(path[cur_ppos].p_hdr); + path[cur_ppos].p_block = + idx_pblock(path[cur_ppos].p_idx); + if (path[cur_ppos+1].p_bh) + brelse(path[cur_ppos+1].p_bh); + path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, + path[cur_ppos].p_block); + if (!path[cur_ppos+1].p_bh) + return -EIO; + path[cur_ppos+1].p_hdr = + ext_block_hdr(path[cur_ppos+1].p_bh); + } + + /* leaf block */ + path[leaf_ppos].p_ext = *extent = + EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); + return 0; + } + } + /* We found the last extent */ + return 1; +} + +/** + * mext_double_down_read - Acquire two inodes' read semaphore + * + * @orig_inode: original inode structure + * @donor_inode: donor inode structure + * Acquire read semaphore of the two inodes (orig and donor) by i_ino order. + */ +static void +mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) +{ + struct inode *first = orig_inode, *second = donor_inode; + + BUG_ON(orig_inode == NULL || donor_inode == NULL); + + /* + * Use the inode number to provide the stable locking order instead + * of its address, because the C language doesn't guarantee you can + * compare pointers that don't come from the same array. + */ + if (donor_inode->i_ino < orig_inode->i_ino) { + first = donor_inode; + second = orig_inode; + } + + down_read(&EXT4_I(first)->i_data_sem); + down_read(&EXT4_I(second)->i_data_sem); +} + +/** + * mext_double_down_write - Acquire two inodes' write semaphore + * + * @orig_inode: original inode structure + * @donor_inode: donor inode structure + * Acquire write semaphore of the two inodes (orig and donor) by i_ino order. + */ +static void +mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) +{ + struct inode *first = orig_inode, *second = donor_inode; + + BUG_ON(orig_inode == NULL || donor_inode == NULL); + + /* + * Use the inode number to provide the stable locking order instead + * of its address, because the C language doesn't guarantee you can + * compare pointers that don't come from the same array. + */ + if (donor_inode->i_ino < orig_inode->i_ino) { + first = donor_inode; + second = orig_inode; + } + + down_write(&EXT4_I(first)->i_data_sem); + down_write(&EXT4_I(second)->i_data_sem); +} + +/** + * mext_double_up_read - Release two inodes' read semaphore + * + * @orig_inode: original inode structure to be released its lock first + * @donor_inode: donor inode structure to be released its lock second + * Release read semaphore of two inodes (orig and donor). + */ +static void +mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) +{ + BUG_ON(orig_inode == NULL || donor_inode == NULL); + + up_read(&EXT4_I(orig_inode)->i_data_sem); + up_read(&EXT4_I(donor_inode)->i_data_sem); +} + +/** + * mext_double_up_write - Release two inodes' write semaphore + * + * @orig_inode: original inode structure to be released its lock first + * @donor_inode: donor inode structure to be released its lock second + * Release write semaphore of two inodes (orig and donor). + */ +static void +mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) +{ + BUG_ON(orig_inode == NULL || donor_inode == NULL); + + up_write(&EXT4_I(orig_inode)->i_data_sem); + up_write(&EXT4_I(donor_inode)->i_data_sem); +} + +/** + * mext_insert_across_blocks - Insert extents across leaf block + * + * @handle: journal handle + * @orig_inode: original inode + * @o_start: first original extent to be changed + * @o_end: last original extent to be changed + * @start_ext: first new extent to be inserted + * @new_ext: middle of new extent to be inserted + * @end_ext: last new extent to be inserted + * + * Allocate a new leaf block and insert extents into it. Return 0 on success, + * or a negative error value on failure. + */ +static int +mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, + struct ext4_extent *o_start, struct ext4_extent *o_end, + struct ext4_extent *start_ext, struct ext4_extent *new_ext, + struct ext4_extent *end_ext) +{ + struct ext4_ext_path *orig_path = NULL; + ext4_lblk_t eblock = 0; + int new_flag = 0; + int end_flag = 0; + int err = 0; + + if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) { + if (o_start == o_end) { + + /* start_ext new_ext end_ext + * donor |---------|-----------|--------| + * orig |------------------------------| + */ + end_flag = 1; + } else { + + /* start_ext new_ext end_ext + * donor |---------|----------|---------| + * orig |---------------|--------------| + */ + o_end->ee_block = end_ext->ee_block; + o_end->ee_len = end_ext->ee_len; + ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); + } + + o_start->ee_len = start_ext->ee_len; + new_flag = 1; + + } else if (start_ext->ee_len && new_ext->ee_len && + !end_ext->ee_len && o_start == o_end) { + + /* start_ext new_ext + * donor |--------------|---------------| + * orig |------------------------------| + */ + o_start->ee_len = start_ext->ee_len; + new_flag = 1; + + } else if (!start_ext->ee_len && new_ext->ee_len && + end_ext->ee_len && o_start == o_end) { + + /* new_ext end_ext + * donor |--------------|---------------| + * orig |------------------------------| + */ + o_end->ee_block = end_ext->ee_block; + o_end->ee_len = end_ext->ee_len; + ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); + + /* + * Set 0 to the extent block if new_ext was + * the first block. + */ + if (new_ext->ee_block) + eblock = le32_to_cpu(new_ext->ee_block); + + new_flag = 1; + } else { + ext4_debug("ext4 move extent: Unexpected insert case\n"); + return -EIO; + } + + if (new_flag) { + get_ext_path(orig_path, orig_inode, eblock, err); + if (orig_path == NULL) + goto out; + + if (ext4_ext_insert_extent(handle, orig_inode, + orig_path, new_ext)) + goto out; + } + + if (end_flag) { + get_ext_path(orig_path, orig_inode, + le32_to_cpu(end_ext->ee_block) - 1, err); + if (orig_path == NULL) + goto out; + + if (ext4_ext_insert_extent(handle, orig_inode, + orig_path, end_ext)) + goto out; + } +out: + if (orig_path) { + ext4_ext_drop_refs(orig_path); + kfree(orig_path); + } + + return err; + +} + +/** + * mext_insert_inside_block - Insert new extent to the extent block + * + * @o_start: first original extent to be moved + * @o_end: last original extent to be moved + * @start_ext: first new extent to be inserted + * @new_ext: middle of new extent to be inserted + * @end_ext: last new extent to be inserted + * @eh: extent header of target leaf block + * @range_to_move: used to decide how to insert extent + * + * Insert extents into the leaf block. The extent (@o_start) is overwritten + * by inserted extents. + */ +static void +mext_insert_inside_block(struct ext4_extent *o_start, + struct ext4_extent *o_end, + struct ext4_extent *start_ext, + struct ext4_extent *new_ext, + struct ext4_extent *end_ext, + struct ext4_extent_header *eh, + int range_to_move) +{ + int i = 0; + unsigned long len; + + /* Move the existing extents */ + if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) { + len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) - + (unsigned long)(o_end + 1); + memmove(o_end + 1 + range_to_move, o_end + 1, len); + } + + /* Insert start entry */ + if (start_ext->ee_len) + o_start[i++].ee_len = start_ext->ee_len; + + /* Insert new entry */ + if (new_ext->ee_len) { + o_start[i] = *new_ext; + ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext)); + } + + /* Insert end entry */ + if (end_ext->ee_len) + o_start[i] = *end_ext; + + /* Increment the total entries counter on the extent block */ + le16_add_cpu(&eh->eh_entries, range_to_move); +} + +/** + * mext_insert_extents - Insert new extent + * + * @handle: journal handle + * @orig_inode: original inode + * @orig_path: path indicates first extent to be changed + * @o_start: first original extent to be changed + * @o_end: last original extent to be changed + * @start_ext: first new extent to be inserted + * @new_ext: middle of new extent to be inserted + * @end_ext: last new extent to be inserted + * + * Call the function to insert extents. If we cannot add more extents into + * the leaf block, we call mext_insert_across_blocks() to create a + * new leaf block. Otherwise call mext_insert_inside_block(). Return 0 + * on success, or a negative error value on failure. + */ +static int +mext_insert_extents(handle_t *handle, struct inode *orig_inode, + struct ext4_ext_path *orig_path, + struct ext4_extent *o_start, + struct ext4_extent *o_end, + struct ext4_extent *start_ext, + struct ext4_extent *new_ext, + struct ext4_extent *end_ext) +{ + struct ext4_extent_header *eh; + unsigned long need_slots, slots_range; + int range_to_move, depth, ret; + + /* + * The extents need to be inserted + * start_extent + new_extent + end_extent. + */ + need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) + + (new_ext->ee_len ? 1 : 0); + + /* The number of slots between start and end */ + slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1) + / sizeof(struct ext4_extent); + + /* Range to move the end of extent */ + range_to_move = need_slots - slots_range; + depth = orig_path->p_depth; + orig_path += depth; + eh = orig_path->p_hdr; + + if (depth) { + /* Register to journal */ + ret = ext4_journal_get_write_access(handle, orig_path->p_bh); + if (ret) + return ret; + } + + /* Expansion */ + if (range_to_move > 0 && + (range_to_move > le16_to_cpu(eh->eh_max) + - le16_to_cpu(eh->eh_entries))) { + + ret = mext_insert_across_blocks(handle, orig_inode, o_start, + o_end, start_ext, new_ext, end_ext); + if (ret < 0) + return ret; + } else + mext_insert_inside_block(o_start, o_end, start_ext, new_ext, + end_ext, eh, range_to_move); + + if (depth) { + ret = ext4_handle_dirty_metadata(handle, orig_inode, + orig_path->p_bh); + if (ret) + return ret; + } else { + ret = ext4_mark_inode_dirty(handle, orig_inode); + if (ret < 0) + return ret; + } + + return 0; +} + +/** + * mext_leaf_block - Move one leaf extent block into the inode. + * + * @handle: journal handle + * @orig_inode: original inode + * @orig_path: path indicates first extent to be changed + * @dext: donor extent + * @from: start offset on the target file + * + * In order to insert extents into the leaf block, we must divide the extent + * in the leaf block into three extents. The one is located to be inserted + * extents, and the others are located around it. + * + * Therefore, this function creates structures to save extents of the leaf + * block, and inserts extents by calling mext_insert_extents() with + * created extents. Return 0 on success, or a negative error value on failure. + */ +static int +mext_leaf_block(handle_t *handle, struct inode *orig_inode, + struct ext4_ext_path *orig_path, struct ext4_extent *dext, + ext4_lblk_t *from) +{ + struct ext4_extent *oext, *o_start, *o_end, *prev_ext; + struct ext4_extent new_ext, start_ext, end_ext; + ext4_lblk_t new_ext_end; + ext4_fsblk_t new_phys_end; + int oext_alen, new_ext_alen, end_ext_alen; + int depth = ext_depth(orig_inode); + int ret; + + o_start = o_end = oext = orig_path[depth].p_ext; + oext_alen = ext4_ext_get_actual_len(oext); + start_ext.ee_len = end_ext.ee_len = 0; + + new_ext.ee_block = cpu_to_le32(*from); + ext4_ext_store_pblock(&new_ext, ext_pblock(dext)); + new_ext.ee_len = dext->ee_len; + new_ext_alen = ext4_ext_get_actual_len(&new_ext); + new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; + new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1; + + /* + * Case: original extent is first + * oext |--------| + * new_ext |--| + * start_ext |--| + */ + if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) && + le32_to_cpu(new_ext.ee_block) < + le32_to_cpu(oext->ee_block) + oext_alen) { + start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - + le32_to_cpu(oext->ee_block)); + copy_extent_status(oext, &start_ext); + } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { + prev_ext = oext - 1; + /* + * We can merge new_ext into previous extent, + * if these are contiguous and same extent type. + */ + if (ext4_can_extents_be_merged(orig_inode, prev_ext, + &new_ext)) { + o_start = prev_ext; + start_ext.ee_len = cpu_to_le16( + ext4_ext_get_actual_len(prev_ext) + + new_ext_alen); + copy_extent_status(prev_ext, &start_ext); + new_ext.ee_len = 0; + } + } + + /* + * Case: new_ext_end must be less than oext + * oext |-----------| + * new_ext |-------| + */ + BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end); + + /* + * Case: new_ext is smaller than original extent + * oext |---------------| + * new_ext |-----------| + * end_ext |---| + */ + if (le32_to_cpu(oext->ee_block) <= new_ext_end && + new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) { + end_ext.ee_len = + cpu_to_le16(le32_to_cpu(oext->ee_block) + + oext_alen - 1 - new_ext_end); + copy_extent_status(oext, &end_ext); + end_ext_alen = ext4_ext_get_actual_len(&end_ext); + ext4_ext_store_pblock(&end_ext, + (ext_pblock(o_end) + oext_alen - end_ext_alen)); + end_ext.ee_block = + cpu_to_le32(le32_to_cpu(o_end->ee_block) + + oext_alen - end_ext_alen); + } + + ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, + o_end, &start_ext, &new_ext, &end_ext); + return ret; +} + +/** + * mext_calc_swap_extents - Calculate extents for extent swapping. + * + * @tmp_dext: the extent that will belong to the original inode + * @tmp_oext: the extent that will belong to the donor inode + * @orig_off: block offset of original inode + * @donor_off: block offset of donor inode + * @max_count: the maximun length of extents + */ +static void +mext_calc_swap_extents(struct ext4_extent *tmp_dext, + struct ext4_extent *tmp_oext, + ext4_lblk_t orig_off, ext4_lblk_t donor_off, + ext4_lblk_t max_count) +{ + ext4_lblk_t diff, orig_diff; + struct ext4_extent dext_old, oext_old; + + dext_old = *tmp_dext; + oext_old = *tmp_oext; + + /* When tmp_dext is too large, pick up the target range. */ + diff = donor_off - le32_to_cpu(tmp_dext->ee_block); + + ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff); + tmp_dext->ee_block = + cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); + tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); + + if (max_count < ext4_ext_get_actual_len(tmp_dext)) + tmp_dext->ee_len = cpu_to_le16(max_count); + + orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); + ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff); + + /* Adjust extent length if donor extent is larger than orig */ + if (ext4_ext_get_actual_len(tmp_dext) > + ext4_ext_get_actual_len(tmp_oext) - orig_diff) + tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) - + orig_diff); + + tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext)); + + copy_extent_status(&oext_old, tmp_dext); + copy_extent_status(&dext_old, tmp_oext); +} + +/** + * mext_replace_branches - Replace original extents with new extents + * + * @handle: journal handle + * @orig_inode: original inode + * @donor_inode: donor inode + * @from: block offset of orig_inode + * @count: block count to be replaced + * + * Replace original inode extents and donor inode extents page by page. + * We implement this replacement in the following three steps: + * 1. Save the block information of original and donor inodes into + * dummy extents. + * 2. Change the block information of original inode to point at the + * donor inode blocks. + * 3. Change the block information of donor inode to point at the saved + * original inode blocks in the dummy extents. + * + * Return 0 on success, or a negative error value on failure. + */ +static int +mext_replace_branches(handle_t *handle, struct inode *orig_inode, + struct inode *donor_inode, ext4_lblk_t from, + ext4_lblk_t count) +{ + struct ext4_ext_path *orig_path = NULL; + struct ext4_ext_path *donor_path = NULL; + struct ext4_extent *oext, *dext; + struct ext4_extent tmp_dext, tmp_oext; + ext4_lblk_t orig_off = from, donor_off = from; + int err = 0; + int depth; + int replaced_count = 0; + int dext_alen; + + mext_double_down_write(orig_inode, donor_inode); + + /* Get the original extent for the block "orig_off" */ + get_ext_path(orig_path, orig_inode, orig_off, err); + if (orig_path == NULL) + goto out; + + /* Get the donor extent for the head */ + get_ext_path(donor_path, donor_inode, donor_off, err); + if (donor_path == NULL) + goto out; + depth = ext_depth(orig_inode); + oext = orig_path[depth].p_ext; + tmp_oext = *oext; + + depth = ext_depth(donor_inode); + dext = donor_path[depth].p_ext; + tmp_dext = *dext; + + mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, + donor_off, count); + + /* Loop for the donor extents */ + while (1) { + /* The extent for donor must be found. */ + BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block)); + + /* Set donor extent to orig extent */ + err = mext_leaf_block(handle, orig_inode, + orig_path, &tmp_dext, &orig_off); + if (err < 0) + goto out; + + /* Set orig extent to donor extent */ + err = mext_leaf_block(handle, donor_inode, + donor_path, &tmp_oext, &donor_off); + if (err < 0) + goto out; + + dext_alen = ext4_ext_get_actual_len(&tmp_dext); + replaced_count += dext_alen; + donor_off += dext_alen; + orig_off += dext_alen; + + /* Already moved the expected blocks */ + if (replaced_count >= count) + break; + + if (orig_path) + ext4_ext_drop_refs(orig_path); + get_ext_path(orig_path, orig_inode, orig_off, err); + if (orig_path == NULL) + goto out; + depth = ext_depth(orig_inode); + oext = orig_path[depth].p_ext; + if (le32_to_cpu(oext->ee_block) + + ext4_ext_get_actual_len(oext) <= orig_off) { + err = 0; + goto out; + } + tmp_oext = *oext; + + if (donor_path) + ext4_ext_drop_refs(donor_path); + get_ext_path(donor_path, donor_inode, + donor_off, err); + if (donor_path == NULL) + goto out; + depth = ext_depth(donor_inode); + dext = donor_path[depth].p_ext; + if (le32_to_cpu(dext->ee_block) + + ext4_ext_get_actual_len(dext) <= donor_off) { + err = 0; + goto out; + } + tmp_dext = *dext; + + mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, + donor_off, + count - replaced_count); + } + +out: + if (orig_path) { + ext4_ext_drop_refs(orig_path); + kfree(orig_path); + } + if (donor_path) { + ext4_ext_drop_refs(donor_path); + kfree(donor_path); + } + + mext_double_up_write(orig_inode, donor_inode); + return err; +} + +/** + * move_extent_per_page - Move extent data per page + * + * @o_filp: file structure of original file + * @donor_inode: donor inode + * @orig_page_offset: page index on original file + * @data_offset_in_page: block index where data swapping starts + * @block_len_in_page: the number of blocks to be swapped + * @uninit: orig extent is uninitialized or not + * + * Save the data in original inode blocks and replace original inode extents + * with donor inode extents by calling mext_replace_branches(). + * Finally, write out the saved data in new original inode blocks. Return 0 + * on success, or a negative error value on failure. + */ +static int +move_extent_par_page(struct file *o_filp, struct inode *donor_inode, + pgoff_t orig_page_offset, int data_offset_in_page, + int block_len_in_page, int uninit) +{ + struct inode *orig_inode = o_filp->f_dentry->d_inode; + struct address_space *mapping = orig_inode->i_mapping; + struct buffer_head *bh; + struct page *page = NULL; + const struct address_space_operations *a_ops = mapping->a_ops; + handle_t *handle; + ext4_lblk_t orig_blk_offset; + long long offs = orig_page_offset << PAGE_CACHE_SHIFT; + unsigned long blocksize = orig_inode->i_sb->s_blocksize; + unsigned int w_flags = 0; + unsigned int tmp_data_len, data_len; + void *fsdata; + int ret, i, jblocks; + int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; + + /* + * It needs twice the amount of ordinary journal buffers because + * inode and donor_inode may change each different metadata blocks. + */ + jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; + handle = ext4_journal_start(orig_inode, jblocks); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + return ret; + } + + if (segment_eq(get_fs(), KERNEL_DS)) + w_flags |= AOP_FLAG_UNINTERRUPTIBLE; + + orig_blk_offset = orig_page_offset * blocks_per_page + + data_offset_in_page; + + /* + * If orig extent is uninitialized one, + * it's not necessary force the page into memory + * and then force it to be written out again. + * Just swap data blocks between orig and donor. + */ + if (uninit) { + ret = mext_replace_branches(handle, orig_inode, + donor_inode, orig_blk_offset, + block_len_in_page); + + /* Clear the inode cache not to refer to the old data */ + ext4_ext_invalidate_cache(orig_inode); + ext4_ext_invalidate_cache(donor_inode); + goto out2; + } + + offs = (long long)orig_blk_offset << orig_inode->i_blkbits; + + /* Calculate data_len */ + if ((orig_blk_offset + block_len_in_page - 1) == + ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { + /* Replace the last block */ + tmp_data_len = orig_inode->i_size & (blocksize - 1); + /* + * If data_len equal zero, it shows data_len is multiples of + * blocksize. So we set appropriate value. + */ + if (tmp_data_len == 0) + tmp_data_len = blocksize; + + data_len = tmp_data_len + + ((block_len_in_page - 1) << orig_inode->i_blkbits); + } else { + data_len = block_len_in_page << orig_inode->i_blkbits; + } + + ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, + &page, &fsdata); + if (unlikely(ret < 0)) + goto out; + + if (!PageUptodate(page)) { + mapping->a_ops->readpage(o_filp, page); + lock_page(page); + } + + /* + * try_to_release_page() doesn't call releasepage in writeback mode. + * We should care about the order of writing to the same file + * by multiple move extent processes. + * It needs to call wait_on_page_writeback() to wait for the + * writeback of the page. + */ + if (PageWriteback(page)) + wait_on_page_writeback(page); + + /* Release old bh and drop refs */ + try_to_release_page(page, 0); + + ret = mext_replace_branches(handle, orig_inode, donor_inode, + orig_blk_offset, block_len_in_page); + if (ret < 0) + goto out; + + /* Clear the inode cache not to refer to the old data */ + ext4_ext_invalidate_cache(orig_inode); + ext4_ext_invalidate_cache(donor_inode); + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); + + bh = page_buffers(page); + for (i = 0; i < data_offset_in_page; i++) + bh = bh->b_this_page; + + for (i = 0; i < block_len_in_page; i++) { + ret = ext4_get_block(orig_inode, + (sector_t)(orig_blk_offset + i), bh, 0); + if (ret < 0) + goto out; + + if (bh->b_this_page != NULL) + bh = bh->b_this_page; + } + + ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, + page, fsdata); + page = NULL; + +out: + if (unlikely(page)) { + if (PageLocked(page)) + unlock_page(page); + page_cache_release(page); + } +out2: + ext4_journal_stop(handle); + + return ret < 0 ? ret : 0; +} + +/** + * mext_check_argumants - Check whether move extent can be done + * + * @orig_inode: original inode + * @donor_inode: donor inode + * @orig_start: logical start offset in block for orig + * @donor_start: logical start offset in block for donor + * @len: the number of blocks to be moved + * @moved_len: moved block length + * + * Check the arguments of ext4_move_extents() whether the files can be + * exchanged with each other. + * Return 0 on success, or a negative error value on failure. + */ +static int +mext_check_arguments(struct inode *orig_inode, + struct inode *donor_inode, __u64 orig_start, + __u64 donor_start, __u64 *len, __u64 moved_len) +{ + /* Regular file check */ + if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { + ext4_debug("ext4 move extent: The argument files should be " + "regular file [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + /* Ext4 move extent does not support swapfile */ + if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { + ext4_debug("ext4 move extent: The argument files should " + "not be swapfile [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + /* Files should be in the same ext4 FS */ + if (orig_inode->i_sb != donor_inode->i_sb) { + ext4_debug("ext4 move extent: The argument files " + "should be in same FS [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + /* orig and donor should be different file */ + if (orig_inode->i_ino == donor_inode->i_ino) { + ext4_debug("ext4 move extent: The argument files should not " + "be same file [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + /* Ext4 move extent supports only extent based file */ + if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) { + ext4_debug("ext4 move extent: orig file is not extents " + "based file [ino:orig %lu]\n", orig_inode->i_ino); + return -EOPNOTSUPP; + } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) { + ext4_debug("ext4 move extent: donor file is not extents " + "based file [ino:donor %lu]\n", donor_inode->i_ino); + return -EOPNOTSUPP; + } + + if ((!orig_inode->i_size) || (!donor_inode->i_size)) { + ext4_debug("ext4 move extent: File size is 0 byte\n"); + return -EINVAL; + } + + /* Start offset should be same */ + if (orig_start != donor_start) { + ext4_debug("ext4 move extent: orig and donor's start " + "offset are not same [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + if (moved_len) { + ext4_debug("ext4 move extent: moved_len should be 0 " + "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, + donor_inode->i_ino); + return -EINVAL; + } + + if ((orig_start > MAX_DEFRAG_SIZE) || + (donor_start > MAX_DEFRAG_SIZE) || + (*len > MAX_DEFRAG_SIZE) || + (orig_start + *len > MAX_DEFRAG_SIZE)) { + ext4_debug("ext4 move extent: Can't handle over [%lu] blocks " + "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE, + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + if (orig_inode->i_size > donor_inode->i_size) { + if (orig_start >= donor_inode->i_size) { + ext4_debug("ext4 move extent: orig start offset " + "[%llu] should be less than donor file size " + "[%lld] [ino:orig %lu, donor_inode %lu]\n", + orig_start, donor_inode->i_size, + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + if (orig_start + *len > donor_inode->i_size) { + ext4_debug("ext4 move extent: End offset [%llu] should " + "be less than donor file size [%lld]." + "So adjust length from %llu to %lld " + "[ino:orig %lu, donor %lu]\n", + orig_start + *len, donor_inode->i_size, + *len, donor_inode->i_size - orig_start, + orig_inode->i_ino, donor_inode->i_ino); + *len = donor_inode->i_size - orig_start; + } + } else { + if (orig_start >= orig_inode->i_size) { + ext4_debug("ext4 move extent: start offset [%llu] " + "should be less than original file size " + "[%lld] [inode:orig %lu, donor %lu]\n", + orig_start, orig_inode->i_size, + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + if (orig_start + *len > orig_inode->i_size) { + ext4_debug("ext4 move extent: Adjust length " + "from %llu to %lld. Because it should be " + "less than original file size " + "[ino:orig %lu, donor %lu]\n", + *len, orig_inode->i_size - orig_start, + orig_inode->i_ino, donor_inode->i_ino); + *len = orig_inode->i_size - orig_start; + } + } + + if (!*len) { + ext4_debug("ext4 move extent: len shoudld not be 0 " + "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, + donor_inode->i_ino); + return -EINVAL; + } + + return 0; +} + +/** + * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2 + * + * @inode1: the inode structure + * @inode2: the inode structure + * + * Lock two inodes' i_mutex by i_ino order. This function is moved from + * fs/inode.c. + */ +static void +mext_inode_double_lock(struct inode *inode1, struct inode *inode2) +{ + if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { + if (inode1) + mutex_lock(&inode1->i_mutex); + else if (inode2) + mutex_lock(&inode2->i_mutex); + return; + } + + if (inode1->i_ino < inode2->i_ino) { + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); + } +} + +/** + * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2 + * + * @inode1: the inode that is released first + * @inode2: the inode that is released second + * + * This function is moved from fs/inode.c. + */ + +static void +mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) +{ + if (inode1) + mutex_unlock(&inode1->i_mutex); + + if (inode2 && inode2 != inode1) + mutex_unlock(&inode2->i_mutex); +} + +/** + * ext4_move_extents - Exchange the specified range of a file + * + * @o_filp: file structure of the original file + * @d_filp: file structure of the donor file + * @orig_start: start offset in block for orig + * @donor_start: start offset in block for donor + * @len: the number of blocks to be moved + * @moved_len: moved block length + * + * This function returns 0 and moved block length is set in moved_len + * if succeed, otherwise returns error value. + * + * Note: ext4_move_extents() proceeds the following order. + * 1:ext4_move_extents() calculates the last block number of moving extent + * function by the start block number (orig_start) and the number of blocks + * to be moved (len) specified as arguments. + * If the {orig, donor}_start points a hole, the extent's start offset + * pointed by ext_cur (current extent), holecheck_path, orig_path are set + * after hole behind. + * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent + * or the ext_cur exceeds the block_end which is last logical block number. + * 3:To get the length of continues area, call mext_next_extent() + * specified with the ext_cur (initial value is holecheck_path) re-cursive, + * until find un-continuous extent, the start logical block number exceeds + * the block_end or the extent points to the last extent. + * 4:Exchange the original inode data with donor inode data + * from orig_page_offset to seq_end_page. + * The start indexes of data are specified as arguments. + * That of the original inode is orig_page_offset, + * and the donor inode is also orig_page_offset + * (To easily handle blocksize != pagesize case, the offset for the + * donor inode is block unit). + * 5:Update holecheck_path and orig_path to points a next proceeding extent, + * then returns to step 2. + * 6:Release holecheck_path, orig_path and set the len to moved_len + * which shows the number of moved blocks. + * The moved_len is useful for the command to calculate the file offset + * for starting next move extent ioctl. + * 7:Return 0 on success, or a negative error value on failure. + */ +int +ext4_move_extents(struct file *o_filp, struct file *d_filp, + __u64 orig_start, __u64 donor_start, __u64 len, + __u64 *moved_len) +{ + struct inode *orig_inode = o_filp->f_dentry->d_inode; + struct inode *donor_inode = d_filp->f_dentry->d_inode; + struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; + struct ext4_extent *ext_prev, *ext_cur, *ext_dummy; + ext4_lblk_t block_start = orig_start; + ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; + ext4_lblk_t rest_blocks; + pgoff_t orig_page_offset = 0, seq_end_page; + int ret, depth, last_extent = 0; + int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; + int data_offset_in_page; + int block_len_in_page; + int uninit; + + /* protect orig and donor against a truncate */ + mext_inode_double_lock(orig_inode, donor_inode); + + mext_double_down_read(orig_inode, donor_inode); + /* Check the filesystem environment whether move_extent can be done */ + ret = mext_check_arguments(orig_inode, donor_inode, orig_start, + donor_start, &len, *moved_len); + mext_double_up_read(orig_inode, donor_inode); + if (ret) + goto out2; + + file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; + block_end = block_start + len - 1; + if (file_end < block_end) + len -= block_end - file_end; + + get_ext_path(orig_path, orig_inode, block_start, ret); + if (orig_path == NULL) + goto out2; + + /* Get path structure to check the hole */ + get_ext_path(holecheck_path, orig_inode, block_start, ret); + if (holecheck_path == NULL) + goto out; + + depth = ext_depth(orig_inode); + ext_cur = holecheck_path[depth].p_ext; + if (ext_cur == NULL) { + ret = -EINVAL; + goto out; + } + + /* + * Get proper extent whose ee_block is beyond block_start + * if block_start was within the hole. + */ + if (le32_to_cpu(ext_cur->ee_block) + + ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { + last_extent = mext_next_extent(orig_inode, + holecheck_path, &ext_cur); + if (last_extent < 0) { + ret = last_extent; + goto out; + } + last_extent = mext_next_extent(orig_inode, orig_path, + &ext_dummy); + if (last_extent < 0) { + ret = last_extent; + goto out; + } + } + seq_start = block_start; + + /* No blocks within the specified range. */ + if (le32_to_cpu(ext_cur->ee_block) > block_end) { + ext4_debug("ext4 move extent: The specified range of file " + "may be the hole\n"); + ret = -EINVAL; + goto out; + } + + /* Adjust start blocks */ + add_blocks = min(le32_to_cpu(ext_cur->ee_block) + + ext4_ext_get_actual_len(ext_cur), block_end + 1) - + max(le32_to_cpu(ext_cur->ee_block), block_start); + + while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) { + seq_blocks += add_blocks; + + /* Adjust tail blocks */ + if (seq_start + seq_blocks - 1 > block_end) + seq_blocks = block_end - seq_start + 1; + + ext_prev = ext_cur; + last_extent = mext_next_extent(orig_inode, holecheck_path, + &ext_cur); + if (last_extent < 0) { + ret = last_extent; + break; + } + add_blocks = ext4_ext_get_actual_len(ext_cur); + + /* + * Extend the length of contiguous block (seq_blocks) + * if extents are contiguous. + */ + if (ext4_can_extents_be_merged(orig_inode, + ext_prev, ext_cur) && + block_end >= le32_to_cpu(ext_cur->ee_block) && + !last_extent) + continue; + + /* Is original extent is uninitialized */ + uninit = ext4_ext_is_uninitialized(ext_prev); + + data_offset_in_page = seq_start % blocks_per_page; + + /* + * Calculate data blocks count that should be swapped + * at the first page. + */ + if (data_offset_in_page + seq_blocks > blocks_per_page) { + /* Swapped blocks are across pages */ + block_len_in_page = + blocks_per_page - data_offset_in_page; + } else { + /* Swapped blocks are in a page */ + block_len_in_page = seq_blocks; + } + + orig_page_offset = seq_start >> + (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); + seq_end_page = (seq_start + seq_blocks - 1) >> + (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); + seq_start = le32_to_cpu(ext_cur->ee_block); + rest_blocks = seq_blocks; + + /* Discard preallocations of two inodes */ + down_write(&EXT4_I(orig_inode)->i_data_sem); + ext4_discard_preallocations(orig_inode); + up_write(&EXT4_I(orig_inode)->i_data_sem); + + down_write(&EXT4_I(donor_inode)->i_data_sem); + ext4_discard_preallocations(donor_inode); + up_write(&EXT4_I(donor_inode)->i_data_sem); + + while (orig_page_offset <= seq_end_page) { + + /* Swap original branches with new branches */ + ret = move_extent_par_page(o_filp, donor_inode, + orig_page_offset, + data_offset_in_page, + block_len_in_page, uninit); + if (ret < 0) + goto out; + orig_page_offset++; + /* Count how many blocks we have exchanged */ + *moved_len += block_len_in_page; + BUG_ON(*moved_len > len); + + data_offset_in_page = 0; + rest_blocks -= block_len_in_page; + if (rest_blocks > blocks_per_page) + block_len_in_page = blocks_per_page; + else + block_len_in_page = rest_blocks; + } + + /* Decrease buffer counter */ + if (holecheck_path) + ext4_ext_drop_refs(holecheck_path); + get_ext_path(holecheck_path, orig_inode, + seq_start, ret); + if (holecheck_path == NULL) + break; + depth = holecheck_path->p_depth; + + /* Decrease buffer counter */ + if (orig_path) + ext4_ext_drop_refs(orig_path); + get_ext_path(orig_path, orig_inode, seq_start, ret); + if (orig_path == NULL) + break; + + ext_cur = holecheck_path[depth].p_ext; + add_blocks = ext4_ext_get_actual_len(ext_cur); + seq_blocks = 0; + + } +out: + if (orig_path) { + ext4_ext_drop_refs(orig_path); + kfree(orig_path); + } + if (holecheck_path) { + ext4_ext_drop_refs(holecheck_path); + kfree(holecheck_path); + } +out2: + mext_inode_double_unlock(orig_inode, donor_inode); + + if (ret) + return ret; + + /* All of the specified blocks must be exchanged in succeed */ + BUG_ON(*moved_len != len); + + return 0; +} diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 07eb6649e4f..de04013d16f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1782,7 +1782,7 @@ retry: if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - inode = ext4_new_inode (handle, dir, mode); + inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext4_file_inode_operations; @@ -1816,7 +1816,7 @@ retry: if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - inode = ext4_new_inode(handle, dir, mode); + inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); @@ -1853,7 +1853,8 @@ retry: if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - inode = ext4_new_inode(handle, dir, S_IFDIR | mode); + inode = ext4_new_inode(handle, dir, S_IFDIR | mode, + &dentry->d_name, 0); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; @@ -2264,7 +2265,8 @@ retry: if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); + inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, + &dentry->d_name, 0); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 27eb289eea3..68b0351fc64 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1002,7 +1002,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, " too large to resize to %llu blocks safely\n", sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) - ext4_warning(sb, __func__, "CONFIG_LBD not enabled"); + ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled"); return -EINVAL; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 012c4251397..8bb9e2d3e4b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -37,7 +37,6 @@ #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/ctype.h> -#include <linux/marker.h> #include <linux/log2.h> #include <linux/crc16.h> #include <asm/uaccess.h> @@ -47,6 +46,9 @@ #include "xattr.h" #include "acl.h" +#define CREATE_TRACE_POINTS +#include <trace/events/ext4.h> + static int default_mb_history_length = 1000; module_param_named(default_mb_history_length, default_mb_history_length, @@ -301,7 +303,7 @@ static void ext4_handle_error(struct super_block *sb) if (!test_opt(sb, ERRORS_CONT)) { journal_t *journal = EXT4_SB(sb)->s_journal; - EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; + EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; if (journal) jbd2_journal_abort(journal, -EIO); } @@ -414,7 +416,7 @@ void ext4_abort(struct super_block *sb, const char *function, ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; sb->s_flags |= MS_RDONLY; - EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; + EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; if (EXT4_SB(sb)->s_journal) jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); } @@ -1474,7 +1476,7 @@ set_qf_format: break; #endif case Opt_abort: - set_opt(sbi->s_mount_opt, ABORT); + sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; break; case Opt_nobarrier: clear_opt(sbi->s_mount_opt, BARRIER); @@ -1653,7 +1655,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, ext4_commit_super(sb, 1); if (test_opt(sb, DEBUG)) printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " - "bpg=%lu, ipg=%lu, mo=%04lx]\n", + "bpg=%lu, ipg=%lu, mo=%04x]\n", sb->s_blocksize, sbi->s_groups_count, EXT4_BLOCKS_PER_GROUP(sb), @@ -1957,7 +1959,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) /* small i_blocks in vfs inode? */ if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { /* - * CONFIG_LBD is not enabled implies the inode + * CONFIG_LBDAF is not enabled implies the inode * i_block represent total blocks in 512 bytes * 32 == size of vfs inode i_blocks * 8 */ @@ -2000,7 +2002,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { /* - * !has_huge_files or CONFIG_LBD not enabled implies that + * !has_huge_files or CONFIG_LBDAF not enabled implies that * the inode i_block field represents total file blocks in * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 */ @@ -2204,6 +2206,7 @@ EXT4_RO_ATTR(session_write_kbytes); EXT4_RO_ATTR(lifetime_write_kbytes); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, inode_readahead_blks_store, s_inode_readahead_blks); +EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); @@ -2216,6 +2219,7 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(session_write_kbytes), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(inode_readahead_blks), + ATTR_LIST(inode_goal), ATTR_LIST(mb_stats), ATTR_LIST(mb_max_to_scan), ATTR_LIST(mb_min_to_scan), @@ -2436,13 +2440,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (has_huge_files) { /* * Large file size enabled file system can only be - * mount if kernel is build with CONFIG_LBD + * mount if kernel is build with CONFIG_LBDAF */ if (sizeof(root->i_blocks) < sizeof(u64) && !(sb->s_flags & MS_RDONLY)) { ext4_msg(sb, KERN_ERR, "Filesystem with huge " "files cannot be mounted read-write " - "without CONFIG_LBD"); + "without CONFIG_LBDAF"); goto failed_mount; } } @@ -2566,7 +2570,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_ERR, "filesystem" " too large to mount safely"); if (sizeof(sector_t) < 8) - ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled"); + ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); goto failed_mount; } @@ -3346,7 +3350,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) int ret = 0; tid_t target; - trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); + trace_ext4_sync_fs(sb, wait); if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { if (wait) jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); @@ -3450,7 +3454,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) + if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) ext4_abort(sb, __func__, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -3465,7 +3469,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || n_blocks_count > ext4_blocks_count(es)) { - if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { + if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS; goto restore_opts; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 304b411cb8b..8970d8c49bb 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -966,7 +966,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->fs_uid = current_uid(); opts->fs_gid = current_gid(); - opts->fs_fmask = current_umask(); + opts->fs_fmask = opts->fs_dmask = current_umask(); opts->allow_utime = -1; opts->codepage = fat_default_codepage; opts->iocharset = fat_default_iocharset; diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index cad957cdb1e..5971359d209 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -1,6 +1,6 @@ config GFS2_FS tristate "GFS2 file system support" - depends on EXPERIMENTAL && (64BIT || LBD) + depends on EXPERIMENTAL && (64BIT || LBDAF) select DLM if GFS2_FS_LOCKING_DLM select CONFIGFS_FS if GFS2_FS_LOCKING_DLM select SYSFS if GFS2_FS_LOCKING_DLM diff --git a/fs/inode.c b/fs/inode.c index f643be565df..04c785bb63c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -665,12 +665,17 @@ void unlock_new_inode(struct inode *inode) if (inode->i_mode & S_IFDIR) { struct file_system_type *type = inode->i_sb->s_type; - /* - * ensure nobody is actually holding i_mutex - */ - mutex_destroy(&inode->i_mutex); - mutex_init(&inode->i_mutex); - lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); + /* Set new key only if filesystem hasn't already changed it */ + if (!lockdep_match_class(&inode->i_mutex, + &type->i_mutex_key)) { + /* + * ensure nobody is actually holding i_mutex + */ + mutex_destroy(&inode->i_mutex); + mutex_init(&inode->i_mutex); + lockdep_set_class(&inode->i_mutex, + &type->i_mutex_dir_key); + } } #endif /* diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 2f0dc5a1463..8ba5441063b 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -195,9 +195,8 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, * Do not report hidden files if so instructed, or associated * files unless instructed to do so */ - if ((sbi->s_hide == 'y' && - (de->flags[-sbi->s_high_sierra] & 1)) || - (sbi->s_showassoc =='n' && + if ((sbi->s_hide && (de->flags[-sbi->s_high_sierra] & 1)) || + (!sbi->s_showassoc && (de->flags[-sbi->s_high_sierra] & 4))) { filp->f_pos += de_len; continue; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 068b34b5a10..58a7963e168 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -141,13 +141,17 @@ static const struct dentry_operations isofs_dentry_ops[] = { }; struct iso9660_options{ - char map; - char rock; + unsigned int rock:1; + unsigned int cruft:1; + unsigned int hide:1; + unsigned int showassoc:1; + unsigned int nocompress:1; + unsigned int overriderockperm:1; + unsigned int uid_set:1; + unsigned int gid_set:1; + unsigned int utf8:1; + unsigned char map; char joliet; - char cruft; - char hide; - char showassoc; - char nocompress; unsigned char check; unsigned int blocksize; mode_t fmode; @@ -155,7 +159,6 @@ struct iso9660_options{ gid_t gid; uid_t uid; char *iocharset; - unsigned char utf8; /* LVE */ s32 session; s32 sbsector; @@ -312,7 +315,7 @@ enum { Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore, Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet, Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err, - Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, + Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, Opt_overriderockperm, }; static const match_table_t tokens = { @@ -340,6 +343,7 @@ static const match_table_t tokens = { {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%u"}, {Opt_dmode, "dmode=%u"}, + {Opt_overriderockperm, "overriderockperm"}, {Opt_block, "block=%u"}, {Opt_ignore, "conv=binary"}, {Opt_ignore, "conv=b"}, @@ -359,24 +363,22 @@ static int parse_options(char *options, struct iso9660_options *popt) int option; popt->map = 'n'; - popt->rock = 'y'; - popt->joliet = 'y'; - popt->cruft = 'n'; - popt->hide = 'n'; - popt->showassoc = 'n'; + popt->rock = 1; + popt->joliet = 1; + popt->cruft = 0; + popt->hide = 0; + popt->showassoc = 0; popt->check = 'u'; /* unset */ popt->nocompress = 0; popt->blocksize = 1024; - popt->fmode = popt->dmode = S_IRUGO | S_IXUGO; /* - * r-x for all. The disc could - * be shared with DOS machines so - * virtually anything could be - * a valid executable. - */ + popt->fmode = popt->dmode = ISOFS_INVALID_MODE; + popt->uid_set = 0; + popt->gid_set = 0; popt->gid = 0; popt->uid = 0; popt->iocharset = NULL; popt->utf8 = 0; + popt->overriderockperm = 0; popt->session=-1; popt->sbsector=-1; if (!options) @@ -393,20 +395,20 @@ static int parse_options(char *options, struct iso9660_options *popt) token = match_token(p, tokens, args); switch (token) { case Opt_norock: - popt->rock = 'n'; + popt->rock = 0; break; case Opt_nojoliet: - popt->joliet = 'n'; + popt->joliet = 0; break; case Opt_hide: - popt->hide = 'y'; + popt->hide = 1; break; case Opt_unhide: case Opt_showassoc: - popt->showassoc = 'y'; + popt->showassoc = 1; break; case Opt_cruft: - popt->cruft = 'y'; + popt->cruft = 1; break; case Opt_utf8: popt->utf8 = 1; @@ -450,11 +452,13 @@ static int parse_options(char *options, struct iso9660_options *popt) if (match_int(&args[0], &option)) return 0; popt->uid = option; + popt->uid_set = 1; break; case Opt_gid: if (match_int(&args[0], &option)) return 0; popt->gid = option; + popt->gid_set = 1; break; case Opt_mode: if (match_int(&args[0], &option)) @@ -466,6 +470,9 @@ static int parse_options(char *options, struct iso9660_options *popt) return 0; popt->dmode = option; break; + case Opt_overriderockperm: + popt->overriderockperm = 1; + break; case Opt_block: if (match_int(&args[0], &option)) return 0; @@ -650,7 +657,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) goto out_freebh; sbi->s_high_sierra = 1; - opt.rock = 'n'; + opt.rock = 0; h_pri = (struct hs_primary_descriptor *)vdp; goto root_found; } @@ -673,7 +680,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) root_found: - if (joliet_level && (pri == NULL || opt.rock == 'n')) { + if (joliet_level && (pri == NULL || !opt.rock)) { /* This is the case of Joliet with the norock mount flag. * A disc with both Joliet and Rock Ridge is handled later */ @@ -802,22 +809,31 @@ root_found: s->s_op = &isofs_sops; s->s_export_op = &isofs_export_ops; sbi->s_mapping = opt.map; - sbi->s_rock = (opt.rock == 'y' ? 2 : 0); + sbi->s_rock = (opt.rock ? 2 : 0); sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/ sbi->s_cruft = opt.cruft; sbi->s_hide = opt.hide; sbi->s_showassoc = opt.showassoc; sbi->s_uid = opt.uid; sbi->s_gid = opt.gid; + sbi->s_uid_set = opt.uid_set; + sbi->s_gid_set = opt.gid_set; sbi->s_utf8 = opt.utf8; sbi->s_nocompress = opt.nocompress; + sbi->s_overriderockperm = opt.overriderockperm; /* * It would be incredibly stupid to allow people to mark every file * on the disk as suid, so we merely allow them to set the default * permissions. */ - sbi->s_fmode = opt.fmode & 0777; - sbi->s_dmode = opt.dmode & 0777; + if (opt.fmode != ISOFS_INVALID_MODE) + sbi->s_fmode = opt.fmode & 0777; + else + sbi->s_fmode = ISOFS_INVALID_MODE; + if (opt.dmode != ISOFS_INVALID_MODE) + sbi->s_dmode = opt.dmode & 0777; + else + sbi->s_dmode = ISOFS_INVALID_MODE; /* * Read the root inode, which _may_ result in changing @@ -1095,18 +1111,6 @@ static const struct address_space_operations isofs_aops = { .bmap = _isofs_bmap }; -static inline void test_and_set_uid(uid_t *p, uid_t value) -{ - if (value) - *p = value; -} - -static inline void test_and_set_gid(gid_t *p, gid_t value) -{ - if (value) - *p = value; -} - static int isofs_read_level3_size(struct inode *inode) { unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); @@ -1261,7 +1265,10 @@ static int isofs_read_inode(struct inode *inode) ei->i_file_format = isofs_file_normal; if (de->flags[-high_sierra] & 2) { - inode->i_mode = sbi->s_dmode | S_IFDIR; + if (sbi->s_dmode != ISOFS_INVALID_MODE) + inode->i_mode = S_IFDIR | sbi->s_dmode; + else + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; inode->i_nlink = 1; /* * Set to 1. We know there are 2, but * the find utility tries to optimize @@ -1270,8 +1277,16 @@ static int isofs_read_inode(struct inode *inode) * do it the hard way. */ } else { - /* Everybody gets to read the file. */ - inode->i_mode = sbi->s_fmode | S_IFREG; + if (sbi->s_fmode != ISOFS_INVALID_MODE) { + inode->i_mode = S_IFREG | sbi->s_fmode; + } else { + /* + * Set default permissions: r-x for all. The disc + * could be shared with DOS machines so virtually + * anything could be a valid executable. + */ + inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO; + } inode->i_nlink = 1; } inode->i_uid = sbi->s_uid; @@ -1300,7 +1315,7 @@ static int isofs_read_inode(struct inode *inode) * this CDROM was mounted with the cruft option. */ - if (sbi->s_cruft == 'y') + if (sbi->s_cruft) inode->i_size &= 0x00ffffff; if (de->interleave[0]) { @@ -1346,9 +1361,18 @@ static int isofs_read_inode(struct inode *inode) if (!high_sierra) { parse_rock_ridge_inode(de, inode); /* if we want uid/gid set, override the rock ridge setting */ - test_and_set_uid(&inode->i_uid, sbi->s_uid); - test_and_set_gid(&inode->i_gid, sbi->s_gid); + if (sbi->s_uid_set) + inode->i_uid = sbi->s_uid; + if (sbi->s_gid_set) + inode->i_gid = sbi->s_gid; } + /* Now set final access rights if overriding rock ridge setting */ + if (S_ISDIR(inode->i_mode) && sbi->s_overriderockperm && + sbi->s_dmode != ISOFS_INVALID_MODE) + inode->i_mode = S_IFDIR | sbi->s_dmode; + if (S_ISREG(inode->i_mode) && sbi->s_overriderockperm && + sbi->s_fmode != ISOFS_INVALID_MODE) + inode->i_mode = S_IFREG | sbi->s_fmode; /* Install the inode operations vector */ if (S_ISREG(inode->i_mode)) { diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index ccbf72faf27..7d33de84f52 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -35,21 +35,20 @@ struct isofs_sb_info { unsigned long s_log_zone_size; unsigned long s_max_size; - unsigned char s_high_sierra; /* A simple flag */ - unsigned char s_mapping; int s_rock_offset; /* offset of SUSP fields within SU area */ - unsigned char s_rock; unsigned char s_joliet_level; - unsigned char s_utf8; - unsigned char s_cruft; /* Broken disks with high - byte of length containing - junk */ - unsigned char s_unhide; - unsigned char s_nosuid; - unsigned char s_nodev; - unsigned char s_nocompress; - unsigned char s_hide; - unsigned char s_showassoc; + unsigned char s_mapping; + unsigned int s_high_sierra:1; + unsigned int s_rock:2; + unsigned int s_utf8:1; + unsigned int s_cruft:1; /* Broken disks with high byte of length + * containing junk */ + unsigned int s_nocompress:1; + unsigned int s_hide:1; + unsigned int s_showassoc:1; + unsigned int s_overriderockperm:1; + unsigned int s_uid_set:1; + unsigned int s_gid_set:1; mode_t s_fmode; mode_t s_dmode; @@ -58,6 +57,8 @@ struct isofs_sb_info { struct nls_table *s_nls_iocharset; /* Native language support table */ }; +#define ISOFS_INVALID_MODE ((mode_t) -1) + static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb) { return sb->s_fs_info; diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 8299889a835..eaa831311c9 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -142,9 +142,9 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, */ match = 0; if (dlen > 0 && - (sbi->s_hide =='n' || + (!sbi->s_hide || (!(de->flags[-sbi->s_high_sierra] & 1))) && - (sbi->s_showassoc =='y' || + (sbi->s_showassoc || (!(de->flags[-sbi->s_high_sierra] & 4)))) { match = (isofs_cmp(dentry, dpnt, dlen) == 0); } diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index ed886e6db39..73242ba7c7b 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1686,35 +1686,6 @@ out: return; } -/* - * journal_try_to_free_buffers() could race with journal_commit_transaction() - * The latter might still hold the a count on buffers when inspecting - * them on t_syncdata_list or t_locked_list. - * - * journal_try_to_free_buffers() will call this function to - * wait for the current transaction to finish syncing data buffers, before - * tryinf to free that buffer. - * - * Called with journal->j_state_lock held. - */ -static void journal_wait_for_transaction_sync_data(journal_t *journal) -{ - transaction_t *transaction = NULL; - tid_t tid; - - spin_lock(&journal->j_state_lock); - transaction = journal->j_committing_transaction; - - if (!transaction) { - spin_unlock(&journal->j_state_lock); - return; - } - - tid = transaction->t_tid; - spin_unlock(&journal->j_state_lock); - log_wait_commit(journal, tid); -} - /** * int journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation @@ -1786,25 +1757,6 @@ int journal_try_to_free_buffers(journal_t *journal, ret = try_to_free_buffers(page); - /* - * There are a number of places where journal_try_to_free_buffers() - * could race with journal_commit_transaction(), the later still - * holds the reference to the buffers to free while processing them. - * try_to_free_buffers() failed to free those buffers. Some of the - * caller of releasepage() request page buffers to be dropped, otherwise - * treat the fail-to-free as errors (such as generic_file_direct_IO()) - * - * So, if the caller of try_to_release_page() wants the synchronous - * behaviour(i.e make sure buffers are dropped upon return), - * let's wait for the current transaction to finish flush of - * dirty data buffers, then try to free those buffers again, - * with the journal locked. - */ - if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) { - journal_wait_for_transaction_sync_data(journal); - ret = try_to_free_buffers(page); - } - busy: return ret; } diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 17159cacbd9..5d70b3e6d49 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -20,9 +20,9 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> -#include <linux/marker.h> #include <linux/errno.h> #include <linux/slab.h> +#include <trace/events/jbd2.h> /* * Unlink a buffer from a transaction checkpoint list. @@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) * journal straight away. */ result = jbd2_cleanup_journal_tail(journal); - trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", - journal->j_devname, result); + trace_jbd2_checkpoint(journal, result); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0b7d3b8226f..7b4088b2364 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -16,7 +16,6 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> -#include <linux/marker.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/mm.h> @@ -26,6 +25,7 @@ #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/bio.h> +#include <trace/events/jbd2.h> /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal, * block allocation with delalloc. We need to write * only allocated blocks here. */ + trace_jbd2_submit_inode_data(jinode->i_vfs_inode); err = journal_submit_inode_data_buffers(mapping); if (!ret) ret = err; @@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction = journal->j_running_transaction; J_ASSERT(commit_transaction->t_state == T_RUNNING); - trace_mark(jbd2_start_commit, "dev %s transaction %d", - journal->j_devname, commit_transaction->t_tid); + trace_jbd2_start_commit(journal, commit_transaction); jbd_debug(1, "JBD: starting commit of transaction %d\n", commit_transaction->t_tid); @@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ if (commit_transaction->t_synchronous_commit) write_op = WRITE_SYNC_PLUG; + trace_jbd2_commit_locking(journal, commit_transaction); stats.u.run.rs_wait = commit_transaction->t_max_wait; stats.u.run.rs_locked = jiffies; stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, @@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_switch_revoke_table(journal); + trace_jbd2_commit_flushing(journal, commit_transaction); stats.u.run.rs_flushing = jiffies; stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, stats.u.run.rs_flushing); @@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction->t_state = T_COMMIT; spin_unlock(&journal->j_state_lock); + trace_jbd2_commit_logging(journal, commit_transaction); stats.u.run.rs_logging = jiffies; stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, stats.u.run.rs_logging); @@ -1054,9 +1057,7 @@ restart_loop: if (journal->j_commit_callback) journal->j_commit_callback(journal, commit_transaction); - trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", - journal->j_devname, commit_transaction->t_tid, - journal->j_tail_sequence); + trace_jbd2_end_commit(journal, commit_transaction); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); if (to_free) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 62be7d294ec..18bfd5dab64 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -38,6 +38,10 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/math64.h> +#include <linux/hash.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/jbd2.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -2377,6 +2381,71 @@ static void __exit journal_exit(void) jbd2_journal_destroy_caches(); } +/* + * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 + * tracing infrastructure to map a dev_t to a device name. + * + * The caller should use rcu_read_lock() in order to make sure the + * device name stays valid until its done with it. We use + * rcu_read_lock() as well to make sure we're safe in case the caller + * gets sloppy, and because rcu_read_lock() is cheap and can be safely + * nested. + */ +struct devname_cache { + struct rcu_head rcu; + dev_t device; + char devname[BDEVNAME_SIZE]; +}; +#define CACHE_SIZE_BITS 6 +static struct devname_cache *devcache[1 << CACHE_SIZE_BITS]; +static DEFINE_SPINLOCK(devname_cache_lock); + +static void free_devcache(struct rcu_head *rcu) +{ + kfree(rcu); +} + +const char *jbd2_dev_to_name(dev_t device) +{ + int i = hash_32(device, CACHE_SIZE_BITS); + char *ret; + struct block_device *bd; + + rcu_read_lock(); + if (devcache[i] && devcache[i]->device == device) { + ret = devcache[i]->devname; + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); + + spin_lock(&devname_cache_lock); + if (devcache[i]) { + if (devcache[i]->device == device) { + ret = devcache[i]->devname; + spin_unlock(&devname_cache_lock); + return ret; + } + call_rcu(&devcache[i]->rcu, free_devcache); + } + devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); + if (!devcache[i]) { + spin_unlock(&devname_cache_lock); + return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ + } + devcache[i]->device = device; + bd = bdget(device); + if (bd) { + bdevname(bd, devcache[i]->devname); + bdput(bd); + } else + __bdevname(device, devcache[i]->devname); + ret = devcache[i]->devname; + spin_unlock(&devname_cache_lock); + return ret; +} +EXPORT_SYMBOL(jbd2_dev_to_name); + MODULE_LICENSE("GPL"); module_init(journal_init); module_exit(journal_exit); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 996ffda06bf..494501edba6 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1547,36 +1547,6 @@ out: return; } -/* - * jbd2_journal_try_to_free_buffers() could race with - * jbd2_journal_commit_transaction(). The later might still hold the - * reference count to the buffers when inspecting them on - * t_syncdata_list or t_locked_list. - * - * jbd2_journal_try_to_free_buffers() will call this function to - * wait for the current transaction to finish syncing data buffers, before - * try to free that buffer. - * - * Called with journal->j_state_lock hold. - */ -static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal) -{ - transaction_t *transaction; - tid_t tid; - - spin_lock(&journal->j_state_lock); - transaction = journal->j_committing_transaction; - - if (!transaction) { - spin_unlock(&journal->j_state_lock); - return; - } - - tid = transaction->t_tid; - spin_unlock(&journal->j_state_lock); - jbd2_log_wait_commit(journal, tid); -} - /** * int jbd2_journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation @@ -1649,25 +1619,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, ret = try_to_free_buffers(page); - /* - * There are a number of places where jbd2_journal_try_to_free_buffers() - * could race with jbd2_journal_commit_transaction(), the later still - * holds the reference to the buffers to free while processing them. - * try_to_free_buffers() failed to free those buffers. Some of the - * caller of releasepage() request page buffers to be dropped, otherwise - * treat the fail-to-free as errors (such as generic_file_direct_IO()) - * - * So, if the caller of try_to_release_page() wants the synchronous - * behaviour(i.e make sure buffers are dropped upon return), - * let's wait for the current transaction to finish flush of - * dirty data buffers, then try to free those buffers again, - * with the journal locked. - */ - if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) { - jbd2_journal_wait_for_transaction_sync_data(journal); - ret = try_to_free_buffers(page); - } - busy: return ret; } diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 1d437de1e9a..7515e73e2bf 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -196,7 +196,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) if (c->nextblock) { ret = file_dirty(c, c->nextblock); if (ret) - return ret; + goto out; /* deleting summary information of the old nextblock */ jffs2_sum_reset_collected(c->summary); } @@ -207,7 +207,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) } else { ret = file_dirty(c, jeb); if (ret) - return ret; + goto out; } break; diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index dd7957064a8..f2fdcbce143 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -126,7 +126,6 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) struct nlm_lock *lock = &argp->lock; nlmclnt_next_cookie(&argp->cookie); - argp->state = nsm_local_state; memcpy(&lock->fh, NFS_FH(fl->fl_file->f_path.dentry->d_inode), sizeof(struct nfs_fh)); lock->caller = utsname()->nodename; lock->oh.data = req->a_owner; @@ -165,6 +164,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) /* Set up the argument struct */ nlmclnt_setlockargs(call, fl); + lock_kernel(); if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { if (fl->fl_type != F_UNLCK) { call->a_args.block = IS_SETLKW(cmd) ? 1 : 0; @@ -178,6 +178,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; + unlock_kernel(); dprintk("lockd: clnt proc returns %d\n", status); return status; @@ -519,6 +520,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) if (nsm_monitor(host) < 0) goto out; + req->a_args.state = nsm_local_state; fl->fl_flags |= FL_ACCESS; status = do_vfs_lock(fl); diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 6d5d4a4169e..7fce1b52584 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -53,7 +53,7 @@ static DEFINE_SPINLOCK(nsm_lock); /* * Local NSM state */ -int __read_mostly nsm_local_state; +u32 __read_mostly nsm_local_state; int __read_mostly nsm_use_hostnames; static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) @@ -112,6 +112,7 @@ static struct rpc_clnt *nsm_create(void) .program = &nsm_program, .version = NSM_VERSION, .authflavor = RPC_AUTH_NULL, + .flags = RPC_CLNT_CREATE_NOPING, }; return rpc_create(&args); @@ -184,13 +185,19 @@ int nsm_monitor(const struct nlm_host *host) nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; status = nsm_mon_unmon(nsm, NSMPROC_MON, &res); - if (res.status != 0) + if (unlikely(res.status != 0)) status = -EIO; - if (status < 0) + if (unlikely(status < 0)) { printk(KERN_NOTICE "lockd: cannot monitor %s\n", nsm->sm_name); - else - nsm->sm_monitored = 1; - return status; + return status; + } + + nsm->sm_monitored = 1; + if (unlikely(nsm_local_state != res.state)) { + nsm_local_state = res.state; + dprintk("lockd: NSM state changed to %d\n", nsm_local_state); + } + return 0; } /** diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 83ee34203bd..e577a78d7ba 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -326,6 +326,8 @@ static void nlmsvc_freegrantargs(struct nlm_rqst *call) { if (call->a_args.lock.oh.data != call->a_owner) kfree(call->a_args.lock.oh.data); + + locks_release_private(&call->a_args.lock.fl); } /* diff --git a/fs/locks.c b/fs/locks.c index ec3deea29e3..b6440f52178 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -151,7 +151,7 @@ static struct file_lock *locks_alloc_lock(void) return kmem_cache_alloc(filelock_cache, GFP_KERNEL); } -static void locks_release_private(struct file_lock *fl) +void locks_release_private(struct file_lock *fl) { if (fl->fl_ops) { if (fl->fl_ops->fl_release_private) @@ -165,6 +165,7 @@ static void locks_release_private(struct file_lock *fl) } } +EXPORT_SYMBOL_GPL(locks_release_private); /* Free a lock which is not in use. */ static void locks_free_lock(struct file_lock *fl) diff --git a/fs/minix/minix.h b/fs/minix/minix.h index cb7fdd11f9a..9dcf95b4211 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -1,3 +1,6 @@ +#ifndef FS_MINIX_H +#define FS_MINIX_H + #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/minix_fs.h> @@ -86,3 +89,5 @@ static inline struct minix_inode_info *minix_i(struct inode *inode) { return list_entry(inode, struct minix_inode_info, vfs_inode); } + +#endif /* FS_MINIX_H */ diff --git a/fs/namespace.c b/fs/namespace.c index 2dd333b0fe7..a7bea8c8bd4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1937,6 +1937,21 @@ dput_out: return retval; } +static struct mnt_namespace *alloc_mnt_ns(void) +{ + struct mnt_namespace *new_ns; + + new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); + if (!new_ns) + return ERR_PTR(-ENOMEM); + atomic_set(&new_ns->count, 1); + new_ns->root = NULL; + INIT_LIST_HEAD(&new_ns->list); + init_waitqueue_head(&new_ns->poll); + new_ns->event = 0; + return new_ns; +} + /* * Allocate a new namespace structure and populate it with contents * copied from the namespace of the passed in task structure. @@ -1948,14 +1963,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; struct vfsmount *p, *q; - new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); - if (!new_ns) - return ERR_PTR(-ENOMEM); - - atomic_set(&new_ns->count, 1); - INIT_LIST_HEAD(&new_ns->list); - init_waitqueue_head(&new_ns->poll); - new_ns->event = 0; + new_ns = alloc_mnt_ns(); + if (IS_ERR(new_ns)) + return new_ns; down_write(&namespace_sem); /* First pass: copy the tree topology */ @@ -2019,6 +2029,24 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, return new_ns; } +/** + * create_mnt_ns - creates a private namespace and adds a root filesystem + * @mnt: pointer to the new root filesystem mountpoint + */ +struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) +{ + struct mnt_namespace *new_ns; + + new_ns = alloc_mnt_ns(); + if (!IS_ERR(new_ns)) { + mnt->mnt_ns = new_ns; + new_ns->root = mnt; + list_add(&new_ns->list, &new_ns->root->mnt_list); + } + return new_ns; +} +EXPORT_SYMBOL(create_mnt_ns); + SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char __user *, type, unsigned long, flags, void __user *, data) { @@ -2246,10 +2274,14 @@ void __init mnt_init(void) init_mount_tree(); } -void __put_mnt_ns(struct mnt_namespace *ns) +void put_mnt_ns(struct mnt_namespace *ns) { - struct vfsmount *root = ns->root; + struct vfsmount *root; LIST_HEAD(umount_list); + + if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock)) + return; + root = ns->root; ns->root = NULL; spin_unlock(&vfsmount_lock); down_write(&namespace_sem); @@ -2260,3 +2292,4 @@ void __put_mnt_ns(struct mnt_namespace *ns) release_mounts(&umount_list); kfree(ns); } +EXPORT_SYMBOL(put_mnt_ns); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index e67f3ec0773..2a77bc25d5a 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -1,6 +1,6 @@ config NFS_FS tristate "NFS client support" - depends on INET + depends on INET && FILE_LOCKING select LOCKD select SUNRPC select NFS_ACL_SUPPORT if NFS_V3_ACL @@ -74,6 +74,15 @@ config NFS_V4 If unsure, say N. +config NFS_V4_1 + bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)" + depends on NFS_V4 && EXPERIMENTAL + help + This option enables support for minor version 1 of the NFSv4 protocol + (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. + + Unless you're an NFS developer, say N. + config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a886e692ddd..7f604c7941f 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -17,6 +17,9 @@ #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/sunrpc/svcauth_gss.h> +#if defined(CONFIG_NFS_V4_1) +#include <linux/sunrpc/bc_xprt.h> +#endif #include <net/inet_sock.h> @@ -28,11 +31,12 @@ struct nfs_callback_data { unsigned int users; + struct svc_serv *serv; struct svc_rqst *rqst; struct task_struct *task; }; -static struct nfs_callback_data nfs_callback_info; +static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; static DEFINE_MUTEX(nfs_callback_mutex); static struct svc_program nfs4_callback_program; @@ -56,10 +60,10 @@ module_param_call(callback_tcpport, param_set_port, param_get_int, &nfs_callback_set_tcpport, 0644); /* - * This is the callback kernel thread. + * This is the NFSv4 callback kernel thread. */ static int -nfs_callback_svc(void *vrqstp) +nfs4_callback_svc(void *vrqstp) { int err, preverr = 0; struct svc_rqst *rqstp = vrqstp; @@ -97,20 +101,12 @@ nfs_callback_svc(void *vrqstp) } /* - * Bring up the callback thread if it is not already up. + * Prepare to bring up the NFSv4 callback service */ -int nfs_callback_up(void) +struct svc_rqst * +nfs4_callback_up(struct svc_serv *serv) { - struct svc_serv *serv = NULL; - int ret = 0; - - mutex_lock(&nfs_callback_mutex); - if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) - goto out; - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); - ret = -ENOMEM; - if (!serv) - goto out_err; + int ret; ret = svc_create_xprt(serv, "tcp", PF_INET, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); @@ -127,27 +123,174 @@ int nfs_callback_up(void) nfs_callback_tcpport6 = ret; dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport6, PF_INET6); - } else if (ret != -EAFNOSUPPORT) + } else if (ret == -EAFNOSUPPORT) + ret = 0; + else goto out_err; #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ - nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); - if (IS_ERR(nfs_callback_info.rqst)) { - ret = PTR_ERR(nfs_callback_info.rqst); - nfs_callback_info.rqst = NULL; + return svc_prepare_thread(serv, &serv->sv_pools[0]); + +out_err: + if (ret == 0) + ret = -ENOMEM; + return ERR_PTR(ret); +} + +#if defined(CONFIG_NFS_V4_1) +/* + * The callback service for NFSv4.1 callbacks + */ +static int +nfs41_callback_svc(void *vrqstp) +{ + struct svc_rqst *rqstp = vrqstp; + struct svc_serv *serv = rqstp->rq_server; + struct rpc_rqst *req; + int error; + DEFINE_WAIT(wq); + + set_freezable(); + + /* + * FIXME: do we really need to run this under the BKL? If so, please + * add a comment about what it's intended to protect. + */ + lock_kernel(); + while (!kthread_should_stop()) { + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); + spin_lock_bh(&serv->sv_cb_lock); + if (!list_empty(&serv->sv_cb_list)) { + req = list_first_entry(&serv->sv_cb_list, + struct rpc_rqst, rq_bc_list); + list_del(&req->rq_bc_list); + spin_unlock_bh(&serv->sv_cb_lock); + dprintk("Invoking bc_svc_process()\n"); + error = bc_svc_process(serv, req, rqstp); + dprintk("bc_svc_process() returned w/ error code= %d\n", + error); + } else { + spin_unlock_bh(&serv->sv_cb_lock); + schedule(); + } + finish_wait(&serv->sv_cb_waitq, &wq); + } + unlock_kernel(); + return 0; +} + +/* + * Bring up the NFSv4.1 callback service + */ +struct svc_rqst * +nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) +{ + struct svc_xprt *bc_xprt; + struct svc_rqst *rqstp = ERR_PTR(-ENOMEM); + + dprintk("--> %s\n", __func__); + /* Create a svc_sock for the service */ + bc_xprt = svc_sock_create(serv, xprt->prot); + if (!bc_xprt) + goto out; + + /* + * Save the svc_serv in the transport so that it can + * be referenced when the session backchannel is initialized + */ + serv->bc_xprt = bc_xprt; + xprt->bc_serv = serv; + + INIT_LIST_HEAD(&serv->sv_cb_list); + spin_lock_init(&serv->sv_cb_lock); + init_waitqueue_head(&serv->sv_cb_waitq); + rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(rqstp)) + svc_sock_destroy(bc_xprt); +out: + dprintk("--> %s return %p\n", __func__, rqstp); + return rqstp; +} + +static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, + struct svc_serv *serv, struct rpc_xprt *xprt, + struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) +{ + if (minorversion) { + *rqstpp = nfs41_callback_up(serv, xprt); + *callback_svc = nfs41_callback_svc; + } + return minorversion; +} + +static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, + struct nfs_callback_data *cb_info) +{ + if (minorversion) + xprt->bc_serv = cb_info->serv; +} +#else +static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, + struct svc_serv *serv, struct rpc_xprt *xprt, + struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) +{ + return 0; +} + +static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, + struct nfs_callback_data *cb_info) +{ +} +#endif /* CONFIG_NFS_V4_1 */ + +/* + * Bring up the callback thread if it is not already up. + */ +int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) +{ + struct svc_serv *serv = NULL; + struct svc_rqst *rqstp; + int (*callback_svc)(void *vrqstp); + struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + char svc_name[12]; + int ret = 0; + int minorversion_setup; + + mutex_lock(&nfs_callback_mutex); + if (cb_info->users++ || cb_info->task != NULL) { + nfs_callback_bc_serv(minorversion, xprt, cb_info); + goto out; + } + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); + if (!serv) { + ret = -ENOMEM; + goto out_err; + } + + minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, + serv, xprt, &rqstp, &callback_svc); + if (!minorversion_setup) { + /* v4.0 callback setup */ + rqstp = nfs4_callback_up(serv); + callback_svc = nfs4_callback_svc; + } + + if (IS_ERR(rqstp)) { + ret = PTR_ERR(rqstp); goto out_err; } svc_sock_update_bufs(serv); - nfs_callback_info.task = kthread_run(nfs_callback_svc, - nfs_callback_info.rqst, - "nfsv4-svc"); - if (IS_ERR(nfs_callback_info.task)) { - ret = PTR_ERR(nfs_callback_info.task); - svc_exit_thread(nfs_callback_info.rqst); - nfs_callback_info.rqst = NULL; - nfs_callback_info.task = NULL; + sprintf(svc_name, "nfsv4.%u-svc", minorversion); + cb_info->serv = serv; + cb_info->rqst = rqstp; + cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name); + if (IS_ERR(cb_info->task)) { + ret = PTR_ERR(cb_info->task); + svc_exit_thread(cb_info->rqst); + cb_info->rqst = NULL; + cb_info->task = NULL; goto out_err; } out: @@ -164,22 +307,25 @@ out: out_err: dprintk("NFS: Couldn't create callback socket or server thread; " "err = %d\n", ret); - nfs_callback_info.users--; + cb_info->users--; goto out; } /* * Kill the callback thread if it's no longer being used. */ -void nfs_callback_down(void) +void nfs_callback_down(int minorversion) { + struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + mutex_lock(&nfs_callback_mutex); - nfs_callback_info.users--; - if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) { - kthread_stop(nfs_callback_info.task); - svc_exit_thread(nfs_callback_info.rqst); - nfs_callback_info.rqst = NULL; - nfs_callback_info.task = NULL; + cb_info->users--; + if (cb_info->users == 0 && cb_info->task != NULL) { + kthread_stop(cb_info->task); + svc_exit_thread(cb_info->rqst); + cb_info->serv = NULL; + cb_info->rqst = NULL; + cb_info->task = NULL; } mutex_unlock(&nfs_callback_mutex); } diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index e110e286a26..07baa8254ca 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -20,13 +20,24 @@ enum nfs4_callback_procnum { enum nfs4_callback_opnum { OP_CB_GETATTR = 3, OP_CB_RECALL = 4, +/* Callback operations new to NFSv4.1 */ + OP_CB_LAYOUTRECALL = 5, + OP_CB_NOTIFY = 6, + OP_CB_PUSH_DELEG = 7, + OP_CB_RECALL_ANY = 8, + OP_CB_RECALLABLE_OBJ_AVAIL = 9, + OP_CB_RECALL_SLOT = 10, + OP_CB_SEQUENCE = 11, + OP_CB_WANTS_CANCELLED = 12, + OP_CB_NOTIFY_LOCK = 13, + OP_CB_NOTIFY_DEVICEID = 14, OP_CB_ILLEGAL = 10044, }; struct cb_compound_hdr_arg { unsigned int taglen; const char *tag; - unsigned int callback_ident; + unsigned int minorversion; unsigned nops; }; @@ -59,16 +70,59 @@ struct cb_recallargs { uint32_t truncate; }; +#if defined(CONFIG_NFS_V4_1) + +struct referring_call { + uint32_t rc_sequenceid; + uint32_t rc_slotid; +}; + +struct referring_call_list { + struct nfs4_sessionid rcl_sessionid; + uint32_t rcl_nrefcalls; + struct referring_call *rcl_refcalls; +}; + +struct cb_sequenceargs { + struct sockaddr *csa_addr; + struct nfs4_sessionid csa_sessionid; + uint32_t csa_sequenceid; + uint32_t csa_slotid; + uint32_t csa_highestslotid; + uint32_t csa_cachethis; + uint32_t csa_nrclists; + struct referring_call_list *csa_rclists; +}; + +struct cb_sequenceres { + __be32 csr_status; + struct nfs4_sessionid csr_sessionid; + uint32_t csr_sequenceid; + uint32_t csr_slotid; + uint32_t csr_highestslotid; + uint32_t csr_target_highestslotid; +}; + +extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, + struct cb_sequenceres *res); + +#endif /* CONFIG_NFS_V4_1 */ + extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy); #ifdef CONFIG_NFS_V4 -extern int nfs_callback_up(void); -extern void nfs_callback_down(void); -#else -#define nfs_callback_up() (0) -#define nfs_callback_down() do {} while(0) -#endif +extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); +extern void nfs_callback_down(int minorversion); +#endif /* CONFIG_NFS_V4 */ + +/* + * nfs41: Callbacks are expected to not cause substantial latency, + * so we limit their concurrency to 1 by setting up the maximum number + * of slots for the backchannel. + */ +#define NFS41_BC_MIN_CALLBACKS 1 +#define NFS41_BC_MAX_CALLBACKS 1 extern unsigned int nfs_callback_set_tcpport; extern unsigned short nfs_callback_tcpport; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index f7e83e23cf9..b7da1f54da6 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -101,3 +101,130 @@ out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); return res; } + +#if defined(CONFIG_NFS_V4_1) + +/* + * Validate the sequenceID sent by the server. + * Return success if the sequenceID is one more than what we last saw on + * this slot, accounting for wraparound. Increments the slot's sequence. + * + * We don't yet implement a duplicate request cache, so at this time + * we will log replays, and process them as if we had not seen them before, + * but we don't bump the sequence in the slot. Not too worried about it, + * since we only currently implement idempotent callbacks anyway. + * + * We have a single slot backchannel at this time, so we don't bother + * checking the used_slots bit array on the table. The lower layer guarantees + * a single outstanding callback request at a time. + */ +static int +validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid) +{ + struct nfs4_slot *slot; + + dprintk("%s enter. slotid %d seqid %d\n", + __func__, slotid, seqid); + + if (slotid > NFS41_BC_MAX_CALLBACKS) + return htonl(NFS4ERR_BADSLOT); + + slot = tbl->slots + slotid; + dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr); + + /* Normal */ + if (likely(seqid == slot->seq_nr + 1)) { + slot->seq_nr++; + return htonl(NFS4_OK); + } + + /* Replay */ + if (seqid == slot->seq_nr) { + dprintk("%s seqid %d is a replay - no DRC available\n", + __func__, seqid); + return htonl(NFS4_OK); + } + + /* Wraparound */ + if (seqid == 1 && (slot->seq_nr + 1) == 0) { + slot->seq_nr = 1; + return htonl(NFS4_OK); + } + + /* Misordered request */ + return htonl(NFS4ERR_SEQ_MISORDERED); +} + +/* + * Returns a pointer to a held 'struct nfs_client' that matches the server's + * address, major version number, and session ID. It is the caller's + * responsibility to release the returned reference. + * + * Returns NULL if there are no connections with sessions, or if no session + * matches the one of interest. + */ + static struct nfs_client *find_client_with_session( + const struct sockaddr *addr, u32 nfsversion, + struct nfs4_sessionid *sessionid) +{ + struct nfs_client *clp; + + clp = nfs_find_client(addr, 4); + if (clp == NULL) + return NULL; + + do { + struct nfs_client *prev = clp; + + if (clp->cl_session != NULL) { + if (memcmp(clp->cl_session->sess_id.data, + sessionid->data, + NFS4_MAX_SESSIONID_LEN) == 0) { + /* Returns a held reference to clp */ + return clp; + } + } + clp = nfs_find_client_next(prev); + nfs_put_client(prev); + } while (clp != NULL); + + return NULL; +} + +/* FIXME: referring calls should be processed */ +unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, + struct cb_sequenceres *res) +{ + struct nfs_client *clp; + int i, status; + + for (i = 0; i < args->csa_nrclists; i++) + kfree(args->csa_rclists[i].rcl_refcalls); + kfree(args->csa_rclists); + + status = htonl(NFS4ERR_BADSESSION); + clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid); + if (clp == NULL) + goto out; + + status = validate_seqid(&clp->cl_session->bc_slot_table, + args->csa_slotid, args->csa_sequenceid); + if (status) + goto out_putclient; + + memcpy(&res->csr_sessionid, &args->csa_sessionid, + sizeof(res->csr_sessionid)); + res->csr_sequenceid = args->csa_sequenceid; + res->csr_slotid = args->csa_slotid; + res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + +out_putclient: + nfs_put_client(clp); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + res->csr_status = status; + return res->csr_status; +} + +#endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index dd0ef34b584..e5a2dac5f71 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -20,6 +20,11 @@ 2 + 2 + 3 + 3) #define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#if defined(CONFIG_NFS_V4_1) +#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ + 4 + 1 + 3) +#endif /* CONFIG_NFS_V4_1 */ + #define NFSDBG_FACILITY NFSDBG_CALLBACK typedef __be32 (*callback_process_op_t)(void *, void *); @@ -132,7 +137,6 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr) { __be32 *p; - unsigned int minor_version; __be32 status; status = decode_string(xdr, &hdr->taglen, &hdr->tag); @@ -147,15 +151,19 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound p = read_buf(xdr, 12); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - minor_version = ntohl(*p++); - /* Check minor version is zero. */ - if (minor_version != 0) { - printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n", - __func__, minor_version); + hdr->minorversion = ntohl(*p++); + /* Check minor version is zero or one. */ + if (hdr->minorversion <= 1) { + p++; /* skip callback_ident */ + } else { + printk(KERN_WARNING "%s: NFSv4 server callback with " + "illegal minor version %u!\n", + __func__, hdr->minorversion); return htonl(NFS4ERR_MINOR_VERS_MISMATCH); } - hdr->callback_ident = ntohl(*p++); hdr->nops = ntohl(*p); + dprintk("%s: minorversion %d nops %d\n", __func__, + hdr->minorversion, hdr->nops); return 0; } @@ -204,6 +212,122 @@ out: return status; } +#if defined(CONFIG_NFS_V4_1) + +static unsigned decode_sessionid(struct xdr_stream *xdr, + struct nfs4_sessionid *sid) +{ + uint32_t *p; + int len = NFS4_MAX_SESSIONID_LEN; + + p = read_buf(xdr, len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE);; + + memcpy(sid->data, p, len); + return 0; +} + +static unsigned decode_rc_list(struct xdr_stream *xdr, + struct referring_call_list *rc_list) +{ + uint32_t *p; + int i; + unsigned status; + + status = decode_sessionid(xdr, &rc_list->rcl_sessionid); + if (status) + goto out; + + status = htonl(NFS4ERR_RESOURCE); + p = read_buf(xdr, sizeof(uint32_t)); + if (unlikely(p == NULL)) + goto out; + + rc_list->rcl_nrefcalls = ntohl(*p++); + if (rc_list->rcl_nrefcalls) { + p = read_buf(xdr, + rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t)); + if (unlikely(p == NULL)) + goto out; + rc_list->rcl_refcalls = kmalloc(rc_list->rcl_nrefcalls * + sizeof(*rc_list->rcl_refcalls), + GFP_KERNEL); + if (unlikely(rc_list->rcl_refcalls == NULL)) + goto out; + for (i = 0; i < rc_list->rcl_nrefcalls; i++) { + rc_list->rcl_refcalls[i].rc_sequenceid = ntohl(*p++); + rc_list->rcl_refcalls[i].rc_slotid = ntohl(*p++); + } + } + status = 0; + +out: + return status; +} + +static unsigned decode_cb_sequence_args(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct cb_sequenceargs *args) +{ + uint32_t *p; + int i; + unsigned status; + + status = decode_sessionid(xdr, &args->csa_sessionid); + if (status) + goto out; + + status = htonl(NFS4ERR_RESOURCE); + p = read_buf(xdr, 5 * sizeof(uint32_t)); + if (unlikely(p == NULL)) + goto out; + + args->csa_addr = svc_addr(rqstp); + args->csa_sequenceid = ntohl(*p++); + args->csa_slotid = ntohl(*p++); + args->csa_highestslotid = ntohl(*p++); + args->csa_cachethis = ntohl(*p++); + args->csa_nrclists = ntohl(*p++); + args->csa_rclists = NULL; + if (args->csa_nrclists) { + args->csa_rclists = kmalloc(args->csa_nrclists * + sizeof(*args->csa_rclists), + GFP_KERNEL); + if (unlikely(args->csa_rclists == NULL)) + goto out; + + for (i = 0; i < args->csa_nrclists; i++) { + status = decode_rc_list(xdr, &args->csa_rclists[i]); + if (status) + goto out_free; + } + } + status = 0; + + dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u slotid %u " + "highestslotid %u cachethis %d nrclists %u\n", + __func__, + ((u32 *)&args->csa_sessionid)[0], + ((u32 *)&args->csa_sessionid)[1], + ((u32 *)&args->csa_sessionid)[2], + ((u32 *)&args->csa_sessionid)[3], + args->csa_sequenceid, args->csa_slotid, + args->csa_highestslotid, args->csa_cachethis, + args->csa_nrclists); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; + +out_free: + for (i = 0; i < args->csa_nrclists; i++) + kfree(args->csa_rclists[i].rcl_refcalls); + kfree(args->csa_rclists); + goto out; +} + +#endif /* CONFIG_NFS_V4_1 */ + static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) { __be32 *p; @@ -353,31 +477,134 @@ out: return status; } -static __be32 process_op(struct svc_rqst *rqstp, +#if defined(CONFIG_NFS_V4_1) + +static unsigned encode_sessionid(struct xdr_stream *xdr, + const struct nfs4_sessionid *sid) +{ + uint32_t *p; + int len = NFS4_MAX_SESSIONID_LEN; + + p = xdr_reserve_space(xdr, len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + + memcpy(p, sid, len); + return 0; +} + +static unsigned encode_cb_sequence_res(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + const struct cb_sequenceres *res) +{ + uint32_t *p; + unsigned status = res->csr_status; + + if (unlikely(status != 0)) + goto out; + + encode_sessionid(xdr, &res->csr_sessionid); + + p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t)); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + + *p++ = htonl(res->csr_sequenceid); + *p++ = htonl(res->csr_slotid); + *p++ = htonl(res->csr_highestslotid); + *p++ = htonl(res->csr_target_highestslotid); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} + +static __be32 +preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) +{ + if (op_nr == OP_CB_SEQUENCE) { + if (nop != 0) + return htonl(NFS4ERR_SEQUENCE_POS); + } else { + if (nop == 0) + return htonl(NFS4ERR_OP_NOT_IN_SESSION); + } + + switch (op_nr) { + case OP_CB_GETATTR: + case OP_CB_RECALL: + case OP_CB_SEQUENCE: + *op = &callback_ops[op_nr]; + break; + + case OP_CB_LAYOUTRECALL: + case OP_CB_NOTIFY_DEVICEID: + case OP_CB_NOTIFY: + case OP_CB_PUSH_DELEG: + case OP_CB_RECALL_ANY: + case OP_CB_RECALLABLE_OBJ_AVAIL: + case OP_CB_RECALL_SLOT: + case OP_CB_WANTS_CANCELLED: + case OP_CB_NOTIFY_LOCK: + return htonl(NFS4ERR_NOTSUPP); + + default: + return htonl(NFS4ERR_OP_ILLEGAL); + } + + return htonl(NFS_OK); +} + +#else /* CONFIG_NFS_V4_1 */ + +static __be32 +preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) +{ + return htonl(NFS4ERR_MINOR_VERS_MISMATCH); +} + +#endif /* CONFIG_NFS_V4_1 */ + +static __be32 +preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) +{ + switch (op_nr) { + case OP_CB_GETATTR: + case OP_CB_RECALL: + *op = &callback_ops[op_nr]; + break; + default: + return htonl(NFS4ERR_OP_ILLEGAL); + } + + return htonl(NFS_OK); +} + +static __be32 process_op(uint32_t minorversion, int nop, + struct svc_rqst *rqstp, struct xdr_stream *xdr_in, void *argp, struct xdr_stream *xdr_out, void *resp) { struct callback_op *op = &callback_ops[0]; unsigned int op_nr = OP_CB_ILLEGAL; - __be32 status = 0; + __be32 status; long maxlen; __be32 res; dprintk("%s: start\n", __func__); status = decode_op_hdr(xdr_in, &op_nr); - if (likely(status == 0)) { - switch (op_nr) { - case OP_CB_GETATTR: - case OP_CB_RECALL: - op = &callback_ops[op_nr]; - break; - default: - op_nr = OP_CB_ILLEGAL; - op = &callback_ops[0]; - status = htonl(NFS4ERR_OP_ILLEGAL); - } + if (unlikely(status)) { + status = htonl(NFS4ERR_OP_ILLEGAL); + goto out; } + dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", + __func__, minorversion, nop, op_nr); + + status = minorversion ? preprocess_nfs41_op(nop, op_nr, &op) : + preprocess_nfs4_op(op_nr, &op); + if (status == htonl(NFS4ERR_OP_ILLEGAL)) + op_nr = OP_CB_ILLEGAL; +out: maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { if (likely(status == 0 && op->decode_args != NULL)) @@ -425,7 +652,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_system_err; while (status == 0 && nops != hdr_arg.nops) { - status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp); + status = process_op(hdr_arg.minorversion, nops, + rqstp, &xdr_in, argp, &xdr_out, resp); nops++; } @@ -452,7 +680,15 @@ static struct callback_op callback_ops[] = { .process_op = (callback_process_op_t)nfs4_callback_recall, .decode_args = (callback_decode_arg_t)decode_recall_args, .res_maxsize = CB_OP_RECALL_RES_MAXSZ, - } + }, +#if defined(CONFIG_NFS_V4_1) + [OP_CB_SEQUENCE] = { + .process_op = (callback_process_op_t)nfs4_callback_sequence, + .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, + .encode_res = (callback_encode_res_t)encode_cb_sequence_res, + .res_maxsize = CB_OP_SEQUENCE_RES_MAXSZ, + }, +#endif /* CONFIG_NFS_V4_1 */ }; /* diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 75c9cd2aa11..c2d061675d8 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -37,6 +37,7 @@ #include <linux/in6.h> #include <net/ipv6.h> #include <linux/nfs_xdr.h> +#include <linux/sunrpc/bc_xprt.h> #include <asm/system.h> @@ -102,6 +103,7 @@ struct nfs_client_initdata { size_t addrlen; const struct nfs_rpc_ops *rpc_ops; int proto; + u32 minorversion; }; /* @@ -114,18 +116,13 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ { struct nfs_client *clp; struct rpc_cred *cred; + int err = -ENOMEM; if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; clp->rpc_ops = cl_init->rpc_ops; - if (cl_init->rpc_ops->version == 4) { - if (nfs_callback_up() < 0) - goto error_2; - __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); - } - atomic_set(&clp->cl_count, 1); clp->cl_cons_state = NFS_CS_INITING; @@ -133,9 +130,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_addrlen = cl_init->addrlen; if (cl_init->hostname) { + err = -ENOMEM; clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL); if (!clp->cl_hostname) - goto error_3; + goto error_cleanup; } INIT_LIST_HEAD(&clp->cl_superblocks); @@ -150,6 +148,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); clp->cl_boot_time = CURRENT_TIME; clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; + clp->cl_minorversion = cl_init->minorversion; #endif cred = rpc_lookup_machine_cred(); if (!IS_ERR(cred)) @@ -159,13 +158,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ return clp; -error_3: - if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(); -error_2: +error_cleanup: kfree(clp); error_0: - return NULL; + return ERR_PTR(err); } static void nfs4_shutdown_client(struct nfs_client *clp) @@ -182,12 +178,42 @@ static void nfs4_shutdown_client(struct nfs_client *clp) } /* + * Destroy the NFS4 callback service + */ +static void nfs4_destroy_callback(struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4 + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(clp->cl_minorversion); +#endif /* CONFIG_NFS_V4 */ +} + +/* + * Clears/puts all minor version specific parts from an nfs_client struct + * reverting it to minorversion 0. + */ +static void nfs4_clear_client_minor_version(struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(clp)) { + nfs4_destroy_session(clp->cl_session); + clp->cl_session = NULL; + } + + clp->cl_call_sync = _nfs4_call_sync; +#endif /* CONFIG_NFS_V4_1 */ + + nfs4_destroy_callback(clp); +} + +/* * Destroy a shared client record */ static void nfs_free_client(struct nfs_client *clp) { dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); + nfs4_clear_client_minor_version(clp); nfs4_shutdown_client(clp); nfs_fscache_release_client_cookie(clp); @@ -196,9 +222,6 @@ static void nfs_free_client(struct nfs_client *clp) if (!IS_ERR(clp->cl_rpcclient)) rpc_shutdown_client(clp->cl_rpcclient); - if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(); - if (clp->cl_machine_cred != NULL) put_rpccred(clp->cl_machine_cred); @@ -347,7 +370,8 @@ struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion) struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; /* Don't match clients that failed to initialise properly */ - if (clp->cl_cons_state != NFS_CS_READY) + if (!(clp->cl_cons_state == NFS_CS_READY || + clp->cl_cons_state == NFS_CS_SESSION_INITING)) continue; /* Different NFS versions cannot share the same nfs_client */ @@ -420,7 +444,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat if (clp->cl_proto != data->proto) continue; - + /* Match nfsv4 minorversion */ + if (clp->cl_minorversion != data->minorversion) + continue; /* Match the full socket address */ if (!nfs_sockaddr_cmp(sap, clap)) continue; @@ -456,9 +482,10 @@ static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_in spin_unlock(&nfs_client_lock); new = nfs_alloc_client(cl_init); - } while (new); + } while (!IS_ERR(new)); - return ERR_PTR(-ENOMEM); + dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new)); + return new; /* install a new client and return with it unready */ install_client: @@ -478,7 +505,7 @@ found_client: nfs_free_client(new); error = wait_event_killable(nfs_client_active_wq, - clp->cl_cons_state != NFS_CS_INITING); + clp->cl_cons_state < NFS_CS_INITING); if (error < 0) { nfs_put_client(clp); return ERR_PTR(-ERESTARTSYS); @@ -499,13 +526,29 @@ found_client: /* * Mark a server as ready or failed */ -static void nfs_mark_client_ready(struct nfs_client *clp, int state) +void nfs_mark_client_ready(struct nfs_client *clp, int state) { clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); } /* + * With sessions, the client is not marked ready until after a + * successful EXCHANGE_ID and CREATE_SESSION. + * + * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate + * other versions of NFS can be tried. + */ +int nfs4_check_client_ready(struct nfs_client *clp) +{ + if (!nfs4_has_session(clp)) + return 0; + if (clp->cl_cons_state < NFS_CS_READY) + return -EPROTONOSUPPORT; + return 0; +} + +/* * Initialise the timeout values for a connection */ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, @@ -1050,6 +1093,61 @@ error: #ifdef CONFIG_NFS_V4 /* + * Initialize the NFS4 callback service + */ +static int nfs4_init_callback(struct nfs_client *clp) +{ + int error; + + if (clp->rpc_ops->version == 4) { + if (nfs4_has_session(clp)) { + error = xprt_setup_backchannel( + clp->cl_rpcclient->cl_xprt, + NFS41_BC_MIN_CALLBACKS); + if (error < 0) + return error; + } + + error = nfs_callback_up(clp->cl_minorversion, + clp->cl_rpcclient->cl_xprt); + if (error < 0) { + dprintk("%s: failed to start callback. Error = %d\n", + __func__, error); + return error; + } + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + } + return 0; +} + +/* + * Initialize the minor version specific parts of an NFS4 client record + */ +static int nfs4_init_client_minor_version(struct nfs_client *clp) +{ + clp->cl_call_sync = _nfs4_call_sync; + +#if defined(CONFIG_NFS_V4_1) + if (clp->cl_minorversion) { + struct nfs4_session *session = NULL; + /* + * Create the session and mark it expired. + * When a SEQUENCE operation encounters the expired session + * it will do session recovery to initialize it. + */ + session = nfs4_alloc_session(clp); + if (!session) + return -ENOMEM; + + clp->cl_session = session; + clp->cl_call_sync = _nfs4_call_sync_session; + } +#endif /* CONFIG_NFS_V4_1 */ + + return nfs4_init_callback(clp); +} + +/* * Initialise an NFS4 client record */ static int nfs4_init_client(struct nfs_client *clp, @@ -1083,7 +1181,12 @@ static int nfs4_init_client(struct nfs_client *clp, } __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); - nfs_mark_client_ready(clp, NFS_CS_READY); + error = nfs4_init_client_minor_version(clp); + if (error < 0) + goto error; + + if (!nfs4_has_session(clp)) + nfs_mark_client_ready(clp, NFS_CS_READY); return 0; error: @@ -1101,7 +1204,8 @@ static int nfs4_set_client(struct nfs_server *server, const size_t addrlen, const char *ip_addr, rpc_authflavor_t authflavour, - int proto, const struct rpc_timeout *timeparms) + int proto, const struct rpc_timeout *timeparms, + u32 minorversion) { struct nfs_client_initdata cl_init = { .hostname = hostname, @@ -1109,6 +1213,7 @@ static int nfs4_set_client(struct nfs_server *server, .addrlen = addrlen, .rpc_ops = &nfs_v4_clientops, .proto = proto, + .minorversion = minorversion, }; struct nfs_client *clp; int error; @@ -1138,6 +1243,36 @@ error: } /* + * Initialize a session. + * Note: save the mount rsize and wsize for create_server negotiation. + */ +static void nfs4_init_session(struct nfs_client *clp, + unsigned int wsize, unsigned int rsize) +{ +#if defined(CONFIG_NFS_V4_1) + if (nfs4_has_session(clp)) { + clp->cl_session->fc_attrs.max_rqst_sz = wsize; + clp->cl_session->fc_attrs.max_resp_sz = rsize; + } +#endif /* CONFIG_NFS_V4_1 */ +} + +/* + * Session has been established, and the client marked ready. + * Set the mount rsize and wsize with negotiated fore channel + * attributes which will be bound checked in nfs_server_set_fsinfo. + */ +static void nfs4_session_set_rwsize(struct nfs_server *server) +{ +#ifdef CONFIG_NFS_V4_1 + if (!nfs4_has_session(server->nfs_client)) + return; + server->rsize = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + server->wsize = server->nfs_client->cl_session->fc_attrs.max_rqst_sz; +#endif /* CONFIG_NFS_V4_1 */ +} + +/* * Create a version 4 volume record */ static int nfs4_init_server(struct nfs_server *server, @@ -1164,7 +1299,8 @@ static int nfs4_init_server(struct nfs_server *server, data->client_address, data->auth_flavors[0], data->nfs_server.protocol, - &timeparms); + &timeparms, + data->minorversion); if (error < 0) goto error; @@ -1214,6 +1350,8 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, BUG_ON(!server->nfs_client->rpc_ops); BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + nfs4_init_session(server->nfs_client, server->wsize, server->rsize); + /* Probe the root fh to retrieve its FSID */ error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path); if (error < 0) @@ -1224,6 +1362,8 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, (unsigned long long) server->fsid.minor); dprintk("Mount FH: %d\n", mntfh->size); + nfs4_session_set_rwsize(server); + error = nfs_probe_fsinfo(server, mntfh, &fattr); if (error < 0) goto error; @@ -1282,7 +1422,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, parent_client->cl_ipaddr, data->authflavor, parent_server->client->cl_xprt->prot, - parent_server->client->cl_timeout); + parent_server->client->cl_timeout, + parent_client->cl_minorversion); if (error < 0) goto error; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 968225a8801..af05b918cb5 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -68,29 +68,26 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ { struct inode *inode = state->inode; struct file_lock *fl; - int status; + int status = 0; + + if (inode->i_flock == NULL) + goto out; + /* Protect inode->i_flock using the BKL */ + lock_kernel(); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (nfs_file_open_context(fl->fl_file) != ctx) continue; + unlock_kernel(); status = nfs4_lock_delegation_recall(state, fl); - if (status >= 0) - continue; - switch (status) { - default: - printk(KERN_ERR "%s: unhandled error %d.\n", - __func__, status); - case -NFS4ERR_EXPIRED: - /* kill_proc(fl->fl_pid, SIGLOST, 1); */ - case -NFS4ERR_STALE_CLIENTID: - nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client); - goto out_err; - } + if (status < 0) + goto out; + lock_kernel(); } - return 0; -out_err: + unlock_kernel(); +out: return status; } @@ -268,7 +265,10 @@ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegat struct nfs_inode *nfsi = NFS_I(inode); nfs_msync_inode(inode); - /* Guard against new delegated open calls */ + /* + * Guard against new delegated open/lock/unlock calls and against + * state recovery + */ down_write(&nfsi->rwsem); nfs_delegation_claim_opens(inode, &delegation->stateid); up_write(&nfsi->rwsem); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 08f6b040d28..489fc01a320 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -259,6 +259,9 @@ static void nfs_direct_read_release(void *calldata) } static const struct rpc_call_ops nfs_read_direct_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_read_result, .rpc_release = nfs_direct_read_release, }; @@ -535,6 +538,9 @@ static void nfs_direct_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_direct_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_commit_result, .rpc_release = nfs_direct_commit_release, }; @@ -673,6 +679,9 @@ out_unlock: } static const struct rpc_call_ops nfs_write_direct_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_write_result, .rpc_release = nfs_direct_write_release, }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ec7e27d00bc..0055b813ec2 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -48,6 +48,9 @@ static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos, size_t count, unsigned int flags); static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, unsigned long nr_segs, loff_t pos); +static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, + struct file *filp, loff_t *ppos, + size_t count, unsigned int flags); static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, unsigned long nr_segs, loff_t pos); static int nfs_file_flush(struct file *, fl_owner_t id); @@ -73,6 +76,7 @@ const struct file_operations nfs_file_operations = { .lock = nfs_lock, .flock = nfs_flock, .splice_read = nfs_file_splice_read, + .splice_write = nfs_file_splice_write, .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; @@ -587,12 +591,38 @@ out_swapfile: goto out; } +static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, + struct file *filp, loff_t *ppos, + size_t count, unsigned int flags) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + ssize_t ret; + + dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (unsigned long long) *ppos); + + /* + * The combination of splice and an O_APPEND destination is disallowed. + */ + + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); + + ret = generic_file_splice_write(pipe, filp, ppos, count, flags); + if (ret >= 0 && nfs_need_sync_write(filp, inode)) { + int err = nfs_do_fsync(nfs_file_open_context(filp), inode); + if (err < 0) + ret = err; + } + return ret; +} + static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; int status = 0; - lock_kernel(); /* Try local locking first */ posix_test_lock(filp, fl); if (fl->fl_type != F_UNLCK) { @@ -608,7 +638,6 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) status = NFS_PROTO(inode)->lock(filp, cmd, fl); out: - unlock_kernel(); return status; out_noconflict: fl->fl_type = F_UNLCK; @@ -650,13 +679,11 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) * If we're signalled while cleaning up locks on process exit, we * still need to complete the unlock. */ - lock_kernel(); /* Use local locking if mounted with "-onolock" */ if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else status = do_vfs_lock(filp, fl); - unlock_kernel(); return status; } @@ -673,13 +700,11 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) if (status != 0) goto out; - lock_kernel(); /* Use local locking if mounted with "-onolock" */ if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else status = do_vfs_lock(filp, fl); - unlock_kernel(); if (status < 0) goto out; /* diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e4d6a8348ad..7dd90a6769d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -2,6 +2,7 @@ * NFS internal definitions */ +#include "nfs4_fs.h" #include <linux/mount.h> #include <linux/security.h> @@ -17,6 +18,18 @@ struct nfs_string; */ #define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) +/* + * Determine if sessions are in use. + */ +static inline int nfs4_has_session(const struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4_1 + if (clp->cl_session) + return 1; +#endif /* CONFIG_NFS_V4_1 */ + return 0; +} + struct nfs_clone_mount { const struct super_block *sb; const struct dentry *dentry; @@ -30,6 +43,12 @@ struct nfs_clone_mount { }; /* + * Note: RFC 1813 doesn't limit the number of auth flavors that + * a server can return, so make something up. + */ +#define NFS_MAX_SECFLAVORS (12) + +/* * In-kernel mount arguments */ struct nfs_parsed_mount_data { @@ -44,6 +63,7 @@ struct nfs_parsed_mount_data { unsigned int auth_flavor_len; rpc_authflavor_t auth_flavors[1]; char *client_address; + unsigned int minorversion; char *fscache_uniq; struct { @@ -77,6 +97,8 @@ struct nfs_mount_request { unsigned short protocol; struct nfs_fh *fh; int noresvport; + unsigned int *auth_flav_len; + rpc_authflavor_t *auth_flavs; }; extern int nfs_mount(struct nfs_mount_request *info); @@ -99,6 +121,8 @@ extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); +extern void nfs_mark_client_ready(struct nfs_client *clp, int state); +extern int nfs4_check_client_ready(struct nfs_client *clp); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); @@ -146,6 +170,20 @@ extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); extern struct rpc_procinfo nfs3_procedures[]; extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); +/* nfs4proc.c */ +static inline void nfs4_restart_rpc(struct rpc_task *task, + const struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(clp) && + test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) { + rpc_restart_call_prepare(task); + return; + } +#endif /* CONFIG_NFS_V4_1 */ + rpc_restart_call(task); +} + /* nfs4xdr.c */ #ifdef CONFIG_NFS_V4 extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); @@ -205,6 +243,38 @@ extern int nfs4_path_walk(struct nfs_server *server, const char *path); #endif +/* read.c */ +extern void nfs_read_prepare(struct rpc_task *task, void *calldata); + +/* write.c */ +extern void nfs_write_prepare(struct rpc_task *task, void *calldata); + +/* nfs4proc.c */ +extern int _nfs4_call_sync(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply); +extern int _nfs4_call_sync_session(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply); + +#ifdef CONFIG_NFS_V4_1 +extern void nfs41_sequence_free_slot(const struct nfs_client *, + struct nfs4_sequence_res *res); +#endif /* CONFIG_NFS_V4_1 */ + +static inline void nfs4_sequence_free_slot(const struct nfs_client *clp, + struct nfs4_sequence_res *res) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(clp)) + nfs41_sequence_free_slot(clp, res); +#endif /* CONFIG_NFS_V4_1 */ +} + /* * Determine the device name as a string */ diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index ca905a5bb1b..38ef9eaec40 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -20,8 +20,116 @@ # define NFSDBG_FACILITY NFSDBG_MOUNT #endif +/* + * Defined by RFC 1094, section A.3; and RFC 1813, section 5.1.4 + */ +#define MNTPATHLEN (1024) + +/* + * XDR data type sizes + */ +#define encode_dirpath_sz (1 + XDR_QUADLEN(MNTPATHLEN)) +#define MNT_status_sz (1) +#define MNT_fhs_status_sz (1) +#define MNT_fhandle_sz XDR_QUADLEN(NFS2_FHSIZE) +#define MNT_fhandle3_sz (1 + XDR_QUADLEN(NFS3_FHSIZE)) +#define MNT_authflav3_sz (1 + NFS_MAX_SECFLAVORS) + +/* + * XDR argument and result sizes + */ +#define MNT_enc_dirpath_sz encode_dirpath_sz +#define MNT_dec_mountres_sz (MNT_status_sz + MNT_fhandle_sz) +#define MNT_dec_mountres3_sz (MNT_status_sz + MNT_fhandle_sz + \ + MNT_authflav3_sz) + +/* + * Defined by RFC 1094, section A.5 + */ +enum { + MOUNTPROC_NULL = 0, + MOUNTPROC_MNT = 1, + MOUNTPROC_DUMP = 2, + MOUNTPROC_UMNT = 3, + MOUNTPROC_UMNTALL = 4, + MOUNTPROC_EXPORT = 5, +}; + +/* + * Defined by RFC 1813, section 5.2 + */ +enum { + MOUNTPROC3_NULL = 0, + MOUNTPROC3_MNT = 1, + MOUNTPROC3_DUMP = 2, + MOUNTPROC3_UMNT = 3, + MOUNTPROC3_UMNTALL = 4, + MOUNTPROC3_EXPORT = 5, +}; + static struct rpc_program mnt_program; +/* + * Defined by OpenGroup XNFS Version 3W, chapter 8 + */ +enum mountstat { + MNT_OK = 0, + MNT_EPERM = 1, + MNT_ENOENT = 2, + MNT_EACCES = 13, + MNT_EINVAL = 22, +}; + +static struct { + u32 status; + int errno; +} mnt_errtbl[] = { + { .status = MNT_OK, .errno = 0, }, + { .status = MNT_EPERM, .errno = -EPERM, }, + { .status = MNT_ENOENT, .errno = -ENOENT, }, + { .status = MNT_EACCES, .errno = -EACCES, }, + { .status = MNT_EINVAL, .errno = -EINVAL, }, +}; + +/* + * Defined by RFC 1813, section 5.1.5 + */ +enum mountstat3 { + MNT3_OK = 0, /* no error */ + MNT3ERR_PERM = 1, /* Not owner */ + MNT3ERR_NOENT = 2, /* No such file or directory */ + MNT3ERR_IO = 5, /* I/O error */ + MNT3ERR_ACCES = 13, /* Permission denied */ + MNT3ERR_NOTDIR = 20, /* Not a directory */ + MNT3ERR_INVAL = 22, /* Invalid argument */ + MNT3ERR_NAMETOOLONG = 63, /* Filename too long */ + MNT3ERR_NOTSUPP = 10004, /* Operation not supported */ + MNT3ERR_SERVERFAULT = 10006, /* A failure on the server */ +}; + +static struct { + u32 status; + int errno; +} mnt3_errtbl[] = { + { .status = MNT3_OK, .errno = 0, }, + { .status = MNT3ERR_PERM, .errno = -EPERM, }, + { .status = MNT3ERR_NOENT, .errno = -ENOENT, }, + { .status = MNT3ERR_IO, .errno = -EIO, }, + { .status = MNT3ERR_ACCES, .errno = -EACCES, }, + { .status = MNT3ERR_NOTDIR, .errno = -ENOTDIR, }, + { .status = MNT3ERR_INVAL, .errno = -EINVAL, }, + { .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, }, + { .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, }, + { .status = MNT3ERR_SERVERFAULT, .errno = -ESERVERFAULT, }, +}; + +struct mountres { + int errno; + struct nfs_fh *fh; + unsigned int *auth_count; + rpc_authflavor_t *auth_flavors; +}; + struct mnt_fhstatus { u32 status; struct nfs_fh *fh; @@ -35,8 +143,10 @@ struct mnt_fhstatus { */ int nfs_mount(struct nfs_mount_request *info) { - struct mnt_fhstatus result = { - .fh = info->fh + struct mountres result = { + .fh = info->fh, + .auth_count = info->auth_flav_len, + .auth_flavors = info->auth_flavs, }; struct rpc_message msg = { .rpc_argp = info->dirpath, @@ -68,14 +178,14 @@ int nfs_mount(struct nfs_mount_request *info) if (info->version == NFS_MNT3_VERSION) msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; else - msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; + msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT]; status = rpc_call_sync(mnt_clnt, &msg, 0); rpc_shutdown_client(mnt_clnt); if (status < 0) goto out_call_err; - if (result.status != 0) + if (result.errno != 0) goto out_mnt_err; dprintk("NFS: MNT request succeeded\n"); @@ -86,72 +196,215 @@ out: out_clnt_err: status = PTR_ERR(mnt_clnt); - dprintk("NFS: failed to create RPC client, status=%d\n", status); + dprintk("NFS: failed to create MNT RPC client, status=%d\n", status); goto out; out_call_err: - dprintk("NFS: failed to start MNT request, status=%d\n", status); + dprintk("NFS: MNT request failed, status=%d\n", status); goto out; out_mnt_err: - dprintk("NFS: MNT server returned result %d\n", result.status); - status = nfs_stat_to_errno(result.status); + dprintk("NFS: MNT server returned result %d\n", result.errno); + status = result.errno; goto out; } /* * XDR encode/decode functions for MOUNT */ -static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, - const char *path) + +static int encode_mntdirpath(struct xdr_stream *xdr, const char *pathname) +{ + const u32 pathname_len = strlen(pathname); + __be32 *p; + + if (unlikely(pathname_len > MNTPATHLEN)) + return -EIO; + + p = xdr_reserve_space(xdr, sizeof(u32) + pathname_len); + if (unlikely(p == NULL)) + return -EIO; + xdr_encode_opaque(p, pathname, pathname_len); + + return 0; +} + +static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p, + const char *dirpath) +{ + struct xdr_stream xdr; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + return encode_mntdirpath(&xdr, dirpath); +} + +/* + * RFC 1094: "A non-zero status indicates some sort of error. In this + * case, the status is a UNIX error number." This can be problematic + * if the server and client use different errno values for the same + * error. + * + * However, the OpenGroup XNFS spec provides a simple mapping that is + * independent of local errno values on the server and the client. + */ +static int decode_status(struct xdr_stream *xdr, struct mountres *res) { - p = xdr_encode_string(p, path); + unsigned int i; + u32 status; + __be32 *p; + + p = xdr_inline_decode(xdr, sizeof(status)); + if (unlikely(p == NULL)) + return -EIO; + status = ntohl(*p); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + for (i = 0; i <= ARRAY_SIZE(mnt_errtbl); i++) { + if (mnt_errtbl[i].status == status) { + res->errno = mnt_errtbl[i].errno; + return 0; + } + } + + dprintk("NFS: unrecognized MNT status code: %u\n", status); + res->errno = -EACCES; return 0; } -static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, - struct mnt_fhstatus *res) +static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res) { struct nfs_fh *fh = res->fh; + __be32 *p; + + p = xdr_inline_decode(xdr, NFS2_FHSIZE); + if (unlikely(p == NULL)) + return -EIO; + + fh->size = NFS2_FHSIZE; + memcpy(fh->data, p, NFS2_FHSIZE); + return 0; +} + +static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p, + struct mountres *res) +{ + struct xdr_stream xdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + status = decode_status(&xdr, res); + if (unlikely(status != 0 || res->errno != 0)) + return status; + return decode_fhandle(&xdr, res); +} + +static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res) +{ + unsigned int i; + u32 status; + __be32 *p; - if ((res->status = ntohl(*p++)) == 0) { - fh->size = NFS2_FHSIZE; - memcpy(fh->data, p, NFS2_FHSIZE); + p = xdr_inline_decode(xdr, sizeof(status)); + if (unlikely(p == NULL)) + return -EIO; + status = ntohl(*p); + + for (i = 0; i <= ARRAY_SIZE(mnt3_errtbl); i++) { + if (mnt3_errtbl[i].status == status) { + res->errno = mnt3_errtbl[i].errno; + return 0; + } } + + dprintk("NFS: unrecognized MNT3 status code: %u\n", status); + res->errno = -EACCES; return 0; } -static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, - struct mnt_fhstatus *res) +static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res) { struct nfs_fh *fh = res->fh; - unsigned size; - - if ((res->status = ntohl(*p++)) == 0) { - size = ntohl(*p++); - if (size <= NFS3_FHSIZE && size != 0) { - fh->size = size; - memcpy(fh->data, p, size); - } else - res->status = -EBADHANDLE; + u32 size; + __be32 *p; + + p = xdr_inline_decode(xdr, sizeof(size)); + if (unlikely(p == NULL)) + return -EIO; + + size = ntohl(*p++); + if (size > NFS3_FHSIZE || size == 0) + return -EIO; + + p = xdr_inline_decode(xdr, size); + if (unlikely(p == NULL)) + return -EIO; + + fh->size = size; + memcpy(fh->data, p, size); + return 0; +} + +static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res) +{ + rpc_authflavor_t *flavors = res->auth_flavors; + unsigned int *count = res->auth_count; + u32 entries, i; + __be32 *p; + + if (*count == 0) + return 0; + + p = xdr_inline_decode(xdr, sizeof(entries)); + if (unlikely(p == NULL)) + return -EIO; + entries = ntohl(*p); + dprintk("NFS: received %u auth flavors\n", entries); + if (entries > NFS_MAX_SECFLAVORS) + entries = NFS_MAX_SECFLAVORS; + + p = xdr_inline_decode(xdr, sizeof(u32) * entries); + if (unlikely(p == NULL)) + return -EIO; + + if (entries > *count) + entries = *count; + + for (i = 0; i < entries; i++) { + flavors[i] = ntohl(*p++); + dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]); } + *count = i; + return 0; } -#define MNT_dirpath_sz (1 + 256) -#define MNT_fhstatus_sz (1 + 8) -#define MNT_fhstatus3_sz (1 + 16) +static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p, + struct mountres *res) +{ + struct xdr_stream xdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + status = decode_fhs_status(&xdr, res); + if (unlikely(status != 0 || res->errno != 0)) + return status; + status = decode_fhandle3(&xdr, res); + if (unlikely(status != 0)) { + res->errno = -EBADHANDLE; + return 0; + } + return decode_auth_flavors(&xdr, res); +} static struct rpc_procinfo mnt_procedures[] = { - [MNTPROC_MNT] = { - .p_proc = MNTPROC_MNT, - .p_encode = (kxdrproc_t) xdr_encode_dirpath, - .p_decode = (kxdrproc_t) xdr_decode_fhstatus, - .p_arglen = MNT_dirpath_sz, - .p_replen = MNT_fhstatus_sz, - .p_statidx = MNTPROC_MNT, + [MOUNTPROC_MNT] = { + .p_proc = MOUNTPROC_MNT, + .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_decode = (kxdrproc_t)mnt_dec_mountres, + .p_arglen = MNT_enc_dirpath_sz, + .p_replen = MNT_dec_mountres_sz, + .p_statidx = MOUNTPROC_MNT, .p_name = "MOUNT", }, }; @@ -159,10 +412,10 @@ static struct rpc_procinfo mnt_procedures[] = { static struct rpc_procinfo mnt3_procedures[] = { [MOUNTPROC3_MNT] = { .p_proc = MOUNTPROC3_MNT, - .p_encode = (kxdrproc_t) xdr_encode_dirpath, - .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, - .p_arglen = MNT_dirpath_sz, - .p_replen = MNT_fhstatus3_sz, + .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_decode = (kxdrproc_t)mnt_dec_mountres3, + .p_arglen = MNT_enc_dirpath_sz, + .p_replen = MNT_dec_mountres3_sz, .p_statidx = MOUNTPROC3_MNT, .p_name = "MOUNT", }, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index f01caec8446..40c76678289 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -65,6 +65,11 @@ char *nfs_path(const char *base, dentry = dentry->d_parent; } spin_unlock(&dcache_lock); + if (*end != '/') { + if (--buflen < 0) + goto Elong; + *--end = '/'; + } namelen = strlen(base); /* Strip off excess slashes in base string */ while (namelen > 0 && base[namelen - 1] == '/') diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 6bbf0e6daad..bac60515a4b 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -207,8 +207,6 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) status = nfs_revalidate_inode(server, inode); if (status < 0) return ERR_PTR(status); - if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) - nfs_zap_acl_cache(inode); acl = nfs3_get_cached_acl(inode, type); if (acl != ERR_PTR(-EAGAIN)) return acl; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 84345deab26..61bc3a32e1e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -44,6 +44,7 @@ enum nfs4_client_state { NFS4CLNT_RECLAIM_REBOOT, NFS4CLNT_RECLAIM_NOGRACE, NFS4CLNT_DELEGRETURN, + NFS4CLNT_SESSION_SETUP, }; /* @@ -177,6 +178,14 @@ struct nfs4_state_recovery_ops { int state_flag_bit; int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); int (*recover_lock)(struct nfs4_state *, struct file_lock *); + int (*establish_clid)(struct nfs_client *, struct rpc_cred *); + struct rpc_cred * (*get_clid_cred)(struct nfs_client *); +}; + +struct nfs4_state_maintenance_ops { + int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *); + struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); + int (*renew_lease)(struct nfs_client *, struct rpc_cred *); }; extern const struct dentry_operations nfs4_dentry_operations; @@ -193,6 +202,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); +extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); @@ -200,8 +210,26 @@ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fh extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page); -extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; -extern struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops; +extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[]; +extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[]; +#if defined(CONFIG_NFS_V4_1) +extern int nfs4_setup_sequence(struct nfs_client *clp, + struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, + int cache_reply, struct rpc_task *task); +extern void nfs4_destroy_session(struct nfs4_session *session); +extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); +extern int nfs4_proc_create_session(struct nfs_client *, int reset); +extern int nfs4_proc_destroy_session(struct nfs4_session *); +#else /* CONFIG_NFS_v4_1 */ +static inline int nfs4_setup_sequence(struct nfs_client *clp, + struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, + int cache_reply, struct rpc_task *task) +{ + return 0; +} +#endif /* CONFIG_NFS_V4_1 */ + +extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[]; extern const u32 nfs4_fattr_bitmap[2]; extern const u32 nfs4_statfs_bitmap[2]; @@ -216,7 +244,12 @@ extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); +#if defined(CONFIG_NFS_V4_1) +struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); +struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); +#endif /* CONFIG_NFS_V4_1 */ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4674f8092da..92ce4351781 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -48,11 +48,14 @@ #include <linux/smp_lock.h> #include <linux/namei.h> #include <linux/mount.h> +#include <linux/module.h> +#include <linux/sunrpc/bc_xprt.h> #include "nfs4_fs.h" #include "delegation.h" #include "internal.h" #include "iostat.h" +#include "callback.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -247,7 +250,25 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, ret = nfs4_wait_clnt_recover(clp); if (ret == 0) exception->retry = 1; +#if !defined(CONFIG_NFS_V4_1) break; +#else /* !defined(CONFIG_NFS_V4_1) */ + if (!nfs4_has_session(server->nfs_client)) + break; + /* FALLTHROUGH */ + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + dprintk("%s ERROR: %d Reset session\n", __func__, + errorcode); + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + exception->retry = 1; + /* FALLTHROUGH */ +#endif /* !defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: @@ -271,6 +292,353 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp spin_unlock(&clp->cl_lock); } +#if defined(CONFIG_NFS_V4_1) + +/* + * nfs4_free_slot - free a slot and efficiently update slot table. + * + * freeing a slot is trivially done by clearing its respective bit + * in the bitmap. + * If the freed slotid equals highest_used_slotid we want to update it + * so that the server would be able to size down the slot table if needed, + * otherwise we know that the highest_used_slotid is still in use. + * When updating highest_used_slotid there may be "holes" in the bitmap + * so we need to scan down from highest_used_slotid to 0 looking for the now + * highest slotid in use. + * If none found, highest_used_slotid is set to -1. + */ +static void +nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) +{ + int slotid = free_slotid; + + spin_lock(&tbl->slot_tbl_lock); + /* clear used bit in bitmap */ + __clear_bit(slotid, tbl->used_slots); + + /* update highest_used_slotid when it is freed */ + if (slotid == tbl->highest_used_slotid) { + slotid = find_last_bit(tbl->used_slots, tbl->max_slots); + if (slotid >= 0 && slotid < tbl->max_slots) + tbl->highest_used_slotid = slotid; + else + tbl->highest_used_slotid = -1; + } + rpc_wake_up_next(&tbl->slot_tbl_waitq); + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, + free_slotid, tbl->highest_used_slotid); +} + +void nfs41_sequence_free_slot(const struct nfs_client *clp, + struct nfs4_sequence_res *res) +{ + struct nfs4_slot_table *tbl; + + if (!nfs4_has_session(clp)) { + dprintk("%s: No session\n", __func__); + return; + } + tbl = &clp->cl_session->fc_slot_table; + if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { + dprintk("%s: No slot\n", __func__); + /* just wake up the next guy waiting since + * we may have not consumed a slot after all */ + rpc_wake_up_next(&tbl->slot_tbl_waitq); + return; + } + nfs4_free_slot(tbl, res->sr_slotid); + res->sr_slotid = NFS4_MAX_SLOT_TABLE; +} + +static void nfs41_sequence_done(struct nfs_client *clp, + struct nfs4_sequence_res *res, + int rpc_status) +{ + unsigned long timestamp; + struct nfs4_slot_table *tbl; + struct nfs4_slot *slot; + + /* + * sr_status remains 1 if an RPC level error occurred. The server + * may or may not have processed the sequence operation.. + * Proceed as if the server received and processed the sequence + * operation. + */ + if (res->sr_status == 1) + res->sr_status = NFS_OK; + + /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ + if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) + goto out; + + tbl = &clp->cl_session->fc_slot_table; + slot = tbl->slots + res->sr_slotid; + + if (res->sr_status == 0) { + /* Update the slot's sequence and clientid lease timer */ + ++slot->seq_nr; + timestamp = res->sr_renewal_time; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal, timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); + return; + } +out: + /* The session may be reset by one of the error handlers. */ + dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); + nfs41_sequence_free_slot(clp, res); +} + +/* + * nfs4_find_slot - efficiently look for a free slot + * + * nfs4_find_slot looks for an unset bit in the used_slots bitmap. + * If found, we mark the slot as used, update the highest_used_slotid, + * and respectively set up the sequence operation args. + * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise. + * + * Note: must be called with under the slot_tbl_lock. + */ +static u8 +nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task) +{ + int slotid; + u8 ret_id = NFS4_MAX_SLOT_TABLE; + BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE); + + dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n", + __func__, tbl->used_slots[0], tbl->highest_used_slotid, + tbl->max_slots); + slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); + if (slotid >= tbl->max_slots) + goto out; + __set_bit(slotid, tbl->used_slots); + if (slotid > tbl->highest_used_slotid) + tbl->highest_used_slotid = slotid; + ret_id = slotid; +out: + dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", + __func__, tbl->used_slots[0], tbl->highest_used_slotid, ret_id); + return ret_id; +} + +static int nfs4_recover_session(struct nfs4_session *session) +{ + struct nfs_client *clp = session->clp; + int ret; + + for (;;) { + ret = nfs4_wait_clnt_recover(clp); + if (ret != 0) + return ret; + if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) + break; + nfs4_schedule_state_manager(clp); + } + return 0; +} + +static int nfs41_setup_sequence(struct nfs4_session *session, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply, + struct rpc_task *task) +{ + struct nfs4_slot *slot; + struct nfs4_slot_table *tbl; + int status = 0; + u8 slotid; + + dprintk("--> %s\n", __func__); + /* slot already allocated? */ + if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) + return 0; + + memset(res, 0, sizeof(*res)); + res->sr_slotid = NFS4_MAX_SLOT_TABLE; + tbl = &session->fc_slot_table; + + spin_lock(&tbl->slot_tbl_lock); + if (test_bit(NFS4CLNT_SESSION_SETUP, &session->clp->cl_state)) { + if (tbl->highest_used_slotid != -1) { + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + dprintk("<-- %s: Session reset: draining\n", __func__); + return -EAGAIN; + } + + /* The slot table is empty; start the reset thread */ + dprintk("%s Session Reset\n", __func__); + spin_unlock(&tbl->slot_tbl_lock); + status = nfs4_recover_session(session); + if (status) + return status; + spin_lock(&tbl->slot_tbl_lock); + } + + slotid = nfs4_find_slot(tbl, task); + if (slotid == NFS4_MAX_SLOT_TABLE) { + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + dprintk("<-- %s: no free slots\n", __func__); + return -EAGAIN; + } + spin_unlock(&tbl->slot_tbl_lock); + + slot = tbl->slots + slotid; + args->sa_session = session; + args->sa_slotid = slotid; + args->sa_cache_this = cache_reply; + + dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); + + res->sr_session = session; + res->sr_slotid = slotid; + res->sr_renewal_time = jiffies; + /* + * sr_status is only set in decode_sequence, and so will remain + * set to 1 if an rpc level failure occurs. + */ + res->sr_status = 1; + return 0; +} + +int nfs4_setup_sequence(struct nfs_client *clp, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply, + struct rpc_task *task) +{ + int ret = 0; + + dprintk("--> %s clp %p session %p sr_slotid %d\n", + __func__, clp, clp->cl_session, res->sr_slotid); + + if (!nfs4_has_session(clp)) + goto out; + ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply, + task); + if (ret != -EAGAIN) { + /* terminate rpc task */ + task->tk_status = ret; + task->tk_action = NULL; + } +out: + dprintk("<-- %s status=%d\n", __func__, ret); + return ret; +} + +struct nfs41_call_sync_data { + struct nfs_client *clp; + struct nfs4_sequence_args *seq_args; + struct nfs4_sequence_res *seq_res; + int cache_reply; +}; + +static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs41_call_sync_data *data = calldata; + + dprintk("--> %s data->clp->cl_session %p\n", __func__, + data->clp->cl_session); + if (nfs4_setup_sequence(data->clp, data->seq_args, + data->seq_res, data->cache_reply, task)) + return; + rpc_call_start(task); +} + +static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) +{ + struct nfs41_call_sync_data *data = calldata; + + nfs41_sequence_done(data->clp, data->seq_res, task->tk_status); + nfs41_sequence_free_slot(data->clp, data->seq_res); +} + +struct rpc_call_ops nfs41_call_sync_ops = { + .rpc_call_prepare = nfs41_call_sync_prepare, + .rpc_call_done = nfs41_call_sync_done, +}; + +static int nfs4_call_sync_sequence(struct nfs_client *clp, + struct rpc_clnt *clnt, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply) +{ + int ret; + struct rpc_task *task; + struct nfs41_call_sync_data data = { + .clp = clp, + .seq_args = args, + .seq_res = res, + .cache_reply = cache_reply, + }; + struct rpc_task_setup task_setup = { + .rpc_client = clnt, + .rpc_message = msg, + .callback_ops = &nfs41_call_sync_ops, + .callback_data = &data + }; + + res->sr_slotid = NFS4_MAX_SLOT_TABLE; + task = rpc_run_task(&task_setup); + if (IS_ERR(task)) + ret = PTR_ERR(task); + else { + ret = task->tk_status; + rpc_put_task(task); + } + return ret; +} + +int _nfs4_call_sync_session(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply) +{ + return nfs4_call_sync_sequence(server->nfs_client, server->client, + msg, args, res, cache_reply); +} + +#endif /* CONFIG_NFS_V4_1 */ + +int _nfs4_call_sync(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply) +{ + args->sa_session = res->sr_session = NULL; + return rpc_call_sync(server->client, msg, 0); +} + +#define nfs4_call_sync(server, msg, args, res, cache_reply) \ + (server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \ + &(res)->seq_res, (cache_reply)) + +static void nfs4_sequence_done(const struct nfs_server *server, + struct nfs4_sequence_res *res, int rpc_status) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(server->nfs_client)) + nfs41_sequence_done(server->nfs_client, res, rpc_status); +#endif /* CONFIG_NFS_V4_1 */ +} + +/* no restart, therefore free slot here */ +static void nfs4_sequence_done_free_slot(const struct nfs_server *server, + struct nfs4_sequence_res *res, + int rpc_status) +{ + nfs4_sequence_done(server, res, rpc_status); + nfs4_sequence_free_slot(server->nfs_client, res); +} + static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) { struct nfs_inode *nfsi = NFS_I(dir); @@ -312,6 +680,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) p->o_res.server = p->o_arg.server; nfs_fattr_init(&p->f_attr); nfs_fattr_init(&p->dir_attr); + p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; } static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, @@ -804,16 +1173,30 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state err = _nfs4_open_delegation_recall(ctx, state, stateid); switch (err) { case 0: - return err; + case -ENOENT: + case -ESTALE: + goto out; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: /* Don't recall a delegation if it was lost */ nfs4_schedule_state_recovery(server->nfs_client); - return err; + goto out; + case -ERESTARTSYS: + /* + * The show must go on: exit, but mark the + * stateid as needing recovery. + */ + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + nfs4_state_mark_reclaim_nograce(server->nfs_client, state); + case -ENOMEM: + err = 0; + goto out; } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); +out: return err; } @@ -929,6 +1312,10 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); } data->timestamp = jiffies; + if (nfs4_setup_sequence(data->o_arg.server->nfs_client, + &data->o_arg.seq_args, + &data->o_res.seq_res, 1, task)) + return; rpc_call_start(task); return; out_no_action: @@ -941,6 +1328,10 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) struct nfs4_opendata *data = calldata; data->rpc_status = task->tk_status; + + nfs4_sequence_done_free_slot(data->o_arg.server, &data->o_res.seq_res, + task->tk_status); + if (RPC_ASSASSINATED(task)) return; if (task->tk_status == 0) { @@ -1269,7 +1660,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &arg, &res, 1); if (status == 0 && state != NULL) renew_lease(server, timestamp); return status; @@ -1318,6 +1709,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) struct nfs4_state *state = calldata->state; struct nfs_server *server = NFS_SERVER(calldata->inode); + nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status); if (RPC_ASSASSINATED(task)) return; /* hmm. we are done with the inode, and in the process of freeing @@ -1336,10 +1728,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; default: if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { - rpc_restart_call(task); + nfs4_restart_rpc(task, server->nfs_client); return; } } + nfs4_sequence_free_slot(server->nfs_client, &calldata->res.seq_res); nfs_refresh_inode(calldata->inode, calldata->res.fattr); } @@ -1380,6 +1773,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) calldata->arg.fmode = FMODE_WRITE; } calldata->timestamp = jiffies; + if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client, + &calldata->arg.seq_args, &calldata->res.seq_res, + 1, task)) + return; rpc_call_start(task); } @@ -1419,13 +1816,15 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) }; int status = -ENOMEM; - calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); + calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); if (calldata == NULL) goto out; calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); calldata->arg.stateid = &state->open_stateid; + if (nfs4_has_session(server->nfs_client)) + memset(calldata->arg.stateid->data, 0, 4); /* clear seqid */ /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); if (calldata->arg.seqid == NULL) @@ -1435,6 +1834,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; + calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; calldata->path.mnt = mntget(path->mnt); calldata->path.dentry = dget(path->dentry); @@ -1584,15 +1984,18 @@ void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { + struct nfs4_server_caps_arg args = { + .fhandle = fhandle, + }; struct nfs4_server_caps_res res = {}; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS], - .rpc_argp = fhandle, + .rpc_argp = &args, .rpc_resp = &res, }; int status; - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &args, &res, 0); if (status == 0) { memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) @@ -1606,6 +2009,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; server->acl_bitmask = res.acl_bitmask; } + return status; } @@ -1637,8 +2041,15 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; + int status; + nfs_fattr_init(info->fattr); - return rpc_call_sync(server->client, &msg, 0); + status = nfs4_recover_expired_lease(server); + if (!status) + status = nfs4_check_client_ready(server->nfs_client); + if (!status) + status = nfs4_call_sync(server, &msg, &args, &res, 0); + return status; } static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, @@ -1728,7 +2139,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, }; nfs_fattr_init(fattr); - return rpc_call_sync(server->client, &msg, 0); + return nfs4_call_sync(server, &msg, &args, &res, 0); } static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) @@ -1812,7 +2223,7 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *d nfs_fattr_init(fattr); dprintk("NFS call lookupfh %s\n", name->name); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &args, &res, 0); dprintk("NFS reply lookupfh: %d\n", status); return status; } @@ -1898,7 +2309,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry args.access |= NFS4_ACCESS_EXECUTE; } nfs_fattr_init(&fattr); - status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + status = nfs4_call_sync(server, &msg, &args, &res, 0); if (!status) { entry->mask = 0; if (res.access & NFS4_ACCESS_READ) @@ -1957,13 +2368,14 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page, .pglen = pglen, .pages = &page, }; + struct nfs4_readlink_res res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK], .rpc_argp = &args, - .rpc_resp = NULL, + .rpc_resp = &res, }; - return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); } static int nfs4_proc_readlink(struct inode *inode, struct page *page, @@ -2057,7 +2469,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) int status; nfs_fattr_init(&res.dir_attr); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &args, &res, 1); if (status == 0) { update_changeattr(dir, &res.cinfo); nfs_post_op_update_inode(dir, &res.dir_attr); @@ -2092,8 +2504,10 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) { struct nfs_removeres *res = task->tk_msg.rpc_resp; + nfs4_sequence_done(res->server, &res->seq_res, task->tk_status); if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) return 0; + nfs4_sequence_free_slot(res->server->nfs_client, &res->seq_res); update_changeattr(dir, &res->cinfo); nfs_post_op_update_inode(dir, &res->dir_attr); return 1; @@ -2125,7 +2539,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, nfs_fattr_init(res.old_fattr); nfs_fattr_init(res.new_fattr); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &arg, &res, 1); if (!status) { update_changeattr(old_dir, &res.old_cinfo); @@ -2174,7 +2588,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * nfs_fattr_init(res.fattr); nfs_fattr_init(res.dir_attr); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &arg, &res, 1); if (!status) { update_changeattr(dir, &res.cinfo); nfs_post_op_update_inode(dir, res.dir_attr); @@ -2235,7 +2649,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) { - int status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); + int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg, + &data->arg, &data->res, 1); if (status == 0) { update_changeattr(dir, &data->res.dir_cinfo); nfs_post_op_update_inode(dir, data->res.dir_fattr); @@ -2344,7 +2759,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, (unsigned long long)cookie); nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); res.pgbase = args.pgbase; - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); if (status == 0) memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); @@ -2422,14 +2837,17 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, .fh = fhandle, .bitmask = server->attr_bitmask, }; + struct nfs4_statfs_res res = { + .fsstat = fsstat, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS], .rpc_argp = &args, - .rpc_resp = fsstat, + .rpc_resp = &res, }; nfs_fattr_init(fsstat->fattr); - return rpc_call_sync(server->client, &msg, 0); + return nfs4_call_sync(server, &msg, &args, &res, 0); } static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) @@ -2451,13 +2869,16 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, .fh = fhandle, .bitmask = server->attr_bitmask, }; + struct nfs4_fsinfo_res res = { + .fsinfo = fsinfo, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO], .rpc_argp = &args, - .rpc_resp = fsinfo, + .rpc_resp = &res, }; - return rpc_call_sync(server->client, &msg, 0); + return nfs4_call_sync(server, &msg, &args, &res, 0); } static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) @@ -2486,10 +2907,13 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle .fh = fhandle, .bitmask = server->attr_bitmask, }; + struct nfs4_pathconf_res res = { + .pathconf = pathconf, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF], .rpc_argp = &args, - .rpc_resp = pathconf, + .rpc_resp = &res, }; /* None of the pathconf attributes are mandatory to implement */ @@ -2499,7 +2923,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle } nfs_fattr_init(pathconf->fattr); - return rpc_call_sync(server->client, &msg, 0); + return nfs4_call_sync(server, &msg, &args, &res, 0); } static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, @@ -2520,8 +2944,13 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_server *server = NFS_SERVER(data->inode); + dprintk("--> %s\n", __func__); + + /* nfs4_sequence_free_slot called in the read rpc_call_done */ + nfs4_sequence_done(server, &data->res.seq_res, task->tk_status); + if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { - rpc_restart_call(task); + nfs4_restart_rpc(task, server->nfs_client); return -EAGAIN; } @@ -2541,8 +2970,12 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; + /* slot is freed in nfs_writeback_done */ + nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, + task->tk_status); + if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { - rpc_restart_call(task); + nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); return -EAGAIN; } if (task->tk_status >= 0) { @@ -2567,10 +3000,14 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; + nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, + task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { - rpc_restart_call(task); + nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); return -EAGAIN; } + nfs4_sequence_free_slot(NFS_SERVER(inode)->nfs_client, + &data->res.seq_res); nfs_refresh_inode(inode, data->res.fattr); return 0; } @@ -2603,6 +3040,9 @@ static void nfs4_renew_done(struct rpc_task *task, void *data) if (time_before(clp->cl_last_renewal,timestamp)) clp->cl_last_renewal = timestamp; spin_unlock(&clp->cl_lock); + dprintk("%s calling put_rpccred on rpc_cred %p\n", __func__, + task->tk_msg.rpc_cred); + put_rpccred(task->tk_msg.rpc_cred); } static const struct rpc_call_ops nfs4_renew_ops = { @@ -2742,12 +3182,14 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .acl_pages = pages, .acl_len = buflen, }; - size_t resp_len = buflen; + struct nfs_getaclres res = { + .acl_len = buflen, + }; void *resp_buf; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], .rpc_argp = &args, - .rpc_resp = &resp_len, + .rpc_resp = &res, }; struct page *localpage = NULL; int ret; @@ -2761,26 +3203,26 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu return -ENOMEM; args.acl_pages[0] = localpage; args.acl_pgbase = 0; - resp_len = args.acl_len = PAGE_SIZE; + args.acl_len = PAGE_SIZE; } else { resp_buf = buf; buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); } - ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); if (ret) goto out_free; - if (resp_len > args.acl_len) - nfs4_write_cached_acl(inode, NULL, resp_len); + if (res.acl_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, res.acl_len); else - nfs4_write_cached_acl(inode, resp_buf, resp_len); + nfs4_write_cached_acl(inode, resp_buf, res.acl_len); if (buf) { ret = -ERANGE; - if (resp_len > buflen) + if (res.acl_len > buflen) goto out_free; if (localpage) - memcpy(buf, resp_buf, resp_len); + memcpy(buf, resp_buf, res.acl_len); } - ret = resp_len; + ret = res.acl_len; out_free: if (localpage) __free_page(localpage); @@ -2810,8 +3252,6 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) ret = nfs_revalidate_inode(server, inode); if (ret < 0) return ret; - if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) - nfs_zap_acl_cache(inode); ret = nfs4_read_cached_acl(inode, buf, buflen); if (ret != -ENOENT) return ret; @@ -2827,10 +3267,11 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl .acl_pages = pages, .acl_len = buflen, }; + struct nfs_setaclres res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], .rpc_argp = &arg, - .rpc_resp = NULL, + .rpc_resp = &res, }; int ret; @@ -2838,7 +3279,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return -EOPNOTSUPP; nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); - ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + ret = nfs4_call_sync(server, &msg, &arg, &res, 1); nfs_access_zap_cache(inode); nfs_zap_acl_cache(inode); return ret; @@ -2857,10 +3298,8 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen } static int -nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) +_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state) { - struct nfs_client *clp = server->nfs_client; - if (!clp || task->tk_status >= 0) return 0; switch(task->tk_status) { @@ -2879,8 +3318,23 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); task->tk_status = 0; return -EAGAIN; +#if defined(CONFIG_NFS_V4_1) + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + dprintk("%s ERROR %d, Reset session\n", __func__, + task->tk_status); + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + task->tk_status = 0; + return -EAGAIN; +#endif /* CONFIG_NFS_V4_1 */ case -NFS4ERR_DELAY: - nfs_inc_server_stats(server, NFSIOS_DELAY); + if (server) + nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; @@ -2893,6 +3347,12 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, return 0; } +static int +nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) +{ + return _nfs4_async_handle_error(task, server, server->nfs_client, state); +} + int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) { nfs4_verifier sc_verifier; @@ -3000,6 +3460,10 @@ struct nfs4_delegreturndata { static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_delegreturndata *data = calldata; + + nfs4_sequence_done_free_slot(data->res.server, &data->res.seq_res, + task->tk_status); + data->rpc_status = task->tk_status; if (data->rpc_status == 0) renew_lease(data->res.server, data->timestamp); @@ -3010,7 +3474,25 @@ static void nfs4_delegreturn_release(void *calldata) kfree(calldata); } +#if defined(CONFIG_NFS_V4_1) +static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) +{ + struct nfs4_delegreturndata *d_data; + + d_data = (struct nfs4_delegreturndata *)data; + + if (nfs4_setup_sequence(d_data->res.server->nfs_client, + &d_data->args.seq_args, + &d_data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + static const struct rpc_call_ops nfs4_delegreturn_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs4_delegreturn_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs4_delegreturn_done, .rpc_release = nfs4_delegreturn_release, }; @@ -3032,7 +3514,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co }; int status = 0; - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) return -ENOMEM; data->args.fhandle = &data->fh; @@ -3042,6 +3524,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co memcpy(&data->stateid, stateid, sizeof(data->stateid)); data->res.fattr = &data->fattr; data->res.server = server; + data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; @@ -3127,7 +3610,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock goto out; lsp = request->fl_u.nfs4_fl.owner; arg.lock_owner.id = lsp->ls_id.id; - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &arg, &res, 1); switch (status) { case 0: request->fl_type = F_UNLCK; @@ -3187,13 +3670,14 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, struct nfs4_unlockdata *p; struct inode *inode = lsp->ls_state->inode; - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) return NULL; p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; p->arg.seqid = seqid; p->res.seqid = seqid; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; p->arg.stateid = &lsp->ls_stateid; p->lsp = lsp; atomic_inc(&lsp->ls_count); @@ -3217,6 +3701,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) { struct nfs4_unlockdata *calldata = data; + nfs4_sequence_done(calldata->server, &calldata->res.seq_res, + task->tk_status); if (RPC_ASSASSINATED(task)) return; switch (task->tk_status) { @@ -3233,8 +3719,11 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) break; default: if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) - rpc_restart_call(task); + nfs4_restart_rpc(task, + calldata->server->nfs_client); } + nfs4_sequence_free_slot(calldata->server->nfs_client, + &calldata->res.seq_res); } static void nfs4_locku_prepare(struct rpc_task *task, void *data) @@ -3249,6 +3738,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) return; } calldata->timestamp = jiffies; + if (nfs4_setup_sequence(calldata->server->nfs_client, + &calldata->arg.seq_args, + &calldata->res.seq_res, 1, task)) + return; rpc_call_start(task); } @@ -3341,6 +3834,7 @@ struct nfs4_lockdata { unsigned long timestamp; int rpc_status; int cancelled; + struct nfs_server *server; }; static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, @@ -3366,7 +3860,9 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_id.id; p->res.lock_seqid = p->arg.lock_seqid; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; p->lsp = lsp; + p->server = server; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); memcpy(&p->fl, fl, sizeof(p->fl)); @@ -3396,6 +3892,9 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) } else data->arg.new_lock_owner = 0; data->timestamp = jiffies; + if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args, + &data->res.seq_res, 1, task)) + return; rpc_call_start(task); dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); } @@ -3406,6 +3905,9 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) dprintk("%s: begin!\n", __func__); + nfs4_sequence_done_free_slot(data->server, &data->res.seq_res, + task->tk_status); + data->rpc_status = task->tk_status; if (RPC_ASSASSINATED(task)) goto out; @@ -3487,8 +3989,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f ret = nfs4_wait_for_completion_rpc_task(task); if (ret == 0) { ret = data->rpc_status; - if (ret == -NFS4ERR_DENIED) - ret = -EAGAIN; } else data->cancelled = 1; rpc_put_task(task); @@ -3576,9 +4076,11 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * int err; do { + err = _nfs4_proc_setlk(state, cmd, request); + if (err == -NFS4ERR_DENIED) + err = -EAGAIN; err = nfs4_handle_exception(NFS_SERVER(state->inode), - _nfs4_proc_setlk(state, cmd, request), - &exception); + err, &exception); } while (exception.retry); return err; } @@ -3630,8 +4132,37 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) goto out; do { err = _nfs4_do_setlk(state, F_SETLK, fl, 0); - if (err != -NFS4ERR_DELAY) - break; + switch (err) { + default: + printk(KERN_ERR "%s: unhandled error %d.\n", + __func__, err); + case 0: + case -ESTALE: + goto out; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + nfs4_schedule_state_recovery(server->nfs_client); + goto out; + case -ERESTARTSYS: + /* + * The show must go on: exit, but mark the + * stateid as needing recovery. + */ + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OPENMODE: + nfs4_state_mark_reclaim_nograce(server->nfs_client, state); + err = 0; + goto out; + case -ENOMEM: + case -NFS4ERR_DENIED: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + err = 0; + goto out; + case -NFS4ERR_DELAY: + break; + } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); out: @@ -3706,10 +4237,13 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, .page = page, .bitmask = bitmask, }; + struct nfs4_fs_locations_res res = { + .fs_locations = fs_locations, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], .rpc_argp = &args, - .rpc_resp = fs_locations, + .rpc_resp = &res, }; int status; @@ -3717,24 +4251,720 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, nfs_fattr_init(&fs_locations->fattr); fs_locations->server = server; fs_locations->nlocations = 0; - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &args, &res, 0); nfs_fixup_referral_attributes(&fs_locations->fattr); dprintk("%s: returned status = %d\n", __func__, status); return status; } -struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { +#ifdef CONFIG_NFS_V4_1 +/* + * nfs4_proc_exchange_id() + * + * Since the clientid has expired, all compounds using sessions + * associated with the stale clientid will be returning + * NFS4ERR_BADSESSION in the sequence operation, and will therefore + * be in some phase of session reset. + */ +static int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) +{ + nfs4_verifier verifier; + struct nfs41_exchange_id_args args = { + .client = clp, + .flags = clp->cl_exchange_flags, + }; + struct nfs41_exchange_id_res res = { + .client = clp, + }; + int status; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + __be32 *p; + + dprintk("--> %s\n", __func__); + BUG_ON(clp == NULL); + + p = (u32 *)verifier.data; + *p++ = htonl((u32)clp->cl_boot_time.tv_sec); + *p = htonl((u32)clp->cl_boot_time.tv_nsec); + args.verifier = &verifier; + + while (1) { + args.id_len = scnprintf(args.id, sizeof(args.id), + "%s/%s %u", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + clp->cl_id_uniquifier); + + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + + if (status != NFS4ERR_CLID_INUSE) + break; + + if (signalled()) + break; + + if (++clp->cl_id_uniquifier == 0) + break; + } + + dprintk("<-- %s status= %d\n", __func__, status); + return status; +} + +struct nfs4_get_lease_time_data { + struct nfs4_get_lease_time_args *args; + struct nfs4_get_lease_time_res *res; + struct nfs_client *clp; +}; + +static void nfs4_get_lease_time_prepare(struct rpc_task *task, + void *calldata) +{ + int ret; + struct nfs4_get_lease_time_data *data = + (struct nfs4_get_lease_time_data *)calldata; + + dprintk("--> %s\n", __func__); + /* just setup sequence, do not trigger session recovery + since we're invoked within one */ + ret = nfs41_setup_sequence(data->clp->cl_session, + &data->args->la_seq_args, + &data->res->lr_seq_res, 0, task); + + BUG_ON(ret == -EAGAIN); + rpc_call_start(task); + dprintk("<-- %s\n", __func__); +} + +/* + * Called from nfs4_state_manager thread for session setup, so don't recover + * from sequence operation or clientid errors. + */ +static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_get_lease_time_data *data = + (struct nfs4_get_lease_time_data *)calldata; + + dprintk("--> %s\n", __func__); + nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status); + switch (task->tk_status) { + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); + rpc_delay(task, NFS4_POLL_RETRY_MIN); + task->tk_status = 0; + nfs4_restart_rpc(task, data->clp); + return; + } + nfs41_sequence_free_slot(data->clp, &data->res->lr_seq_res); + dprintk("<-- %s\n", __func__); +} + +struct rpc_call_ops nfs4_get_lease_time_ops = { + .rpc_call_prepare = nfs4_get_lease_time_prepare, + .rpc_call_done = nfs4_get_lease_time_done, +}; + +int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) +{ + struct rpc_task *task; + struct nfs4_get_lease_time_args args; + struct nfs4_get_lease_time_res res = { + .lr_fsinfo = fsinfo, + }; + struct nfs4_get_lease_time_data data = { + .args = &args, + .res = &res, + .clp = clp, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GET_LEASE_TIME], + .rpc_argp = &args, + .rpc_resp = &res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_get_lease_time_ops, + .callback_data = &data + }; + int status; + + res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + dprintk("--> %s\n", __func__); + task = rpc_run_task(&task_setup); + + if (IS_ERR(task)) + status = PTR_ERR(task); + else { + status = task->tk_status; + rpc_put_task(task); + } + dprintk("<-- %s return %d\n", __func__, status); + + return status; +} + +/* + * Reset a slot table + */ +static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, int max_slots, + int old_max_slots, int ivalue) +{ + int i; + int ret = 0; + + dprintk("--> %s: max_reqs=%u, tbl %p\n", __func__, max_slots, tbl); + + /* + * Until we have dynamic slot table adjustment, insist + * upon the same slot table size + */ + if (max_slots != old_max_slots) { + dprintk("%s reset slot table does't match old\n", + __func__); + ret = -EINVAL; /*XXX NFS4ERR_REQ_TOO_BIG ? */ + goto out; + } + spin_lock(&tbl->slot_tbl_lock); + for (i = 0; i < max_slots; ++i) + tbl->slots[i].seq_nr = ivalue; + tbl->highest_used_slotid = -1; + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, + tbl, tbl->slots, tbl->max_slots); +out: + dprintk("<-- %s: return %d\n", __func__, ret); + return ret; +} + +/* + * Reset the forechannel and backchannel slot tables + */ +static int nfs4_reset_slot_tables(struct nfs4_session *session) +{ + int status; + + status = nfs4_reset_slot_table(&session->fc_slot_table, + session->fc_attrs.max_reqs, + session->fc_slot_table.max_slots, + 1); + if (status) + return status; + + status = nfs4_reset_slot_table(&session->bc_slot_table, + session->bc_attrs.max_reqs, + session->bc_slot_table.max_slots, + 0); + return status; +} + +/* Destroy the slot table */ +static void nfs4_destroy_slot_tables(struct nfs4_session *session) +{ + if (session->fc_slot_table.slots != NULL) { + kfree(session->fc_slot_table.slots); + session->fc_slot_table.slots = NULL; + } + if (session->bc_slot_table.slots != NULL) { + kfree(session->bc_slot_table.slots); + session->bc_slot_table.slots = NULL; + } + return; +} + +/* + * Initialize slot table + */ +static int nfs4_init_slot_table(struct nfs4_slot_table *tbl, + int max_slots, int ivalue) +{ + int i; + struct nfs4_slot *slot; + int ret = -ENOMEM; + + BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE); + + dprintk("--> %s: max_reqs=%u\n", __func__, max_slots); + + slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL); + if (!slot) + goto out; + for (i = 0; i < max_slots; ++i) + slot[i].seq_nr = ivalue; + ret = 0; + + spin_lock(&tbl->slot_tbl_lock); + if (tbl->slots != NULL) { + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s: slot table already initialized. tbl=%p slots=%p\n", + __func__, tbl, tbl->slots); + WARN_ON(1); + goto out_free; + } + tbl->max_slots = max_slots; + tbl->slots = slot; + tbl->highest_used_slotid = -1; /* no slot is currently used */ + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, + tbl, tbl->slots, tbl->max_slots); +out: + dprintk("<-- %s: return %d\n", __func__, ret); + return ret; + +out_free: + kfree(slot); + goto out; +} + +/* + * Initialize the forechannel and backchannel tables + */ +static int nfs4_init_slot_tables(struct nfs4_session *session) +{ + int status; + + status = nfs4_init_slot_table(&session->fc_slot_table, + session->fc_attrs.max_reqs, 1); + if (status) + return status; + + status = nfs4_init_slot_table(&session->bc_slot_table, + session->bc_attrs.max_reqs, 0); + if (status) + nfs4_destroy_slot_tables(session); + + return status; +} + +struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) +{ + struct nfs4_session *session; + struct nfs4_slot_table *tbl; + + session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL); + if (!session) + return NULL; + + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + /* + * The create session reply races with the server back + * channel probe. Mark the client NFS_CS_SESSION_INITING + * so that the client back channel can find the + * nfs_client struct + */ + clp->cl_cons_state = NFS_CS_SESSION_INITING; + + tbl = &session->fc_slot_table; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); + + tbl = &session->bc_slot_table; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); + + session->clp = clp; + return session; +} + +void nfs4_destroy_session(struct nfs4_session *session) +{ + nfs4_proc_destroy_session(session); + dprintk("%s Destroy backchannel for xprt %p\n", + __func__, session->clp->cl_rpcclient->cl_xprt); + xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt, + NFS41_BC_MIN_CALLBACKS); + nfs4_destroy_slot_tables(session); + kfree(session); +} + +/* + * Initialize the values to be used by the client in CREATE_SESSION + * If nfs4_init_session set the fore channel request and response sizes, + * use them. + * + * Set the back channel max_resp_sz_cached to zero to force the client to + * always set csa_cachethis to FALSE because the current implementation + * of the back channel DRC only supports caching the CB_SEQUENCE operation. + */ +static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) +{ + struct nfs4_session *session = args->client->cl_session; + unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz, + mxresp_sz = session->fc_attrs.max_resp_sz; + + if (mxrqst_sz == 0) + mxrqst_sz = NFS_MAX_FILE_IO_SIZE; + if (mxresp_sz == 0) + mxresp_sz = NFS_MAX_FILE_IO_SIZE; + /* Fore channel attributes */ + args->fc_attrs.headerpadsz = 0; + args->fc_attrs.max_rqst_sz = mxrqst_sz; + args->fc_attrs.max_resp_sz = mxresp_sz; + args->fc_attrs.max_resp_sz_cached = mxresp_sz; + args->fc_attrs.max_ops = NFS4_MAX_OPS; + args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; + + dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " + "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", + __func__, + args->fc_attrs.max_rqst_sz, args->fc_attrs.max_resp_sz, + args->fc_attrs.max_resp_sz_cached, args->fc_attrs.max_ops, + args->fc_attrs.max_reqs); + + /* Back channel attributes */ + args->bc_attrs.headerpadsz = 0; + args->bc_attrs.max_rqst_sz = PAGE_SIZE; + args->bc_attrs.max_resp_sz = PAGE_SIZE; + args->bc_attrs.max_resp_sz_cached = 0; + args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; + args->bc_attrs.max_reqs = 1; + + dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u " + "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", + __func__, + args->bc_attrs.max_rqst_sz, args->bc_attrs.max_resp_sz, + args->bc_attrs.max_resp_sz_cached, args->bc_attrs.max_ops, + args->bc_attrs.max_reqs); +} + +static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) +{ + if (rcvd <= sent) + return 0; + printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. " + "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); + return -EINVAL; +} + +#define _verify_fore_channel_attr(_name_) \ + _verify_channel_attr("fore", #_name_, \ + args->fc_attrs._name_, \ + session->fc_attrs._name_) + +#define _verify_back_channel_attr(_name_) \ + _verify_channel_attr("back", #_name_, \ + args->bc_attrs._name_, \ + session->bc_attrs._name_) + +/* + * The server is not allowed to increase the fore channel header pad size, + * maximum response size, or maximum number of operations. + * + * The back channel attributes are only negotiatied down: We send what the + * (back channel) server insists upon. + */ +static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, + struct nfs4_session *session) +{ + int ret = 0; + + ret |= _verify_fore_channel_attr(headerpadsz); + ret |= _verify_fore_channel_attr(max_resp_sz); + ret |= _verify_fore_channel_attr(max_ops); + + ret |= _verify_back_channel_attr(headerpadsz); + ret |= _verify_back_channel_attr(max_rqst_sz); + ret |= _verify_back_channel_attr(max_resp_sz); + ret |= _verify_back_channel_attr(max_resp_sz_cached); + ret |= _verify_back_channel_attr(max_ops); + ret |= _verify_back_channel_attr(max_reqs); + + return ret; +} + +static int _nfs4_proc_create_session(struct nfs_client *clp) +{ + struct nfs4_session *session = clp->cl_session; + struct nfs41_create_session_args args = { + .client = clp, + .cb_program = NFS4_CALLBACK, + }; + struct nfs41_create_session_res res = { + .client = clp, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + nfs4_init_channel_attrs(&args); + args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); + + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); + + if (!status) + /* Verify the session's negotiated channel_attrs values */ + status = nfs4_verify_channel_attrs(&args, session); + if (!status) { + /* Increment the clientid slot sequence id */ + clp->cl_seqid++; + } + + return status; +} + +/* + * Issues a CREATE_SESSION operation to the server. + * It is the responsibility of the caller to verify the session is + * expired before calling this routine. + */ +int nfs4_proc_create_session(struct nfs_client *clp, int reset) +{ + int status; + unsigned *ptr; + struct nfs_fsinfo fsinfo; + struct nfs4_session *session = clp->cl_session; + + dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); + + status = _nfs4_proc_create_session(clp); + if (status) + goto out; + + /* Init or reset the fore channel */ + if (reset) + status = nfs4_reset_slot_tables(session); + else + status = nfs4_init_slot_tables(session); + dprintk("fore channel slot table initialization returned %d\n", status); + if (status) + goto out; + + ptr = (unsigned *)&session->sess_id.data[0]; + dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__, + clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]); + + if (reset) + /* Lease time is aleady set */ + goto out; + + /* Get the lease time */ + status = nfs4_proc_get_lease_time(clp, &fsinfo); + if (status == 0) { + /* Update lease time and schedule renewal */ + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = jiffies; + clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + spin_unlock(&clp->cl_lock); + + nfs4_schedule_state_renewal(clp); + } +out: + dprintk("<-- %s\n", __func__); + return status; +} + +/* + * Issue the over-the-wire RPC DESTROY_SESSION. + * The caller must serialize access to this routine. + */ +int nfs4_proc_destroy_session(struct nfs4_session *session) +{ + int status = 0; + struct rpc_message msg; + + dprintk("--> nfs4_proc_destroy_session\n"); + + /* session is still being setup */ + if (session->clp->cl_cons_state != NFS_CS_READY) + return status; + + msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION]; + msg.rpc_argp = session; + msg.rpc_resp = NULL; + msg.rpc_cred = NULL; + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); + + if (status) + printk(KERN_WARNING + "Got error %d from the server on DESTROY_SESSION. " + "Session has been destroyed regardless...\n", status); + + dprintk("<-- nfs4_proc_destroy_session\n"); + return status; +} + +/* + * Renew the cl_session lease. + */ +static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) +{ + struct nfs4_sequence_args args; + struct nfs4_sequence_res res; + + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + + args.sa_cache_this = 0; + + return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args, + &res, 0); +} + +void nfs41_sequence_call_done(struct rpc_task *task, void *data) +{ + struct nfs_client *clp = (struct nfs_client *)data; + + nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status); + + if (task->tk_status < 0) { + dprintk("%s ERROR %d\n", __func__, task->tk_status); + + if (_nfs4_async_handle_error(task, NULL, clp, NULL) + == -EAGAIN) { + nfs4_restart_rpc(task, clp); + return; + } + } + nfs41_sequence_free_slot(clp, task->tk_msg.rpc_resp); + dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); + + put_rpccred(task->tk_msg.rpc_cred); + kfree(task->tk_msg.rpc_argp); + kfree(task->tk_msg.rpc_resp); + + dprintk("<-- %s\n", __func__); +} + +static void nfs41_sequence_prepare(struct rpc_task *task, void *data) +{ + struct nfs_client *clp; + struct nfs4_sequence_args *args; + struct nfs4_sequence_res *res; + + clp = (struct nfs_client *)data; + args = task->tk_msg.rpc_argp; + res = task->tk_msg.rpc_resp; + + if (nfs4_setup_sequence(clp, args, res, 0, task)) + return; + rpc_call_start(task); +} + +static const struct rpc_call_ops nfs41_sequence_ops = { + .rpc_call_done = nfs41_sequence_call_done, + .rpc_call_prepare = nfs41_sequence_prepare, +}; + +static int nfs41_proc_async_sequence(struct nfs_client *clp, + struct rpc_cred *cred) +{ + struct nfs4_sequence_args *args; + struct nfs4_sequence_res *res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE], + .rpc_cred = cred, + }; + + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) { + kfree(args); + return -ENOMEM; + } + res->sr_slotid = NFS4_MAX_SLOT_TABLE; + msg.rpc_argp = args; + msg.rpc_resp = res; + + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + &nfs41_sequence_ops, (void *)clp); +} + +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, + .establish_clid = nfs4_init_clientid, + .get_clid_cred = nfs4_get_setclientid_cred, +}; + +#if defined(CONFIG_NFS_V4_1) +struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { + .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, + .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, + .recover_open = nfs4_open_reclaim, + .recover_lock = nfs4_lock_reclaim, + .establish_clid = nfs4_proc_exchange_id, + .get_clid_cred = nfs4_get_exchange_id_cred, +}; +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { + .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, + .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, + .recover_open = nfs4_open_expired, + .recover_lock = nfs4_lock_expired, + .establish_clid = nfs4_init_clientid, + .get_clid_cred = nfs4_get_setclientid_cred, }; -struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops = { +#if defined(CONFIG_NFS_V4_1) +struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, .recover_open = nfs4_open_expired, .recover_lock = nfs4_lock_expired, + .establish_clid = nfs4_proc_exchange_id, + .get_clid_cred = nfs4_get_exchange_id_cred, +}; +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { + .sched_state_renewal = nfs4_proc_async_renew, + .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, + .renew_lease = nfs4_proc_renew, +}; + +#if defined(CONFIG_NFS_V4_1) +struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { + .sched_state_renewal = nfs41_proc_async_sequence, + .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, + .renew_lease = nfs4_proc_sequence, +}; +#endif + +/* + * Per minor version reboot and network partition recovery ops + */ + +struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = { + &nfs40_reboot_recovery_ops, +#if defined(CONFIG_NFS_V4_1) + &nfs41_reboot_recovery_ops, +#endif +}; + +struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = { + &nfs40_nograce_recovery_ops, +#if defined(CONFIG_NFS_V4_1) + &nfs41_nograce_recovery_ops, +#endif +}; + +struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = { + &nfs40_state_renewal_ops, +#if defined(CONFIG_NFS_V4_1) + &nfs41_state_renewal_ops, +#endif }; static const struct inode_operations nfs4_file_inode_operations = { diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index f524e932ff7..e27c6cef18f 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -59,12 +59,14 @@ void nfs4_renew_state(struct work_struct *work) { + struct nfs4_state_maintenance_ops *ops; struct nfs_client *clp = container_of(work, struct nfs_client, cl_renewd.work); struct rpc_cred *cred; long lease, timeout; unsigned long last, now; + ops = nfs4_state_renewal_ops[clp->cl_minorversion]; dprintk("%s: start\n", __func__); /* Are there any active superblocks? */ if (list_empty(&clp->cl_superblocks)) @@ -76,7 +78,7 @@ nfs4_renew_state(struct work_struct *work) timeout = (2 * lease) / 3 + (long)last - (long)now; /* Are we close to a lease timeout? */ if (time_after(now, last + lease/3)) { - cred = nfs4_get_renew_cred_locked(clp); + cred = ops->get_state_renewal_cred_locked(clp); spin_unlock(&clp->cl_lock); if (cred == NULL) { if (list_empty(&clp->cl_delegations)) { @@ -86,7 +88,7 @@ nfs4_renew_state(struct work_struct *work) nfs_expire_all_delegations(clp); } else { /* Queue an asynchronous RENEW. */ - nfs4_proc_async_renew(clp, cred); + ops->sched_state_renewal(clp, cred); put_rpccred(cred); } timeout = (2 * lease) / 3; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0298e909559..b73c5a72865 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -60,7 +60,7 @@ const nfs4_stateid zero_stateid; static LIST_HEAD(nfs4_clientid_list); -static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) +int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { unsigned short port; int status; @@ -77,7 +77,7 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) return status; } -static struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) +struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) { struct rpc_cred *cred = NULL; @@ -114,17 +114,21 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) return cred; } -static struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) +#if defined(CONFIG_NFS_V4_1) + +struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) { struct rpc_cred *cred; spin_lock(&clp->cl_lock); - cred = nfs4_get_renew_cred_locked(clp); + cred = nfs4_get_machine_cred_locked(clp); spin_unlock(&clp->cl_lock); return cred; } -static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) +#endif /* CONFIG_NFS_V4_1 */ + +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct rb_node *pos; @@ -738,12 +742,14 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) { - if (status == -NFS4ERR_BAD_SEQID) { - struct nfs4_state_owner *sp = container_of(seqid->sequence, - struct nfs4_state_owner, so_seqid); + struct nfs4_state_owner *sp = container_of(seqid->sequence, + struct nfs4_state_owner, so_seqid); + struct nfs_server *server = sp->so_server; + + if (status == -NFS4ERR_BAD_SEQID) nfs4_drop_state_owner(sp); - } - nfs_increment_seqid(status, seqid); + if (!nfs4_has_session(server->nfs_client)) + nfs_increment_seqid(status, seqid); } /* @@ -847,32 +853,45 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ struct file_lock *fl; int status = 0; + if (inode->i_flock == NULL) + return 0; + + /* Guard against delegation returns and new lock/unlock calls */ down_write(&nfsi->rwsem); + /* Protect inode->i_flock using the BKL */ + lock_kernel(); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (nfs_file_open_context(fl->fl_file)->state != state) continue; + unlock_kernel(); status = ops->recover_lock(state, fl); - if (status >= 0) - continue; switch (status) { + case 0: + break; + case -ESTALE: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + case -NFS4ERR_STALE_CLIENTID: + goto out; default: printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", __func__, status); - case -NFS4ERR_EXPIRED: - case -NFS4ERR_NO_GRACE: + case -ENOMEM: + case -NFS4ERR_DENIED: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ - break; - case -NFS4ERR_STALE_CLIENTID: - goto out_err; + status = 0; } + lock_kernel(); } - up_write(&nfsi->rwsem); - return 0; -out_err: + unlock_kernel(); +out: up_write(&nfsi->rwsem); return status; } @@ -918,6 +937,7 @@ restart: printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", __func__, status); case -ENOENT: + case -ENOMEM: case -ESTALE: /* * Open state on this file cannot be recovered @@ -928,6 +948,9 @@ restart: /* Mark the file as being 'closed' */ state->state = 0; break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: nfs4_state_mark_reclaim_nograce(sp->so_client, state); @@ -1042,6 +1065,14 @@ static void nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_EXPIRED: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); nfs4_state_start_reclaim_nograce(clp); + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); } } @@ -1075,18 +1106,22 @@ restart: static int nfs4_check_lease(struct nfs_client *clp) { struct rpc_cred *cred; + struct nfs4_state_maintenance_ops *ops = + nfs4_state_renewal_ops[clp->cl_minorversion]; int status = -NFS4ERR_EXPIRED; /* Is the client already known to have an expired lease? */ if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) return 0; - cred = nfs4_get_renew_cred(clp); + spin_lock(&clp->cl_lock); + cred = ops->get_state_renewal_cred_locked(clp); + spin_unlock(&clp->cl_lock); if (cred == NULL) { cred = nfs4_get_setclientid_cred(clp); if (cred == NULL) goto out; } - status = nfs4_proc_renew(clp, cred); + status = ops->renew_lease(clp, cred); put_rpccred(cred); out: nfs4_recovery_handle_error(clp, status); @@ -1096,21 +1131,98 @@ out: static int nfs4_reclaim_lease(struct nfs_client *clp) { struct rpc_cred *cred; + struct nfs4_state_recovery_ops *ops = + nfs4_reboot_recovery_ops[clp->cl_minorversion]; int status = -ENOENT; - cred = nfs4_get_setclientid_cred(clp); + cred = ops->get_clid_cred(clp); if (cred != NULL) { - status = nfs4_init_client(clp, cred); + status = ops->establish_clid(clp, cred); put_rpccred(cred); /* Handle case where the user hasn't set up machine creds */ if (status == -EACCES && cred == clp->cl_machine_cred) { nfs4_clear_machine_cred(clp); status = -EAGAIN; } + if (status == -NFS4ERR_MINOR_VERS_MISMATCH) + status = -EPROTONOSUPPORT; + } + return status; +} + +#ifdef CONFIG_NFS_V4_1 +static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err) +{ + switch (err) { + case -NFS4ERR_STALE_CLIENTID: + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + } +} + +static int nfs4_reset_session(struct nfs_client *clp) +{ + int status; + + status = nfs4_proc_destroy_session(clp->cl_session); + if (status && status != -NFS4ERR_BADSESSION && + status != -NFS4ERR_DEADSESSION) { + nfs4_session_recovery_handle_error(clp, status); + goto out; } + + memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); + status = nfs4_proc_create_session(clp, 1); + if (status) + nfs4_session_recovery_handle_error(clp, status); + /* fall through*/ +out: + /* Wake up the next rpc task even on error */ + rpc_wake_up_next(&clp->cl_session->fc_slot_table.slot_tbl_waitq); return status; } +static int nfs4_initialize_session(struct nfs_client *clp) +{ + int status; + + status = nfs4_proc_create_session(clp, 0); + if (!status) { + nfs_mark_client_ready(clp, NFS_CS_READY); + } else if (status == -NFS4ERR_STALE_CLIENTID) { + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + } else { + nfs_mark_client_ready(clp, status); + } + return status; +} +#else /* CONFIG_NFS_V4_1 */ +static int nfs4_reset_session(struct nfs_client *clp) { return 0; } +static int nfs4_initialize_session(struct nfs_client *clp) { return 0; } +#endif /* CONFIG_NFS_V4_1 */ + +/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors + * on EXCHANGE_ID for v4.1 + */ +static void nfs4_set_lease_expired(struct nfs_client *clp, int status) +{ + if (nfs4_has_session(clp)) { + switch (status) { + case -NFS4ERR_DELAY: + case -NFS4ERR_CLID_INUSE: + case -EAGAIN: + break; + + case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery + * in nfs4_exchange_id */ + default: + return; + } + } + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); +} + static void nfs4_state_manager(struct nfs_client *clp) { int status = 0; @@ -1121,9 +1233,12 @@ static void nfs4_state_manager(struct nfs_client *clp) /* We're going to have to re-establish a clientid */ status = nfs4_reclaim_lease(clp); if (status) { - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_set_lease_expired(clp, status); if (status == -EAGAIN) continue; + if (clp->cl_cons_state == + NFS_CS_SESSION_INITING) + nfs_mark_client_ready(clp, status); goto out_error; } clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); @@ -1134,25 +1249,44 @@ static void nfs4_state_manager(struct nfs_client *clp) if (status != 0) continue; } - + /* Initialize or reset the session */ + if (nfs4_has_session(clp) && + test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) { + if (clp->cl_cons_state == NFS_CS_SESSION_INITING) + status = nfs4_initialize_session(clp); + else + status = nfs4_reset_session(clp); + if (status) { + if (status == -NFS4ERR_STALE_CLIENTID) + continue; + goto out_error; + } + } /* First recover reboot state... */ if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { - status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops); + status = nfs4_do_reclaim(clp, + nfs4_reboot_recovery_ops[clp->cl_minorversion]); if (status == -NFS4ERR_STALE_CLIENTID) continue; + if (test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) + continue; nfs4_state_end_reclaim_reboot(clp); continue; } /* Now recover expired state... */ if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { - status = nfs4_do_reclaim(clp, &nfs4_nograce_recovery_ops); + status = nfs4_do_reclaim(clp, + nfs4_nograce_recovery_ops[clp->cl_minorversion]); if (status < 0) { set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); if (status == -NFS4ERR_STALE_CLIENTID) continue; if (status == -NFS4ERR_EXPIRED) continue; + if (test_bit(NFS4CLNT_SESSION_SETUP, + &clp->cl_state)) + continue; goto out_error; } else nfs4_state_end_reclaim_nograce(clp); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1690f0e44b9..617273e7d47 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -192,12 +192,16 @@ static int nfs4_stat_to_errno(int); decode_verifier_maxsz) #define encode_remove_maxsz (op_encode_hdr_maxsz + \ nfs4_name_maxsz) +#define decode_remove_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz) #define encode_rename_maxsz (op_encode_hdr_maxsz + \ 2 * nfs4_name_maxsz) -#define decode_rename_maxsz (op_decode_hdr_maxsz + 5 + 5) +#define decode_rename_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz + \ + decode_change_info_maxsz) #define encode_link_maxsz (op_encode_hdr_maxsz + \ nfs4_name_maxsz) -#define decode_link_maxsz (op_decode_hdr_maxsz + 5) +#define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) #define encode_lock_maxsz (op_encode_hdr_maxsz + \ 7 + \ 1 + encode_stateid_maxsz + 8) @@ -240,43 +244,115 @@ static int nfs4_stat_to_errno(int); (encode_getattr_maxsz) #define decode_fs_locations_maxsz \ (0) + +#if defined(CONFIG_NFS_V4_1) +#define NFS4_MAX_MACHINE_NAME_LEN (64) + +#define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \ + encode_verifier_maxsz + \ + 1 /* co_ownerid.len */ + \ + XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \ + 1 /* flags */ + \ + 1 /* spa_how */ + \ + 0 /* SP4_NONE (for now) */ + \ + 1 /* zero implemetation id array */) +#define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ + 2 /* eir_clientid */ + \ + 1 /* eir_sequenceid */ + \ + 1 /* eir_flags */ + \ + 1 /* spr_how */ + \ + 0 /* SP4_NONE (for now) */ + \ + 2 /* eir_server_owner.so_minor_id */ + \ + /* eir_server_owner.so_major_id<> */ \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ + /* eir_server_scope<> */ \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ + 1 /* eir_server_impl_id array length */ + \ + 0 /* ignored eir_server_impl_id contents */) +#define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */) +#define decode_channel_attrs_maxsz (6 + \ + 1 /* ca_rdma_ird.len */ + \ + 1 /* ca_rdma_ird */) +#define encode_create_session_maxsz (op_encode_hdr_maxsz + \ + 2 /* csa_clientid */ + \ + 1 /* csa_sequence */ + \ + 1 /* csa_flags */ + \ + encode_channel_attrs_maxsz + \ + encode_channel_attrs_maxsz + \ + 1 /* csa_cb_program */ + \ + 1 /* csa_sec_parms.len (1) */ + \ + 1 /* cb_secflavor (AUTH_SYS) */ + \ + 1 /* stamp */ + \ + 1 /* machinename.len */ + \ + XDR_QUADLEN(NFS4_MAX_MACHINE_NAME_LEN) + \ + 1 /* uid */ + \ + 1 /* gid */ + \ + 1 /* gids.len (0) */) +#define decode_create_session_maxsz (op_decode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + 1 /* csr_sequence */ + \ + 1 /* csr_flags */ + \ + decode_channel_attrs_maxsz + \ + decode_channel_attrs_maxsz) +#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4) +#define decode_destroy_session_maxsz (op_decode_hdr_maxsz) +#define encode_sequence_maxsz (op_encode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4) +#define decode_sequence_maxsz (op_decode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) +#else /* CONFIG_NFS_V4_1 */ +#define encode_sequence_maxsz 0 +#define decode_sequence_maxsz 0 +#endif /* CONFIG_NFS_V4_1 */ + #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ #define NFS4_dec_compound_sz (1024) /* XXX: large enough? */ #define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_read_maxsz) #define NFS4_dec_read_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_read_maxsz) #define NFS4_enc_readlink_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_readlink_maxsz) #define NFS4_dec_readlink_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_readlink_maxsz) #define NFS4_enc_readdir_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_readdir_maxsz) #define NFS4_dec_readdir_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_readdir_maxsz) #define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_write_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_write_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_commit_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_commit_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_open_maxsz + \ @@ -285,6 +361,7 @@ static int nfs4_stat_to_errno(int); encode_restorefh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_open_maxsz + \ @@ -301,43 +378,53 @@ static int nfs4_stat_to_errno(int); decode_putfh_maxsz + \ decode_open_confirm_maxsz) #define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_open_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_open_downgrade_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_downgrade_sz \ (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_downgrade_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_close_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_close_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_setattr_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_setattr_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_fsinfo_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_fsinfo_maxsz) #define NFS4_dec_fsinfo_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_fsinfo_maxsz) #define NFS4_enc_renew_sz (compound_encode_hdr_maxsz + \ @@ -359,64 +446,81 @@ static int nfs4_stat_to_errno(int); decode_putrootfh_maxsz + \ decode_fsinfo_maxsz) #define NFS4_enc_lock_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_lock_maxsz) #define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_lock_maxsz) #define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_lockt_maxsz) #define NFS4_dec_lockt_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_lockt_maxsz) #define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_locku_maxsz) #define NFS4_dec_locku_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_locku_maxsz) #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_access_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_access_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_lookup_maxsz + \ encode_getattr_maxsz + \ encode_getfh_maxsz) #define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_lookup_maxsz + \ decode_getattr_maxsz + \ decode_getfh_maxsz) #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putrootfh_maxsz + \ encode_getattr_maxsz + \ encode_getfh_maxsz) #define NFS4_dec_lookup_root_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putrootfh_maxsz + \ decode_getattr_maxsz + \ decode_getfh_maxsz) #define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_remove_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 5 + \ + decode_remove_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ @@ -425,6 +529,7 @@ static int nfs4_stat_to_errno(int); encode_restorefh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_putfh_maxsz + \ @@ -433,6 +538,7 @@ static int nfs4_stat_to_errno(int); decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ @@ -441,6 +547,7 @@ static int nfs4_stat_to_errno(int); encode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_putfh_maxsz + \ @@ -449,16 +556,19 @@ static int nfs4_stat_to_errno(int); decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_symlink_maxsz + \ encode_getattr_maxsz + \ encode_getfh_maxsz) #define NFS4_dec_symlink_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_symlink_maxsz + \ decode_getattr_maxsz + \ decode_getfh_maxsz) #define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_create_maxsz + \ @@ -467,6 +577,7 @@ static int nfs4_stat_to_errno(int); encode_restorefh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_create_maxsz + \ @@ -475,52 +586,98 @@ static int nfs4_stat_to_errno(int); decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_pathconf_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_statfs_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_statfs_maxsz) #define NFS4_dec_statfs_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_statfs_maxsz) #define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_delegreturn_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_delegreturn_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_getacl_maxsz) #define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_getacl_maxsz) #define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_setacl_maxsz) #define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_setacl_maxsz) #define NFS4_enc_fs_locations_sz \ (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_lookup_maxsz + \ encode_fs_locations_maxsz) #define NFS4_dec_fs_locations_sz \ (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_lookup_maxsz + \ decode_fs_locations_maxsz) +#if defined(CONFIG_NFS_V4_1) +#define NFS4_enc_exchange_id_sz \ + (compound_encode_hdr_maxsz + \ + encode_exchange_id_maxsz) +#define NFS4_dec_exchange_id_sz \ + (compound_decode_hdr_maxsz + \ + decode_exchange_id_maxsz) +#define NFS4_enc_create_session_sz \ + (compound_encode_hdr_maxsz + \ + encode_create_session_maxsz) +#define NFS4_dec_create_session_sz \ + (compound_decode_hdr_maxsz + \ + decode_create_session_maxsz) +#define NFS4_enc_destroy_session_sz (compound_encode_hdr_maxsz + \ + encode_destroy_session_maxsz) +#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \ + decode_destroy_session_maxsz) +#define NFS4_enc_sequence_sz \ + (compound_decode_hdr_maxsz + \ + encode_sequence_maxsz) +#define NFS4_dec_sequence_sz \ + (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz) +#define NFS4_enc_get_lease_time_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putrootfh_maxsz + \ + encode_fsinfo_maxsz) +#define NFS4_dec_get_lease_time_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putrootfh_maxsz + \ + decode_fsinfo_maxsz) +#endif /* CONFIG_NFS_V4_1 */ static const umode_t nfs_type2fmt[] = { [NF4BAD] = 0, @@ -541,6 +698,8 @@ struct compound_hdr { __be32 * nops_p; uint32_t taglen; char * tag; + uint32_t replen; /* expected reply words */ + u32 minorversion; }; /* @@ -576,22 +735,31 @@ static void encode_string(struct xdr_stream *xdr, unsigned int len, const char * xdr_encode_opaque(p, str, len); } -static void encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) +static void encode_compound_hdr(struct xdr_stream *xdr, + struct rpc_rqst *req, + struct compound_hdr *hdr) { __be32 *p; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; + + /* initialize running count of expected bytes in reply. + * NOTE: the replied tag SHOULD be the same is the one sent, + * but this is not required as a MUST for the server to do so. */ + hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2)); WRITE32(hdr->taglen); WRITEMEM(hdr->tag, hdr->taglen); - WRITE32(NFS4_MINOR_VERSION); + WRITE32(hdr->minorversion); hdr->nops_p = p; WRITE32(hdr->nops); } static void encode_nops(struct compound_hdr *hdr) { + BUG_ON(hdr->nops > NFS4_MAX_OPS); *hdr->nops_p = htonl(hdr->nops); } @@ -736,6 +904,7 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd WRITE32(OP_ACCESS); WRITE32(access); hdr->nops++; + hdr->replen += decode_access_maxsz; } static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) @@ -747,6 +916,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg WRITE32(arg->seqid->sequence->counter); WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); hdr->nops++; + hdr->replen += decode_close_maxsz; } static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) @@ -758,6 +928,7 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar WRITE64(args->offset); WRITE32(args->count); hdr->nops++; + hdr->replen += decode_commit_maxsz; } static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) @@ -789,6 +960,7 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * WRITE32(create->name->len); WRITEMEM(create->name->name, create->name->len); hdr->nops++; + hdr->replen += decode_create_maxsz; encode_attrs(xdr, create->attrs, create->server); } @@ -802,6 +974,7 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c WRITE32(1); WRITE32(bitmap); hdr->nops++; + hdr->replen += decode_getattr_maxsz; } static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) @@ -814,6 +987,7 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm WRITE32(bm0); WRITE32(bm1); hdr->nops++; + hdr->replen += decode_getattr_maxsz; } static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) @@ -841,6 +1015,7 @@ static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) RESERVE_SPACE(4); WRITE32(OP_GETFH); hdr->nops++; + hdr->replen += decode_getfh_maxsz; } static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) @@ -852,6 +1027,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct WRITE32(name->len); WRITEMEM(name->name, name->len); hdr->nops++; + hdr->replen += decode_link_maxsz; } static inline int nfs4_lock_type(struct file_lock *fl, int block) @@ -899,6 +1075,7 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args WRITE32(args->lock_seqid->sequence->counter); } hdr->nops++; + hdr->replen += decode_lock_maxsz; } static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) @@ -915,6 +1092,7 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar WRITEMEM("lock id:", 8); WRITE64(args->lock_owner.id); hdr->nops++; + hdr->replen += decode_lockt_maxsz; } static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) @@ -929,6 +1107,7 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar WRITE64(args->fl->fl_start); WRITE64(nfs4_lock_length(args->fl)); hdr->nops++; + hdr->replen += decode_locku_maxsz; } static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) @@ -941,6 +1120,7 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc WRITE32(len); WRITEMEM(name->name, len); hdr->nops++; + hdr->replen += decode_lookup_maxsz; } static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) @@ -1080,6 +1260,7 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, BUG(); } hdr->nops++; + hdr->replen += decode_open_maxsz; } static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) @@ -1091,6 +1272,7 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); WRITE32(arg->seqid->sequence->counter); hdr->nops++; + hdr->replen += decode_open_confirm_maxsz; } static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) @@ -1103,6 +1285,7 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->fmode); hdr->nops++; + hdr->replen += decode_open_downgrade_maxsz; } static void @@ -1116,6 +1299,7 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd WRITE32(len); WRITEMEM(fh->data, len); hdr->nops++; + hdr->replen += decode_putfh_maxsz; } static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) @@ -1125,6 +1309,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) RESERVE_SPACE(4); WRITE32(OP_PUTROOTFH); hdr->nops++; + hdr->replen += decode_putrootfh_maxsz; } static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) @@ -1153,6 +1338,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, WRITE64(args->offset); WRITE32(args->count); hdr->nops++; + hdr->replen += decode_read_maxsz; } static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) @@ -1178,6 +1364,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg WRITE32(attrs[0] & readdir->bitmask[0]); WRITE32(attrs[1] & readdir->bitmask[1]); hdr->nops++; + hdr->replen += decode_readdir_maxsz; dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", __func__, (unsigned long long)readdir->cookie, @@ -1194,6 +1381,7 @@ static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink * RESERVE_SPACE(4); WRITE32(OP_READLINK); hdr->nops++; + hdr->replen += decode_readlink_maxsz; } static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) @@ -1205,6 +1393,7 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc WRITE32(name->len); WRITEMEM(name->name, name->len); hdr->nops++; + hdr->replen += decode_remove_maxsz; } static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) @@ -1220,6 +1409,7 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co WRITE32(newname->len); WRITEMEM(newname->name, newname->len); hdr->nops++; + hdr->replen += decode_rename_maxsz; } static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) @@ -1230,6 +1420,7 @@ static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client WRITE32(OP_RENEW); WRITE64(client_stateid->cl_clientid); hdr->nops++; + hdr->replen += decode_renew_maxsz; } static void @@ -1240,6 +1431,7 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) RESERVE_SPACE(4); WRITE32(OP_RESTOREFH); hdr->nops++; + hdr->replen += decode_restorefh_maxsz; } static int @@ -1259,6 +1451,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun WRITE32(arg->acl_len); xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); hdr->nops++; + hdr->replen += decode_setacl_maxsz; return 0; } @@ -1270,6 +1463,7 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) RESERVE_SPACE(4); WRITE32(OP_SAVEFH); hdr->nops++; + hdr->replen += decode_savefh_maxsz; } static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) @@ -1280,6 +1474,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs WRITE32(OP_SETATTR); WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); hdr->nops++; + hdr->replen += decode_setattr_maxsz; encode_attrs(xdr, arg->iap, server); } @@ -1299,6 +1494,7 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie RESERVE_SPACE(4); WRITE32(setclientid->sc_cb_ident); hdr->nops++; + hdr->replen += decode_setclientid_maxsz; } static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr) @@ -1310,6 +1506,7 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_ WRITE64(client_state->cl_clientid); WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); hdr->nops++; + hdr->replen += decode_setclientid_confirm_maxsz; } static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) @@ -1328,6 +1525,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg xdr_write_pages(xdr, args->pages, args->pgbase, args->count); hdr->nops++; + hdr->replen += decode_write_maxsz; } static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) @@ -1339,11 +1537,163 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state WRITE32(OP_DELEGRETURN); WRITEMEM(stateid->data, NFS4_STATEID_SIZE); hdr->nops++; + hdr->replen += decode_delegreturn_maxsz; +} + +#if defined(CONFIG_NFS_V4_1) +/* NFSv4.1 operations */ +static void encode_exchange_id(struct xdr_stream *xdr, + struct nfs41_exchange_id_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + RESERVE_SPACE(4 + sizeof(args->verifier->data)); + WRITE32(OP_EXCHANGE_ID); + WRITEMEM(args->verifier->data, sizeof(args->verifier->data)); + + encode_string(xdr, args->id_len, args->id); + + RESERVE_SPACE(12); + WRITE32(args->flags); + WRITE32(0); /* zero length state_protect4_a */ + WRITE32(0); /* zero length implementation id array */ + hdr->nops++; + hdr->replen += decode_exchange_id_maxsz; +} + +static void encode_create_session(struct xdr_stream *xdr, + struct nfs41_create_session_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; + uint32_t len; + struct nfs_client *clp = args->client; + + RESERVE_SPACE(4); + WRITE32(OP_CREATE_SESSION); + + RESERVE_SPACE(8); + WRITE64(clp->cl_ex_clid); + + RESERVE_SPACE(8); + WRITE32(clp->cl_seqid); /*Sequence id */ + WRITE32(args->flags); /*flags */ + + RESERVE_SPACE(2*28); /* 2 channel_attrs */ + /* Fore Channel */ + WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ + WRITE32(args->fc_attrs.max_rqst_sz); /* max req size */ + WRITE32(args->fc_attrs.max_resp_sz); /* max resp size */ + WRITE32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ + WRITE32(args->fc_attrs.max_ops); /* max operations */ + WRITE32(args->fc_attrs.max_reqs); /* max requests */ + WRITE32(0); /* rdmachannel_attrs */ + + /* Back Channel */ + WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ + WRITE32(args->bc_attrs.max_rqst_sz); /* max req size */ + WRITE32(args->bc_attrs.max_resp_sz); /* max resp size */ + WRITE32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ + WRITE32(args->bc_attrs.max_ops); /* max operations */ + WRITE32(args->bc_attrs.max_reqs); /* max requests */ + WRITE32(0); /* rdmachannel_attrs */ + + RESERVE_SPACE(4); + WRITE32(args->cb_program); /* cb_program */ + + RESERVE_SPACE(4); /* # of security flavors */ + WRITE32(1); + + RESERVE_SPACE(4); + WRITE32(RPC_AUTH_UNIX); /* auth_sys */ + + /* authsys_parms rfc1831 */ + RESERVE_SPACE(4); + WRITE32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ + len = scnprintf(machine_name, sizeof(machine_name), "%s", + clp->cl_ipaddr); + RESERVE_SPACE(16 + len); + WRITE32(len); + WRITEMEM(machine_name, len); + WRITE32(0); /* UID */ + WRITE32(0); /* GID */ + WRITE32(0); /* No more gids */ + hdr->nops++; + hdr->replen += decode_create_session_maxsz; +} + +static void encode_destroy_session(struct xdr_stream *xdr, + struct nfs4_session *session, + struct compound_hdr *hdr) +{ + __be32 *p; + RESERVE_SPACE(4 + NFS4_MAX_SESSIONID_LEN); + WRITE32(OP_DESTROY_SESSION); + WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); + hdr->nops++; + hdr->replen += decode_destroy_session_maxsz; } +#endif /* CONFIG_NFS_V4_1 */ + +static void encode_sequence(struct xdr_stream *xdr, + const struct nfs4_sequence_args *args, + struct compound_hdr *hdr) +{ +#if defined(CONFIG_NFS_V4_1) + struct nfs4_session *session = args->sa_session; + struct nfs4_slot_table *tp; + struct nfs4_slot *slot; + __be32 *p; + + if (!session) + return; + + tp = &session->fc_slot_table; + + WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); + slot = tp->slots + args->sa_slotid; + + RESERVE_SPACE(4); + WRITE32(OP_SEQUENCE); + + /* + * Sessionid + seqid + slotid + max slotid + cache_this + */ + dprintk("%s: sessionid=%u:%u:%u:%u seqid=%d slotid=%d " + "max_slotid=%d cache_this=%d\n", + __func__, + ((u32 *)session->sess_id.data)[0], + ((u32 *)session->sess_id.data)[1], + ((u32 *)session->sess_id.data)[2], + ((u32 *)session->sess_id.data)[3], + slot->seq_nr, args->sa_slotid, + tp->highest_used_slotid, args->sa_cache_this); + RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 16); + WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(slot->seq_nr); + WRITE32(args->sa_slotid); + WRITE32(tp->highest_used_slotid); + WRITE32(args->sa_cache_this); + hdr->nops++; + hdr->replen += decode_sequence_maxsz; +#endif /* CONFIG_NFS_V4_1 */ +} + /* * END OF "GENERIC" ENCODE ROUTINES. */ +static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args) +{ +#if defined(CONFIG_NFS_V4_1) + if (args->sa_session) + return args->sa_session->clp->cl_minorversion; +#endif /* CONFIG_NFS_V4_1 */ + return 0; +} + /* * Encode an ACCESS request */ @@ -1351,11 +1701,12 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_access(&xdr, args->access, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1370,11 +1721,12 @@ static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->dir_fh, &hdr); encode_lookup(&xdr, args->name, &hdr); encode_getfh(&xdr, &hdr); @@ -1390,11 +1742,12 @@ static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struc { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putrootfh(&xdr, &hdr); encode_getfh(&xdr, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1409,11 +1762,12 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_remove(&xdr, &args->name, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1428,11 +1782,12 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->old_dir, &hdr); encode_savefh(&xdr, &hdr); encode_putfh(&xdr, args->new_dir, &hdr); @@ -1451,11 +1806,12 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_ { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_savefh(&xdr, &hdr); encode_putfh(&xdr, args->dir_fh, &hdr); @@ -1474,11 +1830,12 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->dir_fh, &hdr); encode_savefh(&xdr, &hdr); encode_create(&xdr, args, &hdr); @@ -1505,11 +1862,12 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nf { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); @@ -1523,11 +1881,12 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closea { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_close(&xdr, args, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1542,11 +1901,12 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openarg { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_savefh(&xdr, &hdr); encode_open(&xdr, args, &hdr); @@ -1569,7 +1929,7 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_open_confirm(&xdr, args, &hdr); encode_nops(&hdr); @@ -1583,11 +1943,12 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_ { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_open(&xdr, args, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1602,11 +1963,12 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_open_downgrade(&xdr, args, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1621,11 +1983,12 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_ar { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_lock(&xdr, args, &hdr); encode_nops(&hdr); @@ -1639,11 +2002,12 @@ static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_ { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_lockt(&xdr, args, &hdr); encode_nops(&hdr); @@ -1657,11 +2021,12 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_ { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_locku(&xdr, args, &hdr); encode_nops(&hdr); @@ -1675,22 +2040,16 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; - unsigned int replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_readlink(&xdr, args, req, &hdr); - /* set up reply kvec - * toplevel_status + taglen + rescount + OP_PUTFH + status - * + OP_READLINK + status + string length = 8 - */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readlink_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, args->pgbase, args->pglen); encode_nops(&hdr); return 0; @@ -1703,25 +2062,19 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; - int replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_readdir(&xdr, args, req, &hdr); - /* set up reply kvec - * toplevel_status + taglen + rescount + OP_PUTFH + status - * + OP_READDIR + status + verifer(2) = 9 - */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readdir_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, args->pgbase, args->count); dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", - __func__, replen, args->pages, + __func__, hdr.replen << 2, args->pages, args->pgbase, args->count); encode_nops(&hdr); return 0; @@ -1732,24 +2085,18 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf */ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - int replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_read(&xdr, args, &hdr); - /* set up reply kvec - * toplevel status + taglen=0 + rescount + OP_PUTFH + status - * + OP_READ + status + eof + datalen = 9 - */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, args->pgbase, args->count); req->rq_rcv_buf.flags |= XDRBUF_READ; encode_nops(&hdr); @@ -1763,11 +2110,12 @@ static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_seta { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_setattr(&xdr, args, args->server, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1783,20 +2131,19 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, struct nfs_getaclargs *args) { struct xdr_stream xdr; - struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - int replen; + uint32_t replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); + replen = hdr.replen + nfs4_fattr_bitmap_maxsz + 1; encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr); - /* set up reply buffer: */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, + xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, args->acl_pgbase, args->acl_len); encode_nops(&hdr); return 0; @@ -1809,11 +2156,12 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_write(&xdr, args, &hdr); req->rq_snd_buf.flags |= XDRBUF_WRITE; @@ -1829,11 +2177,12 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_write { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_commit(&xdr, args, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1848,11 +2197,12 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsin { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_fsinfo(&xdr, args->bitmask, &hdr); encode_nops(&hdr); @@ -1866,11 +2216,12 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0], &hdr); @@ -1885,11 +2236,12 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0], args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); @@ -1900,16 +2252,18 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs /* * GETATTR_BITMAP request */ -static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, const struct nfs_fh *fhandle) +static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, + struct nfs4_server_caps_arg *args) { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); - encode_putfh(&xdr, fhandle, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fhandle, &hdr); encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS| FATTR4_WORD0_LINK_SUPPORT| FATTR4_WORD0_SYMLINK_SUPPORT| @@ -1929,7 +2283,7 @@ static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); encode_renew(&xdr, clp, &hdr); encode_nops(&hdr); return 0; @@ -1946,7 +2300,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4 }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); encode_setclientid(&xdr, sc, &hdr); encode_nops(&hdr); return 0; @@ -1964,7 +2318,7 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); encode_setclientid_confirm(&xdr, clp, &hdr); encode_putrootfh(&xdr, &hdr); encode_fsinfo(&xdr, lease_bitmap, &hdr); @@ -1979,11 +2333,12 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struc { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fhandle, &hdr); encode_delegreturn(&xdr, args->stateid, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1998,28 +2353,119 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; - int replen; + uint32_t replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->dir_fh, &hdr); encode_lookup(&xdr, args->name, &hdr); + replen = hdr.replen; /* get the attribute into args->page */ encode_fs_locations(&xdr, args->bitmask, &hdr); - /* set up reply - * toplevel_status + OP_PUTFH + status - * + OP_LOOKUP + status + OP_GETATTR + status = 7 - */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page, + xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page, 0, PAGE_SIZE); encode_nops(&hdr); return 0; } +#if defined(CONFIG_NFS_V4_1) +/* + * EXCHANGE_ID request + */ +static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p, + struct nfs41_exchange_id_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = args->client->cl_minorversion, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_exchange_id(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a CREATE_SESSION request + */ +static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p, + struct nfs41_create_session_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = args->client->cl_minorversion, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_create_session(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a DESTROY_SESSION request + */ +static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p, + struct nfs4_session *session) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = session->clp->cl_minorversion, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_destroy_session(&xdr, session, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a SEQUENCE request + */ +static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p, + struct nfs4_sequence_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a GET_LEASE_TIME request + */ +static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p, + struct nfs4_get_lease_time_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->la_seq_args), + }; + const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->la_seq_args, &hdr); + encode_putrootfh(&xdr, &hdr); + encode_fsinfo(&xdr, lease_bitmap, &hdr); + encode_nops(&hdr); + return 0; +} +#endif /* CONFIG_NFS_V4_1 */ + /* * START OF "GENERIC" DECODE ROUTINES. * These may look a little ugly since they are imported from a "generic" @@ -3657,7 +4103,7 @@ decode_savefh(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_SAVEFH); } -static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res) +static int decode_setattr(struct xdr_stream *xdr) { __be32 *p; uint32_t bmlen; @@ -3735,6 +4181,169 @@ static int decode_delegreturn(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_DELEGRETURN); } +#if defined(CONFIG_NFS_V4_1) +static int decode_exchange_id(struct xdr_stream *xdr, + struct nfs41_exchange_id_res *res) +{ + __be32 *p; + uint32_t dummy; + int status; + struct nfs_client *clp = res->client; + + status = decode_op_hdr(xdr, OP_EXCHANGE_ID); + if (status) + return status; + + READ_BUF(8); + READ64(clp->cl_ex_clid); + READ_BUF(12); + READ32(clp->cl_seqid); + READ32(clp->cl_exchange_flags); + + /* We ask for SP4_NONE */ + READ32(dummy); + if (dummy != SP4_NONE) + return -EIO; + + /* Throw away minor_id */ + READ_BUF(8); + + /* Throw away Major id */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + + /* Throw away server_scope */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + + /* Throw away Implementation id array */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + + return 0; +} + +static int decode_chan_attrs(struct xdr_stream *xdr, + struct nfs4_channel_attrs *attrs) +{ + __be32 *p; + u32 nr_attrs; + + READ_BUF(28); + READ32(attrs->headerpadsz); + READ32(attrs->max_rqst_sz); + READ32(attrs->max_resp_sz); + READ32(attrs->max_resp_sz_cached); + READ32(attrs->max_ops); + READ32(attrs->max_reqs); + READ32(nr_attrs); + if (unlikely(nr_attrs > 1)) { + printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", + __func__, nr_attrs); + return -EINVAL; + } + if (nr_attrs == 1) + READ_BUF(4); /* skip rdma_attrs */ + return 0; +} + +static int decode_create_session(struct xdr_stream *xdr, + struct nfs41_create_session_res *res) +{ + __be32 *p; + int status; + struct nfs_client *clp = res->client; + struct nfs4_session *session = clp->cl_session; + + status = decode_op_hdr(xdr, OP_CREATE_SESSION); + + if (status) + return status; + + /* sessionid */ + READ_BUF(NFS4_MAX_SESSIONID_LEN); + COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN); + + /* seqid, flags */ + READ_BUF(8); + READ32(clp->cl_seqid); + READ32(session->flags); + + /* Channel attributes */ + status = decode_chan_attrs(xdr, &session->fc_attrs); + if (!status) + status = decode_chan_attrs(xdr, &session->bc_attrs); + return status; +} + +static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) +{ + return decode_op_hdr(xdr, OP_DESTROY_SESSION); +} +#endif /* CONFIG_NFS_V4_1 */ + +static int decode_sequence(struct xdr_stream *xdr, + struct nfs4_sequence_res *res, + struct rpc_rqst *rqstp) +{ +#if defined(CONFIG_NFS_V4_1) + struct nfs4_slot *slot; + struct nfs4_sessionid id; + u32 dummy; + int status; + __be32 *p; + + if (!res->sr_session) + return 0; + + status = decode_op_hdr(xdr, OP_SEQUENCE); + if (status) + goto out_err; + + /* + * If the server returns different values for sessionID, slotID or + * sequence number, the server is looney tunes. + */ + status = -ESERVERFAULT; + + slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid]; + READ_BUF(NFS4_MAX_SESSIONID_LEN + 20); + COPYMEM(id.data, NFS4_MAX_SESSIONID_LEN); + if (memcmp(id.data, res->sr_session->sess_id.data, + NFS4_MAX_SESSIONID_LEN)) { + dprintk("%s Invalid session id\n", __func__); + goto out_err; + } + /* seqid */ + READ32(dummy); + if (dummy != slot->seq_nr) { + dprintk("%s Invalid sequence number\n", __func__); + goto out_err; + } + /* slot id */ + READ32(dummy); + if (dummy != res->sr_slotid) { + dprintk("%s Invalid slot id\n", __func__); + goto out_err; + } + /* highest slot id - currently not processed */ + READ32(dummy); + /* target highest slot id - currently not processed */ + READ32(dummy); + /* result flags - currently not processed */ + READ32(dummy); + status = 0; +out_err: + res->sr_status = status; + return status; +#else /* CONFIG_NFS_V4_1 */ + return 0; +#endif /* CONFIG_NFS_V4_1 */ +} + /* * END OF "GENERIC" DECODE ROUTINES. */ @@ -3752,6 +4361,9 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -3773,7 +4385,11 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; status = decode_putfh(&xdr); if (status != 0) @@ -3796,7 +4412,11 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -3819,7 +4439,11 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putrootfh(&xdr)) != 0) goto out; @@ -3839,7 +4463,11 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -3860,7 +4488,11 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -3890,7 +4522,11 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -3923,7 +4559,11 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -3963,6 +4603,9 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -3979,12 +4622,13 @@ nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); status = encode_setacl(&xdr, args, &hdr); encode_nops(&hdr); @@ -3995,7 +4639,8 @@ nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args * Decode SETACL response */ static int -nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, void *res) +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, + struct nfs_setaclres *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4005,10 +4650,13 @@ nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, void *res) status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; - status = decode_setattr(&xdr, res); + status = decode_setattr(&xdr); out: return status; } @@ -4017,7 +4665,8 @@ out: * Decode GETACL response */ static int -nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, size_t *acl_len) +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, + struct nfs_getaclres *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4027,10 +4676,13 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, size_t *acl_len) status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; - status = decode_getacl(&xdr, rqstp, acl_len); + status = decode_getacl(&xdr, rqstp, &res->acl_len); out: return status; @@ -4049,6 +4701,9 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4079,6 +4734,9 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4133,6 +4791,9 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nf status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4157,10 +4818,13 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; - status = decode_setattr(&xdr, res); + status = decode_setattr(&xdr); if (status) goto out; decode_getfattr(&xdr, res->fattr, res->server); @@ -4181,6 +4845,9 @@ static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_ status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4202,6 +4869,9 @@ static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4223,6 +4893,9 @@ static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4234,7 +4907,8 @@ out: /* * Decode READLINK response */ -static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, void *res) +static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, + struct nfs4_readlink_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4244,6 +4918,9 @@ static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, void *res) status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4265,6 +4942,9 @@ static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_r status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4286,6 +4966,9 @@ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readr status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4309,6 +4992,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writ status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4335,6 +5021,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_wri status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4349,7 +5038,8 @@ out: /* * FSINFO request */ -static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo) +static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, + struct nfs4_fsinfo_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4358,16 +5048,19 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinf xdr_init_decode(&xdr, &req->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); if (!status) + status = decode_sequence(&xdr, &res->seq_res, req); + if (!status) status = decode_putfh(&xdr); if (!status) - status = decode_fsinfo(&xdr, fsinfo); + status = decode_fsinfo(&xdr, res->fsinfo); return status; } /* * PATHCONF request */ -static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *pathconf) +static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, + struct nfs4_pathconf_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4376,16 +5069,19 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pat xdr_init_decode(&xdr, &req->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); if (!status) + status = decode_sequence(&xdr, &res->seq_res, req); + if (!status) status = decode_putfh(&xdr); if (!status) - status = decode_pathconf(&xdr, pathconf); + status = decode_pathconf(&xdr, res->pathconf); return status; } /* * STATFS request */ -static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *fsstat) +static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, + struct nfs4_statfs_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4394,9 +5090,11 @@ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs_fssta xdr_init_decode(&xdr, &req->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); if (!status) + status = decode_sequence(&xdr, &res->seq_res, req); + if (!status) status = decode_putfh(&xdr); if (!status) - status = decode_statfs(&xdr, fsstat); + status = decode_statfs(&xdr, res->fsstat); return status; } @@ -4410,7 +5108,11 @@ static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4 int status; xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, req); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -4483,7 +5185,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); - if (status != 0) + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; status = decode_putfh(&xdr); if (status != 0) @@ -4497,7 +5202,8 @@ out: /* * FS_LOCATIONS request */ -static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations *res) +static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, + struct nfs4_fs_locations_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4505,18 +5211,113 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs xdr_init_decode(&xdr, &req->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); - if (status != 0) + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, req); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; if ((status = decode_lookup(&xdr)) != 0) goto out; xdr_enter_page(&xdr, PAGE_SIZE); - status = decode_getfattr(&xdr, &res->fattr, res->server); + status = decode_getfattr(&xdr, &res->fs_locations->fattr, + res->fs_locations->server); out: return status; } +#if defined(CONFIG_NFS_V4_1) +/* + * EXCHANGE_ID request + */ +static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p, + void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_exchange_id(&xdr, res); + return status; +} + +/* + * a CREATE_SESSION request + */ +static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs41_create_session_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_create_session(&xdr, res); + return status; +} + +/* + * a DESTROY_SESSION request + */ +static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p, + void *dummy) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_destroy_session(&xdr, dummy); + return status; +} + +/* + * a SEQUENCE request + */ +static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs4_sequence_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_sequence(&xdr, res, rqstp); + return status; +} + +/* + * a GET_LEASE_TIME request + */ +static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs4_get_lease_time_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_sequence(&xdr, &res->lr_seq_res, rqstp); + if (!status) + status = decode_putrootfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, res->lr_fsinfo); + return status; +} +#endif /* CONFIG_NFS_V4_1 */ + __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) { uint32_t bitmap[2] = {0}; @@ -4686,6 +5487,13 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(GETACL, enc_getacl, dec_getacl), PROC(SETACL, enc_setacl, dec_setacl), PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), +#if defined(CONFIG_NFS_V4_1) + PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), + PROC(CREATE_SESSION, enc_create_session, dec_create_session), + PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), + PROC(SEQUENCE, enc_sequence, dec_sequence), + PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), +#endif /* CONFIG_NFS_V4_1 */ }; struct rpc_version nfs_version4 = { diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index e3ed5908820..8c55b27c0de 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -92,6 +92,9 @@ #undef NFSROOT_DEBUG #define NFSDBG_FACILITY NFSDBG_ROOT +/* Default port to use if server is not running a portmapper */ +#define NFS_MNT_PORT 627 + /* Default path we try to mount. "%s" gets replaced by our IP address */ #define NFS_ROOT "/tftpboot/%s" @@ -487,6 +490,7 @@ static int __init root_nfs_get_handle(void) { struct nfs_fh fh; struct sockaddr_in sin; + unsigned int auth_flav_len = 0; struct nfs_mount_request request = { .sap = (struct sockaddr *)&sin, .salen = sizeof(sin), @@ -496,6 +500,7 @@ static int __init root_nfs_get_handle(void) .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP, .fh = &fh, + .auth_flav_len = &auth_flav_len, }; int status; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4ace3c50a8e..96c4ebfa46f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -22,6 +22,7 @@ #include <asm/system.h> +#include "nfs4_fs.h" #include "internal.h" #include "iostat.h" #include "fscache.h" @@ -46,6 +47,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); p->npages = pagecount; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { @@ -357,19 +359,25 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data struct nfs_readres *resp = &data->res; if (resp->eof || resp->count == argp->count) - return; + goto out; /* This is a short read! */ nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count == 0) - return; + goto out; /* Yes, so retry the read at the end of the data */ argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; - rpc_restart_call(task); + nfs4_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); + return; +out: + nfs4_sequence_free_slot(NFS_SERVER(data->inode)->nfs_client, + &data->res.seq_res); + return; + } /* @@ -406,7 +414,23 @@ static void nfs_readpage_release_partial(void *calldata) nfs_readdata_release(calldata); } +#if defined(CONFIG_NFS_V4_1) +void nfs_read_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_read_data *data = calldata; + + if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client, + &data->args.seq_args, &data->res.seq_res, + 0, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + static const struct rpc_call_ops nfs_read_partial_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_readpage_result_partial, .rpc_release = nfs_readpage_release_partial, }; @@ -470,6 +494,9 @@ static void nfs_readpage_release_full(void *calldata) } static const struct rpc_call_ops nfs_read_full_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_readpage_result_full, .rpc_release = nfs_readpage_release_full, }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 26127b69a27..0b4cbdc60ab 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -42,6 +42,8 @@ #include <linux/smp_lock.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/mnt_namespace.h> +#include <linux/namei.h> #include <linux/nfs_idmap.h> #include <linux/vfs.h> #include <linux/inet.h> @@ -90,6 +92,7 @@ enum { Opt_mountport, Opt_mountvers, Opt_nfsvers, + Opt_minorversion, /* Mount options that take string arguments */ Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, @@ -139,22 +142,23 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_fscache_uniq, "fsc=%s" }, { Opt_nofscache, "nofsc" }, - { Opt_port, "port=%u" }, - { Opt_rsize, "rsize=%u" }, - { Opt_wsize, "wsize=%u" }, - { Opt_bsize, "bsize=%u" }, - { Opt_timeo, "timeo=%u" }, - { Opt_retrans, "retrans=%u" }, - { Opt_acregmin, "acregmin=%u" }, - { Opt_acregmax, "acregmax=%u" }, - { Opt_acdirmin, "acdirmin=%u" }, - { Opt_acdirmax, "acdirmax=%u" }, - { Opt_actimeo, "actimeo=%u" }, - { Opt_namelen, "namlen=%u" }, - { Opt_mountport, "mountport=%u" }, - { Opt_mountvers, "mountvers=%u" }, - { Opt_nfsvers, "nfsvers=%u" }, - { Opt_nfsvers, "vers=%u" }, + { Opt_port, "port=%s" }, + { Opt_rsize, "rsize=%s" }, + { Opt_wsize, "wsize=%s" }, + { Opt_bsize, "bsize=%s" }, + { Opt_timeo, "timeo=%s" }, + { Opt_retrans, "retrans=%s" }, + { Opt_acregmin, "acregmin=%s" }, + { Opt_acregmax, "acregmax=%s" }, + { Opt_acdirmin, "acdirmin=%s" }, + { Opt_acdirmax, "acdirmax=%s" }, + { Opt_actimeo, "actimeo=%s" }, + { Opt_namelen, "namlen=%s" }, + { Opt_mountport, "mountport=%s" }, + { Opt_mountvers, "mountvers=%s" }, + { Opt_nfsvers, "nfsvers=%s" }, + { Opt_nfsvers, "vers=%s" }, + { Opt_minorversion, "minorversion=%u" }, { Opt_sec, "sec=%s" }, { Opt_proto, "proto=%s" }, @@ -270,10 +274,14 @@ static const struct super_operations nfs_sops = { #ifdef CONFIG_NFS_V4 static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_remote_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs4_kill_super(struct super_block *sb); static struct file_system_type nfs4_fs_type = { @@ -284,6 +292,14 @@ static struct file_system_type nfs4_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +static struct file_system_type nfs4_remote_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_remote_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + struct file_system_type nfs4_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -292,6 +308,14 @@ struct file_system_type nfs4_xdev_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +static struct file_system_type nfs4_remote_referral_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_remote_referral_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + struct file_system_type nfs4_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -514,7 +538,6 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, const char *nostr; } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", ",nointr" }, { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, @@ -943,11 +966,6 @@ static int nfs_parse_security_flavors(char *value, return 1; } -static void nfs_parse_invalid_value(const char *option) -{ - dfprintk(MOUNT, "NFS: bad value specified for %s option\n", option); -} - /* * Error-check and convert a string of mount options from user space into * a data structure. The whole mount string is processed; bad options are @@ -958,7 +976,7 @@ static int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) { char *p, *string, *secdata; - int rc, sloppy = 0, errors = 0; + int rc, sloppy = 0, invalid_option = 0; if (!raw) { dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); @@ -982,7 +1000,9 @@ static int nfs_parse_mount_options(char *raw, while ((p = strsep(&raw, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; - int option, token; + unsigned long option; + int int_option; + int token; if (!*p) continue; @@ -1091,114 +1111,156 @@ static int nfs_parse_mount_options(char *raw, * options that take numeric values */ case Opt_port: - if (match_int(args, &option) || - option < 0 || option > USHORT_MAX) { - errors++; - nfs_parse_invalid_value("port"); - } else - mnt->nfs_server.port = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option > USHORT_MAX) + goto out_invalid_value; + mnt->nfs_server.port = option; break; case Opt_rsize: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("rsize"); - } else - mnt->rsize = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->rsize = option; break; case Opt_wsize: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("wsize"); - } else - mnt->wsize = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->wsize = option; break; case Opt_bsize: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("bsize"); - } else - mnt->bsize = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->bsize = option; break; case Opt_timeo: - if (match_int(args, &option) || option <= 0) { - errors++; - nfs_parse_invalid_value("timeo"); - } else - mnt->timeo = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option == 0) + goto out_invalid_value; + mnt->timeo = option; break; case Opt_retrans: - if (match_int(args, &option) || option <= 0) { - errors++; - nfs_parse_invalid_value("retrans"); - } else - mnt->retrans = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option == 0) + goto out_invalid_value; + mnt->retrans = option; break; case Opt_acregmin: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("acregmin"); - } else - mnt->acregmin = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acregmin = option; break; case Opt_acregmax: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("acregmax"); - } else - mnt->acregmax = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acregmax = option; break; case Opt_acdirmin: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("acdirmin"); - } else - mnt->acdirmin = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acdirmin = option; break; case Opt_acdirmax: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("acdirmax"); - } else - mnt->acdirmax = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acdirmax = option; break; case Opt_actimeo: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("actimeo"); - } else - mnt->acregmin = mnt->acregmax = - mnt->acdirmin = mnt->acdirmax = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acregmin = mnt->acregmax = + mnt->acdirmin = mnt->acdirmax = option; break; case Opt_namelen: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("namlen"); - } else - mnt->namlen = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->namlen = option; break; case Opt_mountport: - if (match_int(args, &option) || - option < 0 || option > USHORT_MAX) { - errors++; - nfs_parse_invalid_value("mountport"); - } else - mnt->mount_server.port = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option > USHORT_MAX) + goto out_invalid_value; + mnt->mount_server.port = option; break; case Opt_mountvers: - if (match_int(args, &option) || + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option < NFS_MNT_VERSION || - option > NFS_MNT3_VERSION) { - errors++; - nfs_parse_invalid_value("mountvers"); - } else - mnt->mount_server.version = option; + option > NFS_MNT3_VERSION) + goto out_invalid_value; + mnt->mount_server.version = option; break; case Opt_nfsvers: - if (match_int(args, &option)) { - errors++; - nfs_parse_invalid_value("nfsvers"); - break; - } + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; switch (option) { case NFS2_VERSION: mnt->flags &= ~NFS_MOUNT_VER3; @@ -1207,10 +1269,16 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= NFS_MOUNT_VER3; break; default: - errors++; - nfs_parse_invalid_value("nfsvers"); + goto out_invalid_value; } break; + case Opt_minorversion: + if (match_int(args, &int_option)) + return 0; + if (int_option < 0 || int_option > NFS4_MAX_MINOR_VERSION) + return 0; + mnt->minorversion = int_option; + break; /* * options that take text values @@ -1222,9 +1290,9 @@ static int nfs_parse_mount_options(char *raw, rc = nfs_parse_security_flavors(string, mnt); kfree(string); if (!rc) { - errors++; dfprintk(MOUNT, "NFS: unrecognized " "security flavor\n"); + return 0; } break; case Opt_proto: @@ -1238,23 +1306,25 @@ static int nfs_parse_mount_options(char *raw, case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; + kfree(string); break; case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + kfree(string); break; case Opt_xprt_rdma: /* vector side protocols to TCP */ mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; xprt_load_transport(string); + kfree(string); break; default: - errors++; dfprintk(MOUNT, "NFS: unrecognized " "transport protocol\n"); + return 0; } - kfree(string); break; case Opt_mountproto: string = match_strdup(args); @@ -1273,9 +1343,9 @@ static int nfs_parse_mount_options(char *raw, break; case Opt_xprt_rdma: /* not used for side protocols */ default: - errors++; dfprintk(MOUNT, "NFS: unrecognized " "transport protocol\n"); + return 0; } break; case Opt_addr: @@ -1331,9 +1401,9 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; break; default: - errors++; dfprintk(MOUNT, "NFS: invalid " "lookupcache argument\n"); + return 0; }; break; @@ -1351,20 +1421,20 @@ static int nfs_parse_mount_options(char *raw, break; default: - errors++; + invalid_option = 1; dfprintk(MOUNT, "NFS: unrecognized mount option " "'%s'\n", p); } } - if (errors > 0) { - dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", - errors, (errors == 1 ? "" : "s")); - if (!sloppy) - return 0; - } + if (!sloppy && invalid_option) + return 0; + return 1; +out_invalid_value: + printk(KERN_INFO "NFS: bad mount option value specified: %s \n", p); + return 0; out_nomem: printk(KERN_INFO "NFS: not enough memory to parse option\n"); return 0; @@ -1381,6 +1451,7 @@ out_security_failure: static int nfs_try_mount(struct nfs_parsed_mount_data *args, struct nfs_fh *root_fh) { + unsigned int auth_flavor_len = 0; struct nfs_mount_request request = { .sap = (struct sockaddr *) &args->mount_server.address, @@ -1388,6 +1459,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, .protocol = args->mount_server.protocol, .fh = root_fh, .noresvport = args->flags & NFS_MOUNT_NORESVPORT, + .auth_flav_len = &auth_flavor_len, }; int status; @@ -2240,6 +2312,11 @@ static void nfs4_fill_super(struct super_block *sb) nfs_initialise_sb(sb); } +static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) +{ + args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); +} + /* * Validate NFSv4 mount options */ @@ -2263,6 +2340,7 @@ static int nfs4_validate_mount_data(void *options, args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ args->auth_flavors[0] = RPC_AUTH_UNIX; args->auth_flavor_len = 0; + args->minorversion = 0; switch (data->version) { case 1: @@ -2336,6 +2414,8 @@ static int nfs4_validate_mount_data(void *options, nfs_validate_transport_protocol(args); + nfs4_validate_mount_flags(args); + if (args->auth_flavor_len > 1) goto out_inval_auth; @@ -2375,12 +2455,12 @@ out_no_client_address: } /* - * Get the superblock for an NFS4 mountpoint + * Get the superblock for the NFS4 root partition */ -static int nfs4_get_sb(struct file_system_type *fs_type, +static int nfs4_remote_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - struct nfs_parsed_mount_data *data; + struct nfs_parsed_mount_data *data = raw_data; struct super_block *s; struct nfs_server *server; struct nfs_fh *mntfh; @@ -2391,18 +2471,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type, }; int error = -ENOMEM; - data = kzalloc(sizeof(*data), GFP_KERNEL); mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); if (data == NULL || mntfh == NULL) goto out_free_fh; security_init_mnt_opts(&data->lsm_opts); - /* Validate the mount data */ - error = nfs4_validate_mount_data(raw_data, data, dev_name); - if (error < 0) - goto out; - /* Get a volume representation */ server = nfs4_create_server(data, mntfh); if (IS_ERR(server)) { @@ -2415,7 +2489,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, compare_super = NULL; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); + s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; @@ -2452,14 +2526,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data->client_address); - kfree(data->nfs_server.export_path); - kfree(data->nfs_server.hostname); - kfree(data->fscache_uniq); security_free_mnt_opts(&data->lsm_opts); out_free_fh: kfree(mntfh); - kfree(data); return error; out_free: @@ -2473,16 +2542,137 @@ error_splat_super: goto out; } +static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, + int flags, void *data, const char *hostname) +{ + struct vfsmount *root_mnt; + char *root_devname; + size_t len; + + len = strlen(hostname) + 3; + root_devname = kmalloc(len, GFP_KERNEL); + if (root_devname == NULL) + return ERR_PTR(-ENOMEM); + snprintf(root_devname, len, "%s:/", hostname); + root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); + kfree(root_devname); + return root_mnt; +} + +static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt) +{ + char *page = (char *) __get_free_page(GFP_KERNEL); + char *devname, *tmp; + + if (page == NULL) + return; + devname = nfs_path(path->mnt->mnt_devname, + path->mnt->mnt_root, path->dentry, + page, PAGE_SIZE); + if (devname == NULL) + goto out_freepage; + tmp = kstrdup(devname, GFP_KERNEL); + if (tmp == NULL) + goto out_freepage; + kfree(mnt->mnt_devname); + mnt->mnt_devname = tmp; +out_freepage: + free_page((unsigned long)page); +} + +static int nfs_follow_remote_path(struct vfsmount *root_mnt, + const char *export_path, struct vfsmount *mnt_target) +{ + struct mnt_namespace *ns_private; + struct nameidata nd; + struct super_block *s; + int ret; + + ns_private = create_mnt_ns(root_mnt); + ret = PTR_ERR(ns_private); + if (IS_ERR(ns_private)) + goto out_mntput; + + ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, + export_path, LOOKUP_FOLLOW, &nd); + + put_mnt_ns(ns_private); + + if (ret != 0) + goto out_err; + + s = nd.path.mnt->mnt_sb; + atomic_inc(&s->s_active); + mnt_target->mnt_sb = s; + mnt_target->mnt_root = dget(nd.path.dentry); + + /* Correct the device pathname */ + nfs_fix_devname(&nd.path, mnt_target); + + path_put(&nd.path); + down_write(&s->s_umount); + return 0; +out_mntput: + mntput(root_mnt); +out_err: + return ret; +} + +/* + * Get the superblock for an NFS4 mountpoint + */ +static int nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + struct nfs_parsed_mount_data *data; + char *export_path; + struct vfsmount *root_mnt; + int error = -ENOMEM; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + goto out_free_data; + + /* Validate the mount data */ + error = nfs4_validate_mount_data(raw_data, data, dev_name); + if (error < 0) + goto out; + + export_path = data->nfs_server.export_path; + data->nfs_server.export_path = "/"; + root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, + data->nfs_server.hostname); + data->nfs_server.export_path = export_path; + + error = PTR_ERR(root_mnt); + if (IS_ERR(root_mnt)) + goto out; + + error = nfs_follow_remote_path(root_mnt, export_path, mnt); + +out: + kfree(data->client_address); + kfree(data->nfs_server.export_path); + kfree(data->nfs_server.hostname); + kfree(data->fscache_uniq); +out_free_data: + kfree(data); + dprintk("<-- nfs4_get_sb() = %d%s\n", error, + error != 0 ? " [error]" : ""); + return error; +} + static void nfs4_kill_super(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); + dprintk("--> %s\n", __func__); nfs_super_return_all_delegations(sb); kill_anon_super(sb); - nfs4_renewd_prepare_shutdown(server); nfs_fscache_release_super_cookie(sb); nfs_free_server(server); + dprintk("<-- %s\n", __func__); } /* @@ -2568,12 +2758,9 @@ error_splat_super: return error; } -/* - * Create an NFS4 server record on referral traversal - */ -static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data, - struct vfsmount *mnt) +static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, + struct vfsmount *mnt) { struct nfs_clone_mount *data = raw_data; struct super_block *s; @@ -2652,4 +2839,36 @@ error_splat_super: return error; } +/* + * Create an NFS4 server record on referral traversal + */ +static int nfs4_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + char *export_path; + struct vfsmount *root_mnt; + int error; + + dprintk("--> nfs4_referral_get_sb()\n"); + + export_path = data->mnt_path; + data->mnt_path = "/"; + + root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, + flags, data, data->hostname); + data->mnt_path = export_path; + + error = PTR_ERR(root_mnt); + if (IS_ERR(root_mnt)) + goto out; + + error = nfs_follow_remote_path(root_mnt, export_path, mnt); +out: + dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error, + error != 0 ? " [error]" : ""); + return error; +} + #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index ecc29534777..1064c91ae81 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -15,6 +15,7 @@ #include <linux/wait.h> #include "internal.h" +#include "nfs4_fs.h" struct nfs_unlinkdata { struct hlist_node list; @@ -82,7 +83,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) struct inode *dir = data->dir; if (!NFS_PROTO(dir)->unlink_done(task, dir)) - rpc_restart_call(task); + nfs4_restart_rpc(task, NFS_SERVER(dir)->nfs_client); } /** @@ -102,9 +103,25 @@ static void nfs_async_unlink_release(void *calldata) nfs_sb_deactive(sb); } +#if defined(CONFIG_NFS_V4_1) +void nfs_unlink_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_unlinkdata *data = calldata; + struct nfs_server *server = NFS_SERVER(data->dir); + + if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + static const struct rpc_call_ops nfs_unlink_ops = { .rpc_call_done = nfs_async_unlink_done, .rpc_release = nfs_async_unlink_release, +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_unlink_prepare, +#endif /* CONFIG_NFS_V4_1 */ }; static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) @@ -241,6 +258,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) status = PTR_ERR(data->cred); goto out_free; } + data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; status = -EBUSY; spin_lock(&dentry->d_lock); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e560a78995a..ce728829f79 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -25,6 +25,7 @@ #include "delegation.h" #include "internal.h" #include "iostat.h" +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -52,6 +53,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; } return p; } @@ -71,6 +73,7 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); p->npages = pagecount; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { @@ -1048,7 +1051,23 @@ out: nfs_writedata_release(calldata); } +#if defined(CONFIG_NFS_V4_1) +void nfs_write_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client; + + if (nfs4_setup_sequence(clp, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + static const struct rpc_call_ops nfs_write_partial_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_partial, .rpc_release = nfs_writeback_release_partial, }; @@ -1111,6 +1130,9 @@ remove_request: } static const struct rpc_call_ops nfs_write_full_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_full, .rpc_release = nfs_writeback_release_full, }; @@ -1123,6 +1145,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; + struct nfs_server *server = NFS_SERVER(data->inode); int status; dprintk("NFS: %5u nfs_writeback_done (status %d)\n", @@ -1155,7 +1178,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", - NFS_SERVER(data->inode)->nfs_client->cl_hostname, + server->nfs_client->cl_hostname, resp->verf->committed, argp->stable); complain = jiffies + 300 * HZ; } @@ -1181,7 +1204,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ argp->stable = NFS_FILE_SYNC; } - rpc_restart_call(task); + nfs4_restart_rpc(task, server->nfs_client); return -EAGAIN; } if (time_before(complain, jiffies)) { @@ -1193,6 +1216,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) /* Can't do anything about it except throw an error. */ task->tk_status = -EIO; } + nfs4_sequence_free_slot(server->nfs_client, &data->res.seq_res); return 0; } @@ -1349,6 +1373,9 @@ static void nfs_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8b1f8efb469..b92a27629fb 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -464,16 +464,11 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) if (err) return err; /* - * Just a quick sanity check; we could also try to check - * whether this pseudoflavor is supported, but at worst - * an unsupported pseudoflavor on the export would just - * be a pseudoflavor that won't match the flavor of any - * authenticated request. The administrator will - * probably discover the problem when someone fails to - * authenticate. + * XXX: It would be nice to also check whether this + * pseudoflavor is supported, so we can discover the + * problem at export time instead of when a client fails + * to authenticate. */ - if (f->pseudoflavor < 0) - return -EINVAL; err = get_int(mesg, &f->flags); if (err) return err; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 7c9fe838f03..a713c418a92 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -652,8 +652,6 @@ nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp, * NFSv3 Server procedures. * Only the results of non-idempotent operations are cached. */ -#define nfs3svc_decode_voidargs NULL -#define nfs3svc_release_void NULL #define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle #define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat #define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat @@ -686,28 +684,219 @@ struct nfsd3_voidargs { int dummy; }; #define WC (7+pAT) /* WCC attributes */ static struct svc_procedure nfsd_procedures3[22] = { - PROC(null, void, void, void, RC_NOCACHE, ST), - PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), - PROC(setattr, sattr, wccstat, fhandle, RC_REPLBUFF, ST+WC), - PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT), - PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1), - PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4), - PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4), - PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4), - PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(symlink, symlink, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(mknod, mknod, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(remove, dirop, wccstat, fhandle, RC_REPLBUFF, ST+WC), - PROC(rmdir, dirop, wccstat, fhandle, RC_REPLBUFF, ST+WC), - PROC(rename, rename, rename, fhandle2, RC_REPLBUFF, ST+WC+WC), - PROC(link, link, link, fhandle2, RC_REPLBUFF, ST+pAT+WC), - PROC(readdir, readdir, readdir, fhandle, RC_NOCACHE, 0), - PROC(readdirplus,readdirplus, readdir, fhandle, RC_NOCACHE, 0), - PROC(fsstat, fhandle, fsstat, void, RC_NOCACHE, ST+pAT+2*6+1), - PROC(fsinfo, fhandle, fsinfo, void, RC_NOCACHE, ST+pAT+12), - PROC(pathconf, fhandle, pathconf, void, RC_NOCACHE, ST+pAT+6), - PROC(commit, commit, commit, fhandle, RC_NOCACHE, ST+WC+2), + [NFS3PROC_NULL] = { + .pc_func = (svc_procfunc) nfsd3_proc_null, + .pc_encode = (kxdrproc_t) nfs3svc_encode_voidres, + .pc_argsize = sizeof(struct nfsd3_voidargs), + .pc_ressize = sizeof(struct nfsd3_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFS3PROC_GETATTR] = { + .pc_func = (svc_procfunc) nfsd3_proc_getattr, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_attrstatres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT, + }, + [NFS3PROC_SETATTR] = { + .pc_func = (svc_procfunc) nfsd3_proc_setattr, + .pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_sattrargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_LOOKUP] = { + .pc_func = (svc_procfunc) nfsd3_proc_lookup, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_diropres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_diropres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+FH+pAT+pAT, + }, + [NFS3PROC_ACCESS] = { + .pc_func = (svc_procfunc) nfsd3_proc_access, + .pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_accessres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_accessargs), + .pc_ressize = sizeof(struct nfsd3_accessres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+1, + }, + [NFS3PROC_READLINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_readlink, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readlinkargs), + .pc_ressize = sizeof(struct nfsd3_readlinkres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4, + }, + [NFS3PROC_READ] = { + .pc_func = (svc_procfunc) nfsd3_proc_read, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readargs), + .pc_ressize = sizeof(struct nfsd3_readres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4, + }, + [NFS3PROC_WRITE] = { + .pc_func = (svc_procfunc) nfsd3_proc_write, + .pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_writeres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_writeargs), + .pc_ressize = sizeof(struct nfsd3_writeres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC+4, + }, + [NFS3PROC_CREATE] = { + .pc_func = (svc_procfunc) nfsd3_proc_create, + .pc_decode = (kxdrproc_t) nfs3svc_decode_createargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_createargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_MKDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_mkdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_mkdirargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_SYMLINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_symlink, + .pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_symlinkargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_MKNOD] = { + .pc_func = (svc_procfunc) nfsd3_proc_mknod, + .pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_mknodargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_REMOVE] = { + .pc_func = (svc_procfunc) nfsd3_proc_remove, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_RMDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_rmdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_RENAME] = { + .pc_func = (svc_procfunc) nfsd3_proc_rename, + .pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_renameres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_renameargs), + .pc_ressize = sizeof(struct nfsd3_renameres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC+WC, + }, + [NFS3PROC_LINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_link, + .pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_linkres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_linkargs), + .pc_ressize = sizeof(struct nfsd3_linkres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+pAT+WC, + }, + [NFS3PROC_READDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_readdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readdirargs), + .pc_ressize = sizeof(struct nfsd3_readdirres), + .pc_cachetype = RC_NOCACHE, + }, + [NFS3PROC_READDIRPLUS] = { + .pc_func = (svc_procfunc) nfsd3_proc_readdirplus, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readdirplusargs), + .pc_ressize = sizeof(struct nfsd3_readdirres), + .pc_cachetype = RC_NOCACHE, + }, + [NFS3PROC_FSSTAT] = { + .pc_func = (svc_procfunc) nfsd3_proc_fsstat, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_fsstatres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+2*6+1, + }, + [NFS3PROC_FSINFO] = { + .pc_func = (svc_procfunc) nfsd3_proc_fsinfo, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_fsinfores), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+12, + }, + [NFS3PROC_PATHCONF] = { + .pc_func = (svc_procfunc) nfsd3_proc_pathconf, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_pathconfres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+6, + }, + [NFS3PROC_COMMIT] = { + .pc_func = (svc_procfunc) nfsd3_proc_commit, + .pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_commitres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_commitargs), + .pc_ressize = sizeof(struct nfsd3_commitres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+WC+2, + }, }; struct svc_version nfsd_version3 = { diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 17d0dd99720..01d4ec1c88e 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -272,6 +272,7 @@ void fill_post_wcc(struct svc_fh *fhp) err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &fhp->fh_post_attr); + fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; if (err) fhp->fh_post_saved = 0; else diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 290289bd44f..3fd23f7acec 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -140,8 +140,10 @@ struct nfs4_cb_compound_hdr { int status; u32 ident; u32 nops; + __be32 *nops_p; + u32 minorversion; u32 taglen; - char * tag; + char *tag; }; static struct { @@ -201,33 +203,39 @@ nfs_cb_stat_to_errno(int stat) * XDR encode */ -static int +static void encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) { __be32 * p; RESERVE_SPACE(16); WRITE32(0); /* tag length is always 0 */ - WRITE32(NFS4_MINOR_VERSION); + WRITE32(hdr->minorversion); WRITE32(hdr->ident); + hdr->nops_p = p; WRITE32(hdr->nops); - return 0; } -static int -encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) +static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr) +{ + *hdr->nops_p = htonl(hdr->nops); +} + +static void +encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, + struct nfs4_cb_compound_hdr *hdr) { __be32 *p; - int len = cb_rec->cbr_fh.fh_size; + int len = dp->dl_fh.fh_size; - RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); + RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len); WRITE32(OP_CB_RECALL); - WRITE32(cb_rec->cbr_stateid.si_generation); - WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); - WRITE32(cb_rec->cbr_trunc); + WRITE32(dp->dl_stateid.si_generation); + WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t)); + WRITE32(0); /* truncate optimization not implemented */ WRITE32(len); - WRITEMEM(&cb_rec->cbr_fh.fh_base, len); - return 0; + WRITEMEM(&dp->dl_fh.fh_base, len); + hdr->nops++; } static int @@ -241,17 +249,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) } static int -nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_cb_recall *args) +nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args) { struct xdr_stream xdr; struct nfs4_cb_compound_hdr hdr = { - .ident = args->cbr_ident, - .nops = 1, + .ident = args->dl_ident, }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_cb_compound_hdr(&xdr, &hdr); - return (encode_cb_recall(&xdr, args)); + encode_cb_recall(&xdr, args, &hdr); + encode_cb_nops(&hdr); + return 0; } @@ -358,18 +367,21 @@ static struct rpc_program cb_program = { .pipe_dir_name = "/nfsd4_cb", }; +static int max_cb_time(void) +{ + return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ; +} + /* Reference counting, callback cleanup, etc., all look racy as heck. * And why is cb_set an atomic? */ -static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp) +int setup_callback_client(struct nfs4_client *clp) { struct sockaddr_in addr; - struct nfs4_callback *cb = &clp->cl_callback; + struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_timeout timeparms = { - .to_initval = (NFSD_LEASE_TIME/4) * HZ, - .to_retries = 5, - .to_maxval = (NFSD_LEASE_TIME/2) * HZ, - .to_exponential = 1, + .to_initval = max_cb_time(), + .to_retries = 0, }; struct rpc_create_args args = { .protocol = IPPROTO_TCP, @@ -386,7 +398,7 @@ static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp) struct rpc_clnt *client; if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Initialize address */ memset(&addr, 0, sizeof(addr)); @@ -396,48 +408,77 @@ static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp) /* Create RPC client */ client = rpc_create(&args); - if (IS_ERR(client)) + if (IS_ERR(client)) { dprintk("NFSD: couldn't create callback client: %ld\n", PTR_ERR(client)); - return client; + return PTR_ERR(client); + } + cb->cb_client = client; + return 0; + +} + +static void warn_no_callback_path(struct nfs4_client *clp, int reason) +{ + dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", + (int)clp->cl_name.len, clp->cl_name.data, reason); +} + +static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_client *clp = calldata; + + if (task->tk_status) + warn_no_callback_path(clp, task->tk_status); + else + atomic_set(&clp->cl_cb_conn.cb_set, 1); + put_nfs4_client(clp); +} + +static const struct rpc_call_ops nfsd4_cb_probe_ops = { + .rpc_call_done = nfsd4_cb_probe_done, +}; +static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb) +{ + struct auth_cred acred = { + .machine_cred = 1 + }; + + /* + * Note in the gss case this doesn't actually have to wait for a + * gss upcall (or any calls to the client); this just creates a + * non-uptodate cred which the rpc state machine will fill in with + * a refresh_upcall later. + */ + return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred, + RPCAUTH_LOOKUP_NEW); } -static int do_probe_callback(void *data) +void do_probe_callback(struct nfs4_client *clp) { - struct nfs4_client *clp = data; - struct nfs4_callback *cb = &clp->cl_callback; + struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, }; - struct rpc_clnt *client; + struct rpc_cred *cred; int status; - client = setup_callback_client(clp); - if (IS_ERR(client)) { - status = PTR_ERR(client); - dprintk("NFSD: couldn't create callback client: %d\n", - status); - goto out_err; + cred = lookup_cb_cred(cb); + if (IS_ERR(cred)) { + status = PTR_ERR(cred); + goto out; + } + cb->cb_cred = cred; + msg.rpc_cred = cb->cb_cred; + status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT, + &nfsd4_cb_probe_ops, (void *)clp); +out: + if (status) { + warn_no_callback_path(clp, status); + put_nfs4_client(clp); } - - status = rpc_call_sync(client, &msg, RPC_TASK_SOFT); - - if (status) - goto out_release_client; - - cb->cb_client = client; - atomic_set(&cb->cb_set, 1); - put_nfs4_client(clp); - return 0; -out_release_client: - rpc_shutdown_client(client); -out_err: - dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", - (int)clp->cl_name.len, clp->cl_name.data, status); - put_nfs4_client(clp); - return 0; } /* @@ -446,21 +487,65 @@ out_err: void nfsd4_probe_callback(struct nfs4_client *clp) { - struct task_struct *t; + int status; - BUG_ON(atomic_read(&clp->cl_callback.cb_set)); + BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set)); + + status = setup_callback_client(clp); + if (status) { + warn_no_callback_path(clp, status); + return; + } /* the task holds a reference to the nfs4_client struct */ atomic_inc(&clp->cl_count); - t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe"); + do_probe_callback(clp); +} - if (IS_ERR(t)) - atomic_dec(&clp->cl_count); +static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_delegation *dp = calldata; + struct nfs4_client *clp = dp->dl_client; - return; + switch (task->tk_status) { + case -EIO: + /* Network partition? */ + atomic_set(&clp->cl_cb_conn.cb_set, 0); + warn_no_callback_path(clp, task->tk_status); + case -EBADHANDLE: + case -NFS4ERR_BAD_STATEID: + /* Race: client probably got cb_recall + * before open reply granting delegation */ + break; + default: + /* success, or error we can't handle */ + return; + } + if (dp->dl_retries--) { + rpc_delay(task, 2*HZ); + task->tk_status = 0; + rpc_restart_call(task); + } else { + atomic_set(&clp->cl_cb_conn.cb_set, 0); + warn_no_callback_path(clp, task->tk_status); + } +} + +static void nfsd4_cb_recall_release(void *calldata) +{ + struct nfs4_delegation *dp = calldata; + struct nfs4_client *clp = dp->dl_client; + + nfs4_put_delegation(dp); + put_nfs4_client(clp); } +static const struct rpc_call_ops nfsd4_cb_recall_ops = { + .rpc_call_done = nfsd4_cb_recall_done, + .rpc_release = nfsd4_cb_recall_release, +}; + /* * called with dp->dl_count inc'ed. */ @@ -468,41 +553,19 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_client; - struct rpc_clnt *clnt = clp->cl_callback.cb_client; - struct nfs4_cb_recall *cbr = &dp->dl_recall; + struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], - .rpc_argp = cbr, + .rpc_argp = dp, + .rpc_cred = clp->cl_cb_conn.cb_cred }; - int retries = 1; - int status = 0; - - cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ - cbr->cbr_dp = dp; - - status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); - while (retries--) { - switch (status) { - case -EIO: - /* Network partition? */ - atomic_set(&clp->cl_callback.cb_set, 0); - case -EBADHANDLE: - case -NFS4ERR_BAD_STATEID: - /* Race: client probably got cb_recall - * before open reply granting delegation */ - break; - default: - goto out_put_cred; - } - ssleep(2); - status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); + int status; + + dp->dl_retries = 1; + status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, + &nfsd4_cb_recall_ops, dp); + if (status) { + put_nfs4_client(clp); + nfs4_put_delegation(dp); } -out_put_cred: - /* - * Success or failure, now we're either waiting for lease expiration - * or deleg_return. - */ - put_nfs4_client(clp); - nfs4_put_delegation(dp); - return; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index b2883e9c638..7c8801769a3 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -51,6 +51,78 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC +static u32 nfsd_attrmask[] = { + NFSD_WRITEABLE_ATTRS_WORD0, + NFSD_WRITEABLE_ATTRS_WORD1, + NFSD_WRITEABLE_ATTRS_WORD2 +}; + +static u32 nfsd41_ex_attrmask[] = { + NFSD_SUPPATTR_EXCLCREAT_WORD0, + NFSD_SUPPATTR_EXCLCREAT_WORD1, + NFSD_SUPPATTR_EXCLCREAT_WORD2 +}; + +static __be32 +check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + u32 *bmval, u32 *writable) +{ + struct dentry *dentry = cstate->current_fh.fh_dentry; + struct svc_export *exp = cstate->current_fh.fh_export; + + /* + * Check about attributes are supported by the NFSv4 server or not. + * According to spec, unsupported attributes return ERR_ATTRNOTSUPP. + */ + if ((bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) || + (bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) || + (bmval[2] & ~nfsd_suppattrs2(cstate->minorversion))) + return nfserr_attrnotsupp; + + /* + * Check FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported + * in current environment or not. + */ + if (bmval[0] & FATTR4_WORD0_ACL) { + if (!IS_POSIXACL(dentry->d_inode)) + return nfserr_attrnotsupp; + } + if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) { + if (exp->ex_fslocs.locations == NULL) + return nfserr_attrnotsupp; + } + + /* + * According to spec, read-only attributes return ERR_INVAL. + */ + if (writable) { + if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) || + (bmval[2] & ~writable[2])) + return nfserr_inval; + } + + return nfs_ok; +} + +static __be32 +nfsd4_check_open_attributes(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, struct nfsd4_open *open) +{ + __be32 status = nfs_ok; + + if (open->op_create == NFS4_OPEN_CREATE) { + if (open->op_createmode == NFS4_CREATE_UNCHECKED + || open->op_createmode == NFS4_CREATE_GUARDED) + status = check_attr_support(rqstp, cstate, + open->op_bmval, nfsd_attrmask); + else if (open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1) + status = check_attr_support(rqstp, cstate, + open->op_bmval, nfsd41_ex_attrmask); + } + + return status; +} + static inline void fh_dup2(struct svc_fh *dst, struct svc_fh *src) { @@ -225,6 +297,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; + status = nfsd4_check_open_attributes(rqstp, cstate, open); + if (status) + goto out; + /* Openowner is now set, so sequence id will get bumped. Now we need * these checks before we do any creates: */ status = nfserr_grace; @@ -395,6 +471,11 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; + status = check_attr_support(rqstp, cstate, create->cr_bmval, + nfsd_attrmask); + if (status) + return status; + switch (create->cr_type) { case NF4LNK: /* ugh! we have to null-terminate the linktext, or @@ -689,6 +770,12 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; status = nfs_ok; + + status = check_attr_support(rqstp, cstate, setattr->sa_bmval, + nfsd_attrmask); + if (status) + goto out; + if (setattr->sa_acl != NULL) status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, setattr->sa_acl); @@ -763,10 +850,10 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; - if ((verify->ve_bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) - || (verify->ve_bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) - || (verify->ve_bmval[2] & ~nfsd_suppattrs2(cstate->minorversion))) - return nfserr_attrnotsupp; + status = check_attr_support(rqstp, cstate, verify->ve_bmval, NULL); + if (status) + return status; + if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) return nfserr_inval; @@ -1226,24 +1313,9 @@ static const char *nfsd4_op_name(unsigned opnum) return "unknown_operation"; } -#define nfs4svc_decode_voidargs NULL -#define nfs4svc_release_void NULL #define nfsd4_voidres nfsd4_voidargs -#define nfs4svc_release_compound NULL struct nfsd4_voidargs { int dummy; }; -#define PROC(name, argt, rest, relt, cache, respsize) \ - { (svc_procfunc) nfsd4_proc_##name, \ - (kxdrproc_t) nfs4svc_decode_##argt##args, \ - (kxdrproc_t) nfs4svc_encode_##rest##res, \ - (kxdrproc_t) nfs4svc_release_##relt, \ - sizeof(struct nfsd4_##argt##args), \ - sizeof(struct nfsd4_##rest##res), \ - 0, \ - cache, \ - respsize, \ - } - /* * TODO: At the present time, the NFSv4 server does not do XID caching * of requests. Implementing XID caching would not be a serious problem, @@ -1255,8 +1327,23 @@ struct nfsd4_voidargs { int dummy; }; * better XID's. */ static struct svc_procedure nfsd_procedures4[2] = { - PROC(null, void, void, void, RC_NOCACHE, 1), - PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE/4) + [NFSPROC4_NULL] = { + .pc_func = (svc_procfunc) nfsd4_proc_null, + .pc_encode = (kxdrproc_t) nfs4svc_encode_voidres, + .pc_argsize = sizeof(struct nfsd4_voidargs), + .pc_ressize = sizeof(struct nfsd4_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = 1, + }, + [NFSPROC4_COMPOUND] = { + .pc_func = (svc_procfunc) nfsd4_proc_compound, + .pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs, + .pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres, + .pc_argsize = sizeof(struct nfsd4_compoundargs), + .pc_ressize = sizeof(struct nfsd4_compoundres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = NFSD_BUFSIZE/4, + }, }; struct svc_version nfsd_version4 = { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3b711f5147a..980a216a48c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -182,7 +182,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f { struct nfs4_delegation *dp; struct nfs4_file *fp = stp->st_file; - struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; + struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn; dprintk("NFSD alloc_init_deleg\n"); if (fp->fi_had_conflict) @@ -203,10 +203,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f get_file(stp->st_vfs_file); dp->dl_vfs_file = stp->st_vfs_file; dp->dl_type = type; - dp->dl_recall.cbr_dp = NULL; - dp->dl_recall.cbr_ident = cb->cb_ident; - dp->dl_recall.cbr_trunc = 0; - dp->dl_stateid.si_boot = boot_time; + dp->dl_ident = cb->cb_ident; + dp->dl_stateid.si_boot = get_seconds(); dp->dl_stateid.si_stateownerid = current_delegid++; dp->dl_stateid.si_fileid = 0; dp->dl_stateid.si_generation = 0; @@ -427,6 +425,11 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) { int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; + if (fchan->maxreqs < 1) + return nfserr_inval; + else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) + fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; + spin_lock(&nfsd_serv->sv_lock); if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; @@ -446,8 +449,8 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) * fchan holds the client values on input, and the server values on output */ static int init_forechannel_attrs(struct svc_rqst *rqstp, - struct nfsd4_session *session, - struct nfsd4_channel_attrs *fchan) + struct nfsd4_channel_attrs *session_fchan, + struct nfsd4_channel_attrs *fchan) { int status = 0; __u32 maxcount = svc_max_payload(rqstp); @@ -457,21 +460,21 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, /* Use the client's max request and max response size if possible */ if (fchan->maxreq_sz > maxcount) fchan->maxreq_sz = maxcount; - session->se_fmaxreq_sz = fchan->maxreq_sz; + session_fchan->maxreq_sz = fchan->maxreq_sz; if (fchan->maxresp_sz > maxcount) fchan->maxresp_sz = maxcount; - session->se_fmaxresp_sz = fchan->maxresp_sz; + session_fchan->maxresp_sz = fchan->maxresp_sz; /* Set the max response cached size our default which is * a multiple of PAGE_SIZE and small */ - session->se_fmaxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; - fchan->maxresp_cached = session->se_fmaxresp_cached; + session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; + fchan->maxresp_cached = session_fchan->maxresp_cached; /* Use the client's maxops if possible */ if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; - session->se_fmaxops = fchan->maxops; + session_fchan->maxops = fchan->maxops; /* try to use the client requested number of slots */ if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) @@ -483,7 +486,7 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, */ status = set_forechannel_maxreqs(fchan); - session->se_fnumslots = fchan->maxreqs; + session_fchan->maxreqs = fchan->maxreqs; return status; } @@ -497,12 +500,14 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, memset(&tmp, 0, sizeof(tmp)); /* FIXME: For now, we just accept the client back channel attributes. */ - status = init_forechannel_attrs(rqstp, &tmp, &cses->fore_channel); + tmp.se_bchannel = cses->back_channel; + status = init_forechannel_attrs(rqstp, &tmp.se_fchannel, + &cses->fore_channel); if (status) goto out; /* allocate struct nfsd4_session and slot table in one piece */ - slotsize = tmp.se_fnumslots * sizeof(struct nfsd4_slot); + slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot); new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); if (!new) goto out; @@ -576,7 +581,7 @@ free_session(struct kref *kref) int i; ses = container_of(kref, struct nfsd4_session, se_ref); - for (i = 0; i < ses->se_fnumslots; i++) { + for (i = 0; i < ses->se_fchannel.maxreqs; i++) { struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; nfsd4_release_respages(e->ce_respages, e->ce_resused); } @@ -632,16 +637,20 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static void shutdown_callback_client(struct nfs4_client *clp) { - struct rpc_clnt *clnt = clp->cl_callback.cb_client; + struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; if (clnt) { /* * Callback threads take a reference on the client, so there * should be no outstanding callbacks at this point. */ - clp->cl_callback.cb_client = NULL; + clp->cl_cb_conn.cb_client = NULL; rpc_shutdown_client(clnt); } + if (clp->cl_cb_conn.cb_cred) { + put_rpccred(clp->cl_cb_conn.cb_cred); + clp->cl_cb_conn.cb_cred = NULL; + } } static inline void @@ -714,7 +723,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) return NULL; memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); atomic_set(&clp->cl_count, 1); - atomic_set(&clp->cl_callback.cb_set, 0); + atomic_set(&clp->cl_cb_conn.cb_set, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_openowners); @@ -966,7 +975,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) { - struct nfs4_callback *cb = &clp->cl_callback; + struct nfs4_cb_conn *cb = &clp->cl_cb_conn; /* Currently, we only support tcp for the callback channel */ if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) @@ -975,6 +984,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, &cb->cb_addr, &cb->cb_port))) goto out_err; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; return; @@ -1128,7 +1138,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, * is sent (lease renewal). */ if (seq && nfsd4_not_cached(resp)) { - seq->maxslots = resp->cstate.session->se_fnumslots; + seq->maxslots = resp->cstate.session->se_fchannel.maxreqs; return nfs_ok; } @@ -1238,12 +1248,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, expire_client(conf); goto out_new; } - if (ip_addr != conf->cl_addr && - !(exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A)) { - /* Client collision. 18.35.4 case 3 */ - status = nfserr_clid_inuse; - goto out; - } /* * Set bit when the owner id and verifier map to an already * confirmed client id (18.35.3). @@ -1257,12 +1261,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, copy_verf(conf, &verf); new = conf; goto out_copy; - } else { - /* 18.35.4 case 7 */ - if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { - status = nfserr_noent; - goto out; - } + } + + /* 18.35.4 case 7 */ + if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { + status = nfserr_noent; + goto out; } unconf = find_unconfirmed_client_by_str(dname, strhashval, true); @@ -1471,7 +1475,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, goto out; status = nfserr_badslot; - if (seq->slotid >= session->se_fnumslots) + if (seq->slotid >= session->se_fchannel.maxreqs) goto out; slot = &session->se_slots[seq->slotid]; @@ -1686,9 +1690,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, else { /* XXX: We just turn off callbacks until we can handle * change request correctly. */ - atomic_set(&conf->cl_callback.cb_set, 0); - gen_confirm(conf); - nfsd4_remove_clid_dir(unconf); + atomic_set(&conf->cl_cb_conn.cb_set, 0); expire_client(unconf); status = nfs_ok; @@ -1882,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open * stp->st_stateowner = sop; get_nfs4_file(fp); stp->st_file = fp; - stp->st_stateid.si_boot = boot_time; + stp->st_stateid.si_boot = get_seconds(); stp->st_stateid.si_stateownerid = sop->so_id; stp->st_stateid.si_fileid = fp->fi_id; stp->st_stateid.si_generation = 0; @@ -2059,19 +2061,6 @@ nfs4_file_downgrade(struct file *filp, unsigned int share_access) } /* - * Recall a delegation - */ -static int -do_recall(void *__dp) -{ - struct nfs4_delegation *dp = __dp; - - dp->dl_file->fi_had_conflict = true; - nfsd4_cb_recall(dp); - return 0; -} - -/* * Spawn a thread to perform a recall on the delegation represented * by the lease (file_lock) * @@ -2082,8 +2071,7 @@ do_recall(void *__dp) static void nfsd_break_deleg_cb(struct file_lock *fl) { - struct nfs4_delegation *dp= (struct nfs4_delegation *)fl->fl_owner; - struct task_struct *t; + struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl); if (!dp) @@ -2111,16 +2099,8 @@ void nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall"); - if (IS_ERR(t)) { - struct nfs4_client *clp = dp->dl_client; - - printk(KERN_INFO "NFSD: Callback thread failed for " - "for client (clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - put_nfs4_client(dp->dl_client); - nfs4_put_delegation(dp); - } + dp->dl_file->fi_had_conflict = true; + nfsd4_cb_recall(dp); } /* @@ -2422,7 +2402,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta { struct nfs4_delegation *dp; struct nfs4_stateowner *sop = stp->st_stateowner; - struct nfs4_callback *cb = &sop->so_client->cl_callback; + struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn; struct file_lock fl, *flp = &fl; int status, flag = 0; @@ -2614,7 +2594,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, renew_client(clp); status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) - && !atomic_read(&clp->cl_callback.cb_set)) + && !atomic_read(&clp->cl_cb_conn.cb_set)) goto out; status = nfs_ok; out: @@ -2738,12 +2718,42 @@ nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) static int STALE_STATEID(stateid_t *stateid) { - if (stateid->si_boot == boot_time) - return 0; - dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", - stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid, - stateid->si_generation); - return 1; + if (time_after((unsigned long)boot_time, + (unsigned long)stateid->si_boot)) { + dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", + stateid->si_boot, stateid->si_stateownerid, + stateid->si_fileid, stateid->si_generation); + return 1; + } + return 0; +} + +static int +EXPIRED_STATEID(stateid_t *stateid) +{ + if (time_before((unsigned long)boot_time, + ((unsigned long)stateid->si_boot)) && + time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) { + dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n", + stateid->si_boot, stateid->si_stateownerid, + stateid->si_fileid, stateid->si_generation); + return 1; + } + return 0; +} + +static __be32 +stateid_error_map(stateid_t *stateid) +{ + if (STALE_STATEID(stateid)) + return nfserr_stale_stateid; + if (EXPIRED_STATEID(stateid)) + return nfserr_expired; + + dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n", + stateid->si_boot, stateid->si_stateownerid, + stateid->si_fileid, stateid->si_generation); + return nfserr_bad_stateid; } static inline int @@ -2867,8 +2877,10 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, status = nfserr_bad_stateid; if (is_delegation_stateid(stateid)) { dp = find_delegation_stateid(ino, stateid); - if (!dp) + if (!dp) { + status = stateid_error_map(stateid); goto out; + } status = check_stateid_generation(stateid, &dp->dl_stateid, flags); if (status) @@ -2881,8 +2893,10 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, *filpp = dp->dl_vfs_file; } else { /* open or lock stateid */ stp = find_stateid(stateid, flags); - if (!stp) + if (!stp) { + status = stateid_error_map(stateid); goto out; + } if (nfs4_check_fh(current_fh, stp)) goto out; if (!stp->st_stateowner->so_confirmed) @@ -2956,7 +2970,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, */ sop = search_close_lru(stateid->si_stateownerid, flags); if (sop == NULL) - return nfserr_bad_stateid; + return stateid_error_map(stateid); *sopp = sop; goto check_replay; } @@ -3227,8 +3241,10 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!is_delegation_stateid(stateid)) goto out; dp = find_delegation_stateid(inode, stateid); - if (!dp) + if (!dp) { + status = stateid_error_map(stateid); goto out; + } status = check_stateid_generation(stateid, &dp->dl_stateid, flags); if (status) goto out; @@ -3455,7 +3471,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc stp->st_stateowner = sop; get_nfs4_file(fp); stp->st_file = fp; - stp->st_stateid.si_boot = boot_time; + stp->st_stateid.si_boot = get_seconds(); stp->st_stateid.si_stateownerid = sop->so_id; stp->st_stateid.si_fileid = fp->fi_id; stp->st_stateid.si_generation = 0; @@ -3987,6 +4003,7 @@ nfs4_state_init(void) INIT_LIST_HEAD(&conf_str_hashtbl[i]); INIT_LIST_HEAD(&unconf_str_hashtbl[i]); INIT_LIST_HEAD(&unconf_id_hashtbl[i]); + INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); } for (i = 0; i < SESSION_HASH_SIZE; i++) INIT_LIST_HEAD(&sessionid_hashtbl[i]); @@ -4009,8 +4026,6 @@ nfs4_state_init(void) INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); - for (i = 0; i < CLIENT_HASH_SIZE; i++) - INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); reclaim_str_hashtbl_size = 0; return 0; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b73549d293b..2dcc7feaa6f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -83,16 +83,6 @@ check_filename(char *str, int len, __be32 err) return 0; } -/* - * START OF "GENERIC" DECODE ROUTINES. - * These may look a little ugly since they are imported from a "generic" - * set of XDR encode/decode routines which are intended to be shared by - * all of our NFSv4 implementations (OpenBSD, MacOS X...). - * - * If the pain of reading these is too great, it should be a straightforward - * task to translate them into Linux-specific versions which are more - * consistent with the style used in NFSv2/v3... - */ #define DECODE_HEAD \ __be32 *p; \ __be32 status @@ -254,20 +244,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) DECODE_TAIL; } -static u32 nfsd_attrmask[] = { - NFSD_WRITEABLE_ATTRS_WORD0, - NFSD_WRITEABLE_ATTRS_WORD1, - NFSD_WRITEABLE_ATTRS_WORD2 -}; - -static u32 nfsd41_ex_attrmask[] = { - NFSD_SUPPATTR_EXCLCREAT_WORD0, - NFSD_SUPPATTR_EXCLCREAT_WORD1, - NFSD_SUPPATTR_EXCLCREAT_WORD2 -}; - static __be32 -nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, +nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, struct nfs4_acl **acl) { int expected_len, len = 0; @@ -280,18 +258,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, if ((status = nfsd4_decode_bitmap(argp, bmval))) return status; - /* - * According to spec, unsupported attributes return ERR_ATTRNOTSUPP; - * read-only attributes return ERR_INVAL. - */ - if ((bmval[0] & ~nfsd_suppattrs0(argp->minorversion)) || - (bmval[1] & ~nfsd_suppattrs1(argp->minorversion)) || - (bmval[2] & ~nfsd_suppattrs2(argp->minorversion))) - return nfserr_attrnotsupp; - if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) || - (bmval[2] & ~writable[2])) - return nfserr_inval; - READ_BUF(4); READ32(expected_len); @@ -424,8 +390,11 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, goto xdr_error; } } - BUG_ON(bmval[2]); /* no such writeable attr supported yet */ - if (len != expected_len) + if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 + || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 + || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) + READ_BUF(expected_len - len); + else if (len != expected_len) goto xdr_error; DECODE_TAIL; @@ -518,8 +487,8 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) return status; - status = nfsd4_decode_fattr(argp, create->cr_bmval, nfsd_attrmask, - &create->cr_iattr, &create->cr_acl); + status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, + &create->cr_acl); if (status) goto out; @@ -682,7 +651,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: status = nfsd4_decode_fattr(argp, open->op_bmval, - nfsd_attrmask, &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl); if (status) goto out; break; @@ -696,8 +665,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ_BUF(8); COPYMEM(open->op_verf.data, 8); status = nfsd4_decode_fattr(argp, open->op_bmval, - nfsd41_ex_attrmask, &open->op_iattr, - &open->op_acl); + &open->op_iattr, &open->op_acl); if (status) goto out; break; @@ -893,8 +861,8 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); if (status) return status; - return nfsd4_decode_fattr(argp, setattr->sa_bmval, nfsd_attrmask, - &setattr->sa_iattr, &setattr->sa_acl); + return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, + &setattr->sa_acl); } static __be32 @@ -1328,64 +1296,64 @@ static nfsd4_dec nfsd4_dec_ops[] = { }; static nfsd4_dec nfsd41_dec_ops[] = { - [OP_ACCESS] (nfsd4_dec)nfsd4_decode_access, - [OP_CLOSE] (nfsd4_dec)nfsd4_decode_close, - [OP_COMMIT] (nfsd4_dec)nfsd4_decode_commit, - [OP_CREATE] (nfsd4_dec)nfsd4_decode_create, - [OP_DELEGPURGE] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DELEGRETURN] (nfsd4_dec)nfsd4_decode_delegreturn, - [OP_GETATTR] (nfsd4_dec)nfsd4_decode_getattr, - [OP_GETFH] (nfsd4_dec)nfsd4_decode_noop, - [OP_LINK] (nfsd4_dec)nfsd4_decode_link, - [OP_LOCK] (nfsd4_dec)nfsd4_decode_lock, - [OP_LOCKT] (nfsd4_dec)nfsd4_decode_lockt, - [OP_LOCKU] (nfsd4_dec)nfsd4_decode_locku, - [OP_LOOKUP] (nfsd4_dec)nfsd4_decode_lookup, - [OP_LOOKUPP] (nfsd4_dec)nfsd4_decode_noop, - [OP_NVERIFY] (nfsd4_dec)nfsd4_decode_verify, - [OP_OPEN] (nfsd4_dec)nfsd4_decode_open, - [OP_OPENATTR] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_CONFIRM] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_DOWNGRADE] (nfsd4_dec)nfsd4_decode_open_downgrade, - [OP_PUTFH] (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_PUTROOTFH] (nfsd4_dec)nfsd4_decode_noop, - [OP_READ] (nfsd4_dec)nfsd4_decode_read, - [OP_READDIR] (nfsd4_dec)nfsd4_decode_readdir, - [OP_READLINK] (nfsd4_dec)nfsd4_decode_noop, - [OP_REMOVE] (nfsd4_dec)nfsd4_decode_remove, - [OP_RENAME] (nfsd4_dec)nfsd4_decode_rename, - [OP_RENEW] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_RESTOREFH] (nfsd4_dec)nfsd4_decode_noop, - [OP_SAVEFH] (nfsd4_dec)nfsd4_decode_noop, - [OP_SECINFO] (nfsd4_dec)nfsd4_decode_secinfo, - [OP_SETATTR] (nfsd4_dec)nfsd4_decode_setattr, - [OP_SETCLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_SETCLIENTID_CONFIRM](nfsd4_dec)nfsd4_decode_notsupp, - [OP_VERIFY] (nfsd4_dec)nfsd4_decode_verify, - [OP_WRITE] (nfsd4_dec)nfsd4_decode_write, - [OP_RELEASE_LOCKOWNER] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, + [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, + [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, + [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, + [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, + [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, + [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, + [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, + [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, + [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, + [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, + [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, + [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, + [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, + [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_READ] = (nfsd4_dec)nfsd4_decode_read, + [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, + [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, + [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, + [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, + [OP_RENEW] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, + [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, + [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp, + [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp, /* new operations for NFSv4.1 */ - [OP_BACKCHANNEL_CTL] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_BIND_CONN_TO_SESSION](nfsd4_dec)nfsd4_decode_notsupp, - [OP_EXCHANGE_ID] (nfsd4_dec)nfsd4_decode_exchange_id, - [OP_CREATE_SESSION] (nfsd4_dec)nfsd4_decode_create_session, - [OP_DESTROY_SESSION] (nfsd4_dec)nfsd4_decode_destroy_session, - [OP_FREE_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_GET_DIR_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_GETDEVICEINFO] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_GETDEVICELIST] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTGET] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTRETURN] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_SECINFO_NO_NAME] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_SEQUENCE] (nfsd4_dec)nfsd4_decode_sequence, - [OP_SET_SSV] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_TEST_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_WANT_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DESTROY_CLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_RECLAIM_COMPLETE] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_notsupp, + [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, + [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, + [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, + [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, + [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_notsupp, }; struct nfsd4_minorversion_ops { @@ -1489,21 +1457,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -/* - * END OF "GENERIC" DECODE ROUTINES. - */ - -/* - * START OF "GENERIC" ENCODE ROUTINES. - * These may look a little ugly since they are imported from a "generic" - * set of XDR encode/decode routines which are intended to be shared by - * all of our NFSv4 implementations (OpenBSD, MacOS X...). - * - * If the pain of reading these is too great, it should be a straightforward - * task to translate them into Linux-specific versions which are more - * consistent with the style used in NFSv2/v3... - */ -#define ENCODE_HEAD __be32 *p #define WRITE32(n) *p++ = htonl(n) #define WRITE64(n) do { \ @@ -1515,13 +1468,41 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) memcpy(p, ptr, nbytes); \ p += XDR_QUADLEN(nbytes); \ }} while (0) -#define WRITECINFO(c) do { \ - *p++ = htonl(c.atomic); \ - *p++ = htonl(c.before_ctime_sec); \ - *p++ = htonl(c.before_ctime_nsec); \ - *p++ = htonl(c.after_ctime_sec); \ - *p++ = htonl(c.after_ctime_nsec); \ -} while (0) + +static void write32(__be32 **p, u32 n) +{ + *(*p)++ = n; +} + +static void write64(__be32 **p, u64 n) +{ + write32(p, (u32)(n >> 32)); + write32(p, (u32)n); +} + +static void write_change(__be32 **p, struct kstat *stat, struct inode *inode) +{ + if (IS_I_VERSION(inode)) { + write64(p, inode->i_version); + } else { + write32(p, stat->ctime.tv_sec); + write32(p, stat->ctime.tv_nsec); + } +} + +static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) +{ + write32(p, c->atomic); + if (c->change_supported) { + write64(p, c->before_change); + write64(p, c->after_change); + } else { + write32(p, c->before_ctime_sec); + write32(p, c->before_ctime_nsec); + write32(p, c->after_ctime_sec); + write32(p, c->after_ctime_nsec); + } +} #define RESERVE_SPACE(nbytes) do { \ p = resp->p; \ @@ -1874,16 +1855,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { - /* - * Note: This _must_ be consistent with the scheme for writing - * change_info, so any changes made here must be reflected there - * as well. (See xdr4.h:set_change_info() and the WRITECINFO() - * macro above.) - */ if ((buflen -= 8) < 0) goto out_resource; - WRITE32(stat.ctime.tv_sec); - WRITE32(stat.ctime.tv_nsec); + write_change(&p, &stat, dentry->d_inode); } if (bmval0 & FATTR4_WORD0_SIZE) { if ((buflen -= 8) < 0) @@ -2348,7 +2322,7 @@ fail: static void nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) { - ENCODE_HEAD; + __be32 *p; RESERVE_SPACE(sizeof(stateid_t)); WRITE32(sid->si_generation); @@ -2359,7 +2333,7 @@ nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(8); @@ -2386,7 +2360,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(8); @@ -2399,11 +2373,11 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(32); - WRITECINFO(create->cr_cinfo); + write_cinfo(&p, &create->cr_cinfo); WRITE32(2); WRITE32(create->cr_bmval[0]); WRITE32(create->cr_bmval[1]); @@ -2435,7 +2409,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh { struct svc_fh *fhp = *fhpp; unsigned int len; - ENCODE_HEAD; + __be32 *p; if (!nfserr) { len = fhp->fh_handle.fh_size; @@ -2454,7 +2428,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh static void nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) { - ENCODE_HEAD; + __be32 *p; RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0)); WRITE64(ld->ld_start); @@ -2510,11 +2484,11 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(20); - WRITECINFO(link->li_cinfo); + write_cinfo(&p, &link->li_cinfo); ADJUST_ARGS(); } return nfserr; @@ -2524,7 +2498,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { - ENCODE_HEAD; + __be32 *p; ENCODE_SEQID_OP_HEAD; if (nfserr) @@ -2532,7 +2506,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op nfsd4_encode_stateid(resp, &open->op_stateid); RESERVE_SPACE(40); - WRITECINFO(open->op_cinfo); + write_cinfo(&p, &open->op_cinfo); WRITE32(open->op_rflags); WRITE32(2); WRITE32(open->op_bmval[0]); @@ -2619,7 +2593,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, int v, pn; unsigned long maxcount; long len; - ENCODE_HEAD; + __be32 *p; if (nfserr) return nfserr; @@ -2681,7 +2655,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd { int maxcount; char *page; - ENCODE_HEAD; + __be32 *p; if (nfserr) return nfserr; @@ -2730,7 +2704,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 int maxcount; loff_t offset; __be32 *page, *savep, *tailbase; - ENCODE_HEAD; + __be32 *p; if (nfserr) return nfserr; @@ -2806,11 +2780,11 @@ err_no_verf: static __be32 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(20); - WRITECINFO(remove->rm_cinfo); + write_cinfo(&p, &remove->rm_cinfo); ADJUST_ARGS(); } return nfserr; @@ -2819,12 +2793,12 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(40); - WRITECINFO(rename->rn_sinfo); - WRITECINFO(rename->rn_tinfo); + write_cinfo(&p, &rename->rn_sinfo); + write_cinfo(&p, &rename->rn_tinfo); ADJUST_ARGS(); } return nfserr; @@ -2839,7 +2813,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, u32 nflavs; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; - ENCODE_HEAD; + __be32 *p; if (nfserr) goto out; @@ -2904,7 +2878,7 @@ out: static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { - ENCODE_HEAD; + __be32 *p; RESERVE_SPACE(12); if (nfserr) { @@ -2924,7 +2898,7 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(8 + sizeof(nfs4_verifier)); @@ -2944,7 +2918,7 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n static __be32 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(16); @@ -2960,7 +2934,7 @@ static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_exchange_id *exid) { - ENCODE_HEAD; + __be32 *p; char *major_id; char *server_scope; int major_id_sz; @@ -3015,7 +2989,7 @@ static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_create_session *sess) { - ENCODE_HEAD; + __be32 *p; if (nfserr) return nfserr; @@ -3071,7 +3045,7 @@ __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_sequence *seq) { - ENCODE_HEAD; + __be32 *p; if (nfserr) return nfserr; @@ -3209,7 +3183,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, length, xb->page_len, tlen, pad); - if (length <= session->se_fmaxresp_cached) + if (length <= session->se_fchannel.maxresp_cached) return status; else return nfserr_rep_too_big_to_cache; @@ -3219,7 +3193,7 @@ void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { __be32 *statp; - ENCODE_HEAD; + __be32 *p; RESERVE_SPACE(8); WRITE32(op->opnum); @@ -3253,7 +3227,7 @@ status: void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { - ENCODE_HEAD; + __be32 *p; struct nfs4_replay *rp = op->replay; BUG_ON(!rp); @@ -3268,10 +3242,6 @@ nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) ADJUST_ARGS(); } -/* - * END OF "GENERIC" ENCODE ROUTINES. - */ - int nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) { diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 5bfc2ac60d5..4638635c5d8 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -29,15 +29,24 @@ */ #define CACHESIZE 1024 #define HASHSIZE 64 -#define REQHASH(xid) (((((__force __u32)xid) >> 24) ^ ((__force __u32)xid)) & (HASHSIZE-1)) -static struct hlist_head * hash_list; +static struct hlist_head * cache_hash; static struct list_head lru_head; static int cache_disabled = 1; +/* + * Calculate the hash index from an XID. + */ +static inline u32 request_hash(u32 xid) +{ + u32 h = xid; + h ^= (xid >> 24); + return h & (HASHSIZE-1); +} + static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); -/* +/* * locking for the reply cache: * A cache entry is "single use" if c_state == RC_INPROG * Otherwise, it when accessing _prev or _next, the lock must be held. @@ -62,8 +71,8 @@ int nfsd_reply_cache_init(void) i--; } - hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); - if (!hash_list) + cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); + if (!cache_hash) goto out_nomem; cache_disabled = 0; @@ -88,8 +97,8 @@ void nfsd_reply_cache_shutdown(void) cache_disabled = 1; - kfree (hash_list); - hash_list = NULL; + kfree (cache_hash); + cache_hash = NULL; } /* @@ -108,7 +117,7 @@ static void hash_refile(struct svc_cacherep *rp) { hlist_del_init(&rp->c_hash); - hlist_add_head(&rp->c_hash, hash_list + REQHASH(rp->c_xid)); + hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid)); } /* @@ -138,7 +147,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) spin_lock(&cache_lock); rtn = RC_DOIT; - rh = &hash_list[REQHASH(xid)]; + rh = &cache_hash[request_hash(xid)]; hlist_for_each_entry(rp, hn, rh, c_hash) { if (rp->c_state != RC_UNUSED && xid == rp->c_xid && proc == rp->c_proc && @@ -165,8 +174,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) } } - /* This should not happen */ - if (rp == NULL) { + /* All entries on the LRU are in-progress. This should not happen */ + if (&rp->c_lru == &lru_head) { static int complaints; printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); @@ -264,7 +273,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; - + /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { rp->c_state = RC_UNUSED; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index af16849d243..1250fb978ac 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -207,10 +207,14 @@ static struct file_operations pool_stats_operations = { static ssize_t write_svc(struct file *file, char *buf, size_t size) { struct nfsctl_svc *data; + int err; if (size < sizeof(*data)) return -EINVAL; data = (struct nfsctl_svc*) buf; - return nfsd_svc(data->svc_port, data->svc_nthreads); + err = nfsd_svc(data->svc_port, data->svc_nthreads); + if (err < 0) + return err; + return 0; } /** @@ -692,11 +696,12 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) if (newthreads < 0) return -EINVAL; rv = nfsd_svc(NFS_PORT, newthreads); - if (rv) + if (rv < 0) return rv; - } - sprintf(buf, "%d\n", nfsd_nrthreads()); - return strlen(buf); + } else + rv = nfsd_nrthreads(); + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); } /** @@ -793,7 +798,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) { char *mesg = buf; char *vers, *minorp, sign; - int len, num; + int len, num, remaining; unsigned minor; ssize_t tlen = 0; char *sep; @@ -840,32 +845,50 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } next: vers += len + 1; - tlen += len; } while ((len = qword_get(&mesg, vers, size)) > 0); /* If all get turned off, turn them back on, as * having no versions is BAD */ nfsd_reset_versions(); } + /* Now write current state into reply buffer */ len = 0; sep = ""; + remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) if (nfsd_vers(num, NFSD_AVAIL)) { - len += sprintf(buf+len, "%s%c%d", sep, + len = snprintf(buf, remaining, "%s%c%d", sep, nfsd_vers(num, NFSD_TEST)?'+':'-', num); sep = " "; + + if (len > remaining) + break; + remaining -= len; + buf += len; + tlen += len; } if (nfsd_vers(4, NFSD_AVAIL)) - for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++) - len += sprintf(buf+len, " %c4.%u", + for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; + minor++) { + len = snprintf(buf, remaining, " %c4.%u", (nfsd_vers(4, NFSD_TEST) && nfsd_minorversion(minor, NFSD_TEST)) ? '+' : '-', minor); - len += sprintf(buf+len, "\n"); - return len; + + if (len > remaining) + break; + remaining -= len; + buf += len; + tlen += len; + } + + len = snprintf(buf, remaining, "\n"); + if (len > remaining) + return -EINVAL; + return tlen + len; } /** @@ -910,104 +933,143 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) return rv; } -static ssize_t __write_ports(struct file *file, char *buf, size_t size) +/* + * Zero-length write. Return a list of NFSD's current listener + * transports. + */ +static ssize_t __write_ports_names(char *buf) { - if (size == 0) { - int len = 0; + if (nfsd_serv == NULL) + return 0; + return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); +} - if (nfsd_serv) - len = svc_xprt_names(nfsd_serv, buf, 0); - return len; - } - /* Either a single 'fd' number is written, in which - * case it must be for a socket of a supported family/protocol, - * and we use it as an nfsd socket, or - * A '-' followed by the 'name' of a socket in which case - * we close the socket. - */ - if (isdigit(buf[0])) { - char *mesg = buf; - int fd; - int err; - err = get_int(&mesg, &fd); - if (err) - return -EINVAL; - if (fd < 0) - return -EINVAL; - err = nfsd_create_serv(); - if (!err) { - err = svc_addsock(nfsd_serv, fd, buf); - if (err >= 0) { - err = lockd_up(); - if (err < 0) - svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); - } - /* Decrease the count, but don't shutdown the - * the service - */ - nfsd_serv->sv_nrthreads--; - } - return err < 0 ? err : 0; - } - if (buf[0] == '-' && isdigit(buf[1])) { - char *toclose = kstrdup(buf+1, GFP_KERNEL); - int len = 0; - if (!toclose) - return -ENOMEM; - if (nfsd_serv) - len = svc_sock_names(buf, nfsd_serv, toclose); - if (len >= 0) - lockd_down(); - kfree(toclose); - return len; - } - /* - * Add a transport listener by writing it's transport name - */ - if (isalpha(buf[0])) { - int err; - char transport[16]; - int port; - if (sscanf(buf, "%15s %4d", transport, &port) == 2) { - if (port < 1 || port > 65535) - return -EINVAL; - err = nfsd_create_serv(); - if (!err) { - err = svc_create_xprt(nfsd_serv, - transport, PF_INET, port, - SVC_SOCK_ANONYMOUS); - if (err == -ENOENT) - /* Give a reasonable perror msg for - * bad transport string */ - err = -EPROTONOSUPPORT; - } - return err < 0 ? err : 0; - } - } - /* - * Remove a transport by writing it's transport name and port number - */ - if (buf[0] == '-' && isalpha(buf[1])) { - struct svc_xprt *xprt; - int err = -EINVAL; - char transport[16]; - int port; - if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { - if (port < 1 || port > 65535) - return -EINVAL; - if (nfsd_serv) { - xprt = svc_find_xprt(nfsd_serv, transport, - AF_UNSPEC, port); - if (xprt) { - svc_close_xprt(xprt); - svc_xprt_put(xprt); - err = 0; - } else - err = -ENOTCONN; - } - return err < 0 ? err : 0; - } +/* + * A single 'fd' number was written, in which case it must be for + * a socket of a supported family/protocol, and we use it as an + * nfsd listener. + */ +static ssize_t __write_ports_addfd(char *buf) +{ + char *mesg = buf; + int fd, err; + + err = get_int(&mesg, &fd); + if (err != 0 || fd < 0) + return -EINVAL; + + err = nfsd_create_serv(); + if (err != 0) + return err; + + err = lockd_up(); + if (err != 0) + goto out; + + err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); + if (err < 0) + lockd_down(); + +out: + /* Decrease the count, but don't shut down the service */ + nfsd_serv->sv_nrthreads--; + return err; +} + +/* + * A '-' followed by the 'name' of a socket means we close the socket. + */ +static ssize_t __write_ports_delfd(char *buf) +{ + char *toclose; + int len = 0; + + toclose = kstrdup(buf + 1, GFP_KERNEL); + if (toclose == NULL) + return -ENOMEM; + + if (nfsd_serv != NULL) + len = svc_sock_names(nfsd_serv, buf, + SIMPLE_TRANSACTION_LIMIT, toclose); + if (len >= 0) + lockd_down(); + + kfree(toclose); + return len; +} + +/* + * A transport listener is added by writing it's transport name and + * a port number. + */ +static ssize_t __write_ports_addxprt(char *buf) +{ + char transport[16]; + int port, err; + + if (sscanf(buf, "%15s %4u", transport, &port) != 2) + return -EINVAL; + + if (port < 1 || port > USHORT_MAX) + return -EINVAL; + + err = nfsd_create_serv(); + if (err != 0) + return err; + + err = svc_create_xprt(nfsd_serv, transport, + PF_INET, port, SVC_SOCK_ANONYMOUS); + if (err < 0) { + /* Give a reasonable perror msg for bad transport string */ + if (err == -ENOENT) + err = -EPROTONOSUPPORT; + return err; } + return 0; +} + +/* + * A transport listener is removed by writing a "-", it's transport + * name, and it's port number. + */ +static ssize_t __write_ports_delxprt(char *buf) +{ + struct svc_xprt *xprt; + char transport[16]; + int port; + + if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2) + return -EINVAL; + + if (port < 1 || port > USHORT_MAX || nfsd_serv == NULL) + return -EINVAL; + + xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); + if (xprt == NULL) + return -ENOTCONN; + + svc_close_xprt(xprt); + svc_xprt_put(xprt); + return 0; +} + +static ssize_t __write_ports(struct file *file, char *buf, size_t size) +{ + if (size == 0) + return __write_ports_names(buf); + + if (isdigit(buf[0])) + return __write_ports_addfd(buf); + + if (buf[0] == '-' && isdigit(buf[1])) + return __write_ports_delfd(buf); + + if (isalpha(buf[0])) + return __write_ports_addxprt(buf); + + if (buf[0] == '-' && isalpha(buf[1])) + return __write_ports_delxprt(buf); + return -EINVAL; } @@ -1030,7 +1092,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) * buf: C string containing an unsigned * integer value representing a bound * but unconnected socket that is to be - * used as an NFSD listener + * used as an NFSD listener; listen(3) + * must be called for a SOCK_STREAM + * socket, otherwise it is ignored * size: non-zero length of C string in @buf * Output: * On success: NFS service is started; @@ -1138,7 +1202,9 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) nfsd_max_blksize = bsize; mutex_unlock(&nfsd_mutex); } - return sprintf(buf, "%d\n", nfsd_max_blksize); + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", + nfsd_max_blksize); } #ifdef CONFIG_NFSD_V4 @@ -1162,8 +1228,9 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size) return -EINVAL; nfs4_reset_lease(lease); } - sprintf(buf, "%ld\n", nfs4_lease_time()); - return strlen(buf); + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", + nfs4_lease_time()); } /** @@ -1219,8 +1286,9 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) status = nfs4_reset_recoverydir(recdir); } - sprintf(buf, "%s\n", nfs4_recoverydir()); - return strlen(buf); + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n", + nfs4_recoverydir()); } /** diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 9f1ca17293d..8847f3fbfc1 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -27,9 +27,6 @@ #define NFSDDBG_FACILITY NFSDDBG_FH -static int nfsd_nr_verified; -static int nfsd_nr_put; - /* * our acceptability function. * if NOSUBTREECHECK, accept anything @@ -251,7 +248,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) fhp->fh_dentry = dentry; fhp->fh_export = exp; - nfsd_nr_verified++; return 0; out: exp_put(exp); @@ -552,7 +548,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, return nfserr_opnotsupp; } - nfsd_nr_verified++; return 0; } @@ -609,7 +604,6 @@ fh_put(struct svc_fh *fhp) fhp->fh_pre_saved = 0; fhp->fh_post_saved = 0; #endif - nfsd_nr_put++; } if (exp) { cache_put(&exp->h, &svc_export_cache); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index e298e260b5f..0eb9c820b7a 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -533,45 +533,179 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, * NFSv2 Server procedures. * Only the results of non-idempotent operations are cached. */ -#define nfsd_proc_none NULL -#define nfssvc_release_none NULL struct nfsd_void { int dummy; }; -#define PROC(name, argt, rest, relt, cache, respsize) \ - { (svc_procfunc) nfsd_proc_##name, \ - (kxdrproc_t) nfssvc_decode_##argt, \ - (kxdrproc_t) nfssvc_encode_##rest, \ - (kxdrproc_t) nfssvc_release_##relt, \ - sizeof(struct nfsd_##argt), \ - sizeof(struct nfsd_##rest), \ - 0, \ - cache, \ - respsize, \ - } - #define ST 1 /* status */ #define FH 8 /* filehandle */ #define AT 18 /* attributes */ static struct svc_procedure nfsd_procedures2[18] = { - PROC(null, void, void, none, RC_NOCACHE, ST), - PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), - PROC(setattr, sattrargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), - PROC(none, void, void, none, RC_NOCACHE, ST), - PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), - PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), - PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4), - PROC(none, void, void, none, RC_NOCACHE, ST), - PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), - PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), - PROC(remove, diropargs, void, none, RC_REPLSTAT, ST), - PROC(rename, renameargs, void, none, RC_REPLSTAT, ST), - PROC(link, linkargs, void, none, RC_REPLSTAT, ST), - PROC(symlink, symlinkargs, void, none, RC_REPLSTAT, ST), - PROC(mkdir, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), - PROC(rmdir, diropargs, void, none, RC_REPLSTAT, ST), - PROC(readdir, readdirargs, readdirres, none, RC_NOCACHE, 0), - PROC(statfs, fhandle, statfsres, none, RC_NOCACHE, ST+5), + [NFSPROC_NULL] = { + .pc_func = (svc_procfunc) nfsd_proc_null, + .pc_decode = (kxdrproc_t) nfssvc_decode_void, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFSPROC_GETATTR] = { + .pc_func = (svc_procfunc) nfsd_proc_getattr, + .pc_decode = (kxdrproc_t) nfssvc_decode_fhandle, + .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_ressize = sizeof(struct nfsd_attrstat), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT, + }, + [NFSPROC_SETATTR] = { + .pc_func = (svc_procfunc) nfsd_proc_setattr, + .pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_sattrargs), + .pc_ressize = sizeof(struct nfsd_attrstat), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+AT, + }, + [NFSPROC_ROOT] = { + .pc_decode = (kxdrproc_t) nfssvc_decode_void, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFSPROC_LOOKUP] = { + .pc_func = (svc_procfunc) nfsd_proc_lookup, + .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_diropres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_ressize = sizeof(struct nfsd_diropres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+FH+AT, + }, + [NFSPROC_READLINK] = { + .pc_func = (svc_procfunc) nfsd_proc_readlink, + .pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres, + .pc_argsize = sizeof(struct nfsd_readlinkargs), + .pc_ressize = sizeof(struct nfsd_readlinkres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+1+NFS_MAXPATHLEN/4, + }, + [NFSPROC_READ] = { + .pc_func = (svc_procfunc) nfsd_proc_read, + .pc_decode = (kxdrproc_t) nfssvc_decode_readargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_readres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_readargs), + .pc_ressize = sizeof(struct nfsd_readres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, + }, + [NFSPROC_WRITECACHE] = { + .pc_decode = (kxdrproc_t) nfssvc_decode_void, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFSPROC_WRITE] = { + .pc_func = (svc_procfunc) nfsd_proc_write, + .pc_decode = (kxdrproc_t) nfssvc_decode_writeargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_writeargs), + .pc_ressize = sizeof(struct nfsd_attrstat), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+AT, + }, + [NFSPROC_CREATE] = { + .pc_func = (svc_procfunc) nfsd_proc_create, + .pc_decode = (kxdrproc_t) nfssvc_decode_createargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_diropres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_createargs), + .pc_ressize = sizeof(struct nfsd_diropres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+FH+AT, + }, + [NFSPROC_REMOVE] = { + .pc_func = (svc_procfunc) nfsd_proc_remove, + .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_RENAME] = { + .pc_func = (svc_procfunc) nfsd_proc_rename, + .pc_decode = (kxdrproc_t) nfssvc_decode_renameargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_renameargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_LINK] = { + .pc_func = (svc_procfunc) nfsd_proc_link, + .pc_decode = (kxdrproc_t) nfssvc_decode_linkargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_linkargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_SYMLINK] = { + .pc_func = (svc_procfunc) nfsd_proc_symlink, + .pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_symlinkargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_MKDIR] = { + .pc_func = (svc_procfunc) nfsd_proc_mkdir, + .pc_decode = (kxdrproc_t) nfssvc_decode_createargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_diropres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_createargs), + .pc_ressize = sizeof(struct nfsd_diropres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+FH+AT, + }, + [NFSPROC_RMDIR] = { + .pc_func = (svc_procfunc) nfsd_proc_rmdir, + .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_READDIR] = { + .pc_func = (svc_procfunc) nfsd_proc_readdir, + .pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_readdirres, + .pc_argsize = sizeof(struct nfsd_readdirargs), + .pc_ressize = sizeof(struct nfsd_readdirres), + .pc_cachetype = RC_NOCACHE, + }, + [NFSPROC_STATFS] = { + .pc_func = (svc_procfunc) nfsd_proc_statfs, + .pc_decode = (kxdrproc_t) nfssvc_decode_fhandle, + .pc_encode = (kxdrproc_t) nfssvc_encode_statfsres, + .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_ressize = sizeof(struct nfsd_statfsres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+5, + }, }; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index cbba4a93578..d4c9884cd54 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -390,12 +390,14 @@ nfsd_svc(unsigned short port, int nrservs) mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); - error = -EINVAL; if (nrservs <= 0) nrservs = 0; if (nrservs > NFSD_MAXSERVS) nrservs = NFSD_MAXSERVS; - + error = 0; + if (nrservs == 0 && nfsd_serv == NULL) + goto out; + /* Readahead param cache - will no-op if it already exists */ error = nfsd_racache_init(2*nrservs); if (error<0) @@ -413,6 +415,12 @@ nfsd_svc(unsigned short port, int nrservs) goto failure; error = svc_set_num_threads(nfsd_serv, NULL, nrservs); + if (error == 0) + /* We are holding a reference to nfsd_serv which + * we don't want to count in the return value, + * so subtract 1 + */ + error = nfsd_serv->sv_nrthreads - 1; failure: svc_destroy(nfsd_serv); /* Release server */ out: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 99f83575359..4145083dcf8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -966,6 +966,43 @@ static void kill_suid(struct dentry *dentry) mutex_unlock(&dentry->d_inode->i_mutex); } +/* + * Gathered writes: If another process is currently writing to the file, + * there's a high chance this is another nfsd (triggered by a bulk write + * from a client's biod). Rather than syncing the file with each write + * request, we sleep for 10 msec. + * + * I don't know if this roughly approximates C. Juszak's idea of + * gathered writes, but it's a nice and simple solution (IMHO), and it + * seems to work:-) + * + * Note: we do this only in the NFSv2 case, since v3 and higher have a + * better tool (separate unstable writes and commits) for solving this + * problem. + */ +static int wait_for_concurrent_writes(struct file *file) +{ + struct inode *inode = file->f_path.dentry->d_inode; + static ino_t last_ino; + static dev_t last_dev; + int err = 0; + + if (atomic_read(&inode->i_writecount) > 1 + || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { + dprintk("nfsd: write defer %d\n", task_pid_nr(current)); + msleep(10); + dprintk("nfsd: write resume %d\n", task_pid_nr(current)); + } + + if (inode->i_state & I_DIRTY) { + dprintk("nfsd: write sync %d\n", task_pid_nr(current)); + err = nfsd_sync(file); + } + last_ino = inode->i_ino; + last_dev = inode->i_sb->s_dev; + return err; +} + static __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, @@ -978,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, __be32 err = 0; int host_err; int stable = *stablep; + int use_wgather; #ifdef MSNFS err = nfserr_perm; @@ -996,9 +1034,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, * - the sync export option has been set, or * - the client requested O_SYNC behavior (NFSv3 feature). * - The file system doesn't support fsync(). - * When gathered writes have been configured for this volume, + * When NFSv2 gathered writes have been configured for this volume, * flushing the data to disk is handled separately below. */ + use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); if (!file->f_op->fsync) {/* COMMIT3 cannot work */ stable = 2; @@ -1007,7 +1046,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (!EX_ISSYNC(exp)) stable = 0; - if (stable && !EX_WGATHER(exp)) { + if (stable && !use_wgather) { spin_lock(&file->f_lock); file->f_flags |= O_SYNC; spin_unlock(&file->f_lock); @@ -1017,52 +1056,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, oldfs = get_fs(); set_fs(KERNEL_DS); host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); set_fs(oldfs); - if (host_err >= 0) { - *cnt = host_err; - nfsdstats.io_write += host_err; - fsnotify_modify(file->f_path.dentry); - } + if (host_err < 0) + goto out_nfserr; + *cnt = host_err; + nfsdstats.io_write += host_err; + fsnotify_modify(file->f_path.dentry); /* clear setuid/setgid flag after write */ - if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) + if (inode->i_mode & (S_ISUID | S_ISGID)) kill_suid(dentry); - if (host_err >= 0 && stable) { - static ino_t last_ino; - static dev_t last_dev; - - /* - * Gathered writes: If another process is currently - * writing to the file, there's a high chance - * this is another nfsd (triggered by a bulk write - * from a client's biod). Rather than syncing the - * file with each write request, we sleep for 10 msec. - * - * I don't know if this roughly approximates - * C. Juszak's idea of gathered writes, but it's a - * nice and simple solution (IMHO), and it seems to - * work:-) - */ - if (EX_WGATHER(exp)) { - if (atomic_read(&inode->i_writecount) > 1 - || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { - dprintk("nfsd: write defer %d\n", task_pid_nr(current)); - msleep(10); - dprintk("nfsd: write resume %d\n", task_pid_nr(current)); - } - - if (inode->i_state & I_DIRTY) { - dprintk("nfsd: write sync %d\n", task_pid_nr(current)); - host_err=nfsd_sync(file); - } -#if 0 - wake_up(&inode->i_wait); -#endif - } - last_ino = inode->i_ino; - last_dev = inode->i_sb->s_dev; - } + if (stable && use_wgather) + host_err = wait_for_concurrent_writes(file); +out_nfserr: dprintk("nfsd: write complete host_err=%d\n", host_err); if (host_err >= 0) err = 0; diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index ea2605a58b8..f234f3a4c8c 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -15,7 +15,8 @@ struct inotify_inode_mark_entry { int wd; }; -extern void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group); +extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, + struct fsnotify_group *group); extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); extern const struct fsnotify_ops inotify_fsnotify_ops; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 7ef75b83247..47cd258fd24 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -81,7 +81,7 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) { - inotify_destroy_mark_entry(entry, group); + inotify_ignored_and_remove_idr(entry, group); } static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 982a412ac5b..ff231ad2389 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -363,39 +363,17 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns } /* - * When, for whatever reason, inotify is done with a mark (or what used to be a - * watch) we need to remove that watch from the idr and we need to send IN_IGNORED - * for the given wd. - * - * There is a bit of recursion here. The loop looks like: - * inotify_destroy_mark_entry -> fsnotify_destroy_mark_by_entry -> - * inotify_freeing_mark -> inotify_destory_mark_entry -> restart - * But the loop is broken in 2 places. fsnotify_destroy_mark_by_entry sets - * entry->group = NULL before the call to inotify_freeing_mark, so the if (egroup) - * test below will not call back to fsnotify again. But even if that test wasn't - * there this would still be safe since fsnotify_destroy_mark_by_entry() is - * safe from recursion. + * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the + * internal reference help on the mark because it is in the idr. */ -void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) +void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, + struct fsnotify_group *group) { struct inotify_inode_mark_entry *ientry; struct inotify_event_private_data *event_priv; struct fsnotify_event_private_data *fsn_event_priv; - struct fsnotify_group *egroup; struct idr *idr; - spin_lock(&entry->lock); - egroup = entry->group; - - /* if egroup we aren't really done and something might still send events - * for this inode, on the callback we'll send the IN_IGNORED */ - if (egroup) { - spin_unlock(&entry->lock); - fsnotify_destroy_mark_by_entry(entry); - return; - } - spin_unlock(&entry->lock); - ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); @@ -699,7 +677,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) fsnotify_get_mark(entry); spin_unlock(&group->inotify_data.idr_lock); - inotify_destroy_mark_entry(entry, group); + fsnotify_destroy_mark_by_entry(entry); fsnotify_put_mark(entry); out: diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 6cdeaa76f27..110bb57c46a 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -92,6 +92,9 @@ struct ocfs2_unblock_ctl { enum ocfs2_unblock_action unblock_action; }; +/* Lockdep class keys */ +struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; + static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level); static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); @@ -317,9 +320,16 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, u32 dlm_flags); static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, int wanted); -static void ocfs2_cluster_unlock(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - int level); +static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, unsigned long caller_ip); +static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level) +{ + __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); +} + static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); @@ -489,6 +499,13 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); ocfs2_init_lock_stats(res); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (type != OCFS2_LOCK_TYPE_OPEN) + lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], + &lockdep_keys[type], 0); + else + res->l_lockdep_map.key = NULL; +#endif } void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) @@ -644,14 +661,10 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, struct ocfs2_super *osb) { - struct ocfs2_orphan_scan_lvb *lvb; - ocfs2_lock_res_init_once(res); ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, &ocfs2_orphan_scan_lops, osb); - lvb = ocfs2_dlm_lvb(&res->l_lksb); - lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; } void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, @@ -1256,11 +1269,13 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, return ret; } -static int ocfs2_cluster_lock(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - int level, - u32 lkm_flags, - int arg_flags) +static int __ocfs2_cluster_lock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, + u32 lkm_flags, + int arg_flags, + int l_subclass, + unsigned long caller_ip) { struct ocfs2_mask_waiter mw; int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); @@ -1403,13 +1418,37 @@ out: } ocfs2_update_lock_stats(lockres, level, &mw, ret); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (!ret && lockres->l_lockdep_map.key != NULL) { + if (level == DLM_LOCK_PR) + rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, + !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), + caller_ip); + else + rwsem_acquire(&lockres->l_lockdep_map, l_subclass, + !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), + caller_ip); + } +#endif mlog_exit(ret); return ret; } -static void ocfs2_cluster_unlock(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - int level) +static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, + u32 lkm_flags, + int arg_flags) +{ + return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, + 0, _RET_IP_); +} + + +static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, + unsigned long caller_ip) { unsigned long flags; @@ -1418,6 +1457,10 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, ocfs2_dec_holders(lockres, level); ocfs2_downconvert_on_unlock(osb, lockres); spin_unlock_irqrestore(&lockres->l_lock, flags); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (lockres->l_lockdep_map.key != NULL) + rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); +#endif mlog_exit_void(); } @@ -1989,7 +2032,8 @@ static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, { struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); - if (lvb->lvb_version == OCFS2_LVB_VERSION + if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) + && lvb->lvb_version == OCFS2_LVB_VERSION && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) return 1; return 0; @@ -2162,10 +2206,11 @@ static int ocfs2_assign_bh(struct inode *inode, * returns < 0 error if the callback will never be called, otherwise * the result of the lock will be communicated via the callback. */ -int ocfs2_inode_lock_full(struct inode *inode, - struct buffer_head **ret_bh, - int ex, - int arg_flags) +int ocfs2_inode_lock_full_nested(struct inode *inode, + struct buffer_head **ret_bh, + int ex, + int arg_flags, + int subclass) { int status, level, acquired; u32 dlm_flags; @@ -2203,7 +2248,8 @@ int ocfs2_inode_lock_full(struct inode *inode, if (arg_flags & OCFS2_META_LOCK_NOQUEUE) dlm_flags |= DLM_LKF_NOQUEUE; - status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); + status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, + arg_flags, subclass, _RET_IP_); if (status < 0) { if (status != -EAGAIN && status != -EIOCBRETRY) mlog_errno(status); @@ -2369,35 +2415,45 @@ void ocfs2_inode_unlock(struct inode *inode, mlog_exit_void(); } -int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex) +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) { struct ocfs2_lock_res *lockres; struct ocfs2_orphan_scan_lvb *lvb; - int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; int status = 0; + if (ocfs2_is_hard_readonly(osb)) + return -EROFS; + + if (ocfs2_mount_local(osb)) + return 0; + lockres = &osb->osb_orphan_scan.os_lockres; - status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); + status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); if (status < 0) return status; lvb = ocfs2_dlm_lvb(&lockres->l_lksb); - if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) + if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && + lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) *seqno = be32_to_cpu(lvb->lvb_os_seqno); + else + *seqno = osb->osb_orphan_scan.os_seqno + 1; + return status; } -void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex) +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) { struct ocfs2_lock_res *lockres; struct ocfs2_orphan_scan_lvb *lvb; - int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; - lockres = &osb->osb_orphan_scan.os_lockres; - lvb = ocfs2_dlm_lvb(&lockres->l_lksb); - lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; - lvb->lvb_os_seqno = cpu_to_be32(seqno); - ocfs2_cluster_unlock(osb, lockres, level); + if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { + lockres = &osb->osb_orphan_scan.os_lockres; + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); + lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; + lvb->lvb_os_seqno = cpu_to_be32(seqno); + ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); + } } int ocfs2_super_lock(struct ocfs2_super *osb, @@ -3627,7 +3683,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) struct ocfs2_global_disk_dqinfo *gdinfo; int status = 0; - if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { + if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && + lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 31b90d7b8f5..7553836931d 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -78,6 +78,14 @@ struct ocfs2_orphan_scan_lvb { /* don't block waiting for the downconvert thread, instead return -EAGAIN */ #define OCFS2_LOCK_NONBLOCK (0x04) +/* Locking subclasses of inode cluster lock */ +enum { + OI_LS_NORMAL = 0, + OI_LS_PARENT, + OI_LS_RENAME1, + OI_LS_RENAME2, +}; + int ocfs2_dlm_init(struct ocfs2_super *osb); void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending); void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); @@ -104,25 +112,31 @@ void ocfs2_open_unlock(struct inode *inode); int ocfs2_inode_lock_atime(struct inode *inode, struct vfsmount *vfsmnt, int *level); -int ocfs2_inode_lock_full(struct inode *inode, +int ocfs2_inode_lock_full_nested(struct inode *inode, struct buffer_head **ret_bh, int ex, - int arg_flags); + int arg_flags, + int subclass); int ocfs2_inode_lock_with_page(struct inode *inode, struct buffer_head **ret_bh, int ex, struct page *page); +/* Variants without special locking class or flags */ +#define ocfs2_inode_lock_full(i, r, e, f)\ + ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL) +#define ocfs2_inode_lock_nested(i, b, e, s)\ + ocfs2_inode_lock_full_nested(i, b, e, 0, s) /* 99% of the time we don't want to supply any additional flags -- * those are for very specific cases only. */ -#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0) +#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL) void ocfs2_inode_unlock(struct inode *inode, int ex); int ocfs2_super_lock(struct ocfs2_super *osb, int ex); void ocfs2_super_unlock(struct ocfs2_super *osb, int ex); -int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex); -void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex); +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno); +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno); int ocfs2_rename_lock(struct ocfs2_super *osb); void ocfs2_rename_unlock(struct ocfs2_super *osb); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 07267e0da90..62442e413a0 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2026,7 +2026,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in, size_t len, unsigned int flags) { - int ret = 0; + int ret = 0, lock_level = 0; struct inode *inode = in->f_path.dentry->d_inode; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, @@ -2037,12 +2037,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in, /* * See the comment in ocfs2_file_aio_read() */ - ret = ocfs2_inode_lock(inode, NULL, 0); + ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto bail; } - ocfs2_inode_unlock(inode, 0); + ocfs2_inode_unlock(inode, lock_level); ret = generic_file_splice_read(in, ppos, pipe, len, flags); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 10e1fa87396..4dc8890ba31 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -215,6 +215,8 @@ bail: static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) { struct ocfs2_find_inode_args *args = opaque; + static struct lock_class_key ocfs2_quota_ip_alloc_sem_key, + ocfs2_file_ip_alloc_sem_key; mlog_entry("inode = %p, opaque = %p\n", inode, opaque); @@ -223,6 +225,15 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) if (args->fi_sysfile_type != 0) lockdep_set_class(&inode->i_mutex, &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); + if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE || + args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE || + args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE || + args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE) + lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem, + &ocfs2_quota_ip_alloc_sem_key); + else + lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem, + &ocfs2_file_ip_alloc_sem_key); mlog_exit(0); return 0; diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4a3b9e6b31a..f033760ecbe 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1880,13 +1880,20 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) os = &osb->osb_orphan_scan; - status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) + goto out; + + status = ocfs2_orphan_scan_lock(osb, &seqno); if (status < 0) { if (status != -EAGAIN) mlog_errno(status); goto out; } + /* Do no queue the tasks if the volume is being umounted */ + if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) + goto unlock; + if (os->os_seqno != seqno) { os->os_seqno = seqno; goto unlock; @@ -1903,7 +1910,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) os->os_count++; os->os_scantime = CURRENT_TIME; unlock: - ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX); + ocfs2_orphan_scan_unlock(osb, seqno); out: return; } @@ -1920,8 +1927,9 @@ void ocfs2_orphan_scan_work(struct work_struct *work) mutex_lock(&os->os_lock); ocfs2_queue_orphan_scan(osb); - schedule_delayed_work(&os->os_orphan_scan_work, - ocfs2_orphan_scan_timeout()); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) + schedule_delayed_work(&os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); mutex_unlock(&os->os_lock); } @@ -1930,26 +1938,33 @@ void ocfs2_orphan_scan_stop(struct ocfs2_super *osb) struct ocfs2_orphan_scan *os; os = &osb->osb_orphan_scan; - mutex_lock(&os->os_lock); - cancel_delayed_work(&os->os_orphan_scan_work); - mutex_unlock(&os->os_lock); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) { + atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); + mutex_lock(&os->os_lock); + cancel_delayed_work(&os->os_orphan_scan_work); + mutex_unlock(&os->os_lock); + } } -int ocfs2_orphan_scan_init(struct ocfs2_super *osb) +void ocfs2_orphan_scan_init(struct ocfs2_super *osb) { struct ocfs2_orphan_scan *os; os = &osb->osb_orphan_scan; os->os_osb = osb; os->os_count = 0; + os->os_seqno = 0; os->os_scantime = CURRENT_TIME; mutex_init(&os->os_lock); - - INIT_DELAYED_WORK(&os->os_orphan_scan_work, - ocfs2_orphan_scan_work); - schedule_delayed_work(&os->os_orphan_scan_work, - ocfs2_orphan_scan_timeout()); - return 0; + INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); + + if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) + atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); + else { + atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); + schedule_delayed_work(&os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); + } } struct ocfs2_orphan_filldir_priv { diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 61045eeb3f6..5432c7f79cc 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -144,7 +144,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, } /* Exported only for the journal struct init code in super.c. Do not call. */ -int ocfs2_orphan_scan_init(struct ocfs2_super *osb); +void ocfs2_orphan_scan_init(struct ocfs2_super *osb); void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 33464c6b60a..8601f934010 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -118,7 +118,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); - status = ocfs2_inode_lock(dir, NULL, 0); + status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -636,7 +636,7 @@ static int ocfs2_link(struct dentry *old_dentry, if (S_ISDIR(inode->i_mode)) return -EPERM; - err = ocfs2_inode_lock(dir, &parent_fe_bh, 1); + err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT); if (err < 0) { if (err != -ENOENT) mlog_errno(err); @@ -800,7 +800,8 @@ static int ocfs2_unlink(struct inode *dir, return -EPERM; } - status = ocfs2_inode_lock(dir, &parent_node_bh, 1); + status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1, + OI_LS_PARENT); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -978,7 +979,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, inode1 = tmpinode; } /* lock id2 */ - status = ocfs2_inode_lock(inode2, bh2, 1); + status = ocfs2_inode_lock_nested(inode2, bh2, 1, + OI_LS_RENAME1); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -987,7 +989,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, } /* lock id1 */ - status = ocfs2_inode_lock(inode1, bh1, 1); + status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2); if (status < 0) { /* * An error return must mean that no cluster locks @@ -1103,7 +1105,8 @@ static int ocfs2_rename(struct inode *old_dir, * won't have to concurrently downconvert the inode and the * dentry locks. */ - status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1); + status = ocfs2_inode_lock_nested(old_inode, &old_inode_bh, 1, + OI_LS_PARENT); if (status < 0) { if (status != -ENOENT) mlog_errno(status); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 18c1d9ec1c9..c9345ebb849 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -34,6 +34,7 @@ #include <linux/workqueue.h> #include <linux/kref.h> #include <linux/mutex.h> +#include <linux/lockdep.h> #ifndef CONFIG_OCFS2_COMPAT_JBD # include <linux/jbd2.h> #else @@ -152,6 +153,14 @@ struct ocfs2_lock_res { unsigned int l_lock_max_exmode; /* Max wait for EX */ unsigned int l_lock_refresh; /* Disk refreshes */ #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map l_lockdep_map; +#endif +}; + +enum ocfs2_orphan_scan_state { + ORPHAN_SCAN_ACTIVE, + ORPHAN_SCAN_INACTIVE }; struct ocfs2_orphan_scan { @@ -162,6 +171,7 @@ struct ocfs2_orphan_scan { struct timespec os_scantime; /* time this node ran the scan */ u32 os_count; /* tracks node specific scans */ u32 os_seqno; /* tracks cluster wide scans */ + atomic_t os_state; /* ACTIVE or INACTIVE */ }; struct ocfs2_dlm_debug { diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index fcd120f1493..3f661376a2d 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -236,6 +236,16 @@ static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb) return dlm_status_to_errno(lksb->lksb_o2dlm.status); } +/* + * o2dlm aways has a "valid" LVB. If the dlm loses track of the LVB + * contents, it will zero out the LVB. Thus the caller can always trust + * the contents. + */ +static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) +{ + return 1; +} + static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) { return (void *)(lksb->lksb_o2dlm.lvb); @@ -354,6 +364,7 @@ static struct ocfs2_stack_operations o2cb_stack_ops = { .dlm_lock = o2cb_dlm_lock, .dlm_unlock = o2cb_dlm_unlock, .lock_status = o2cb_dlm_lock_status, + .lvb_valid = o2cb_dlm_lvb_valid, .lock_lvb = o2cb_dlm_lvb, .dump_lksb = o2cb_dump_lksb, }; diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 9b76d41a8ac..ff4c798a563 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -738,6 +738,13 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb) return lksb->lksb_fsdlm.sb_status; } +static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) +{ + int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID; + + return !invalid; +} + static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) { if (!lksb->lksb_fsdlm.sb_lvbptr) @@ -873,6 +880,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .dlm_lock = user_dlm_lock, .dlm_unlock = user_dlm_unlock, .lock_status = user_dlm_lock_status, + .lvb_valid = user_dlm_lvb_valid, .lock_lvb = user_dlm_lvb, .plock = user_plock, .dump_lksb = user_dlm_dump_lksb, diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 68b668b0e60..3f2f1c45b7b 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -6,7 +6,7 @@ * Code which implements an OCFS2 specific interface to underlying * cluster stacks. * - * Copyright (C) 2007 Oracle. All rights reserved. + * Copyright (C) 2007, 2009 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public @@ -271,11 +271,12 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) } EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); -/* - * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we - * don't cast at the glue level. The real answer is that the header - * ordering is nigh impossible. - */ +int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) +{ + return active_stack->sp_ops->lvb_valid(lksb); +} +EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid); + void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) { return active_stack->sp_ops->lock_lvb(lksb); diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index c571af375ef..03a44d60eac 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -186,6 +186,11 @@ struct ocfs2_stack_operations { int (*lock_status)(union ocfs2_dlm_lksb *lksb); /* + * Return non-zero if the LVB is valid. + */ + int (*lvb_valid)(union ocfs2_dlm_lksb *lksb); + + /* * Pull the lvb pointer off of the stack-specific lksb. */ void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); @@ -252,6 +257,7 @@ int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, struct ocfs2_lock_res *astarg); int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); +int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb); void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8439f6b324b..73a16d4666d 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -923,14 +923,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, int nr) { struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; + int ret; if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) return 0; - if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data) + + if (!buffer_jbd(bg_bh)) return 1; + jbd_lock_bh_state(bg_bh); bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; - return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + if (bg) + ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + else + ret = 1; + jbd_unlock_bh_state(bg_bh); + + return ret; } static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, @@ -1885,6 +1894,7 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, unsigned int tmp; int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; struct ocfs2_group_desc *undo_bg = NULL; + int cluster_bitmap = 0; mlog_entry_void(); @@ -1905,18 +1915,28 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, } if (ocfs2_is_cluster_bitmap(alloc_inode)) - undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; + cluster_bitmap = 1; + + if (cluster_bitmap) { + jbd_lock_bh_state(group_bh); + undo_bg = (struct ocfs2_group_desc *) + bh2jh(group_bh)->b_committed_data; + BUG_ON(!undo_bg); + } tmp = num_bits; while(tmp--) { ocfs2_clear_bit((bit_off + tmp), (unsigned long *) bg->bg_bitmap); - if (ocfs2_is_cluster_bitmap(alloc_inode)) + if (cluster_bitmap) ocfs2_set_bit(bit_off + tmp, (unsigned long *) undo_bg->bg_bitmap); } le16_add_cpu(&bg->bg_free_bits_count, num_bits); + if (cluster_bitmap) + jbd_unlock_bh_state(group_bh); + status = ocfs2_journal_dirty(handle, group_bh); if (status < 0) mlog_errno(status); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d33767f17ba..7efb349fb9b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -205,11 +205,10 @@ static const match_table_t tokens = { #ifdef CONFIG_DEBUG_FS static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) { - int out = 0; - int i; struct ocfs2_cluster_connection *cconn = osb->cconn; struct ocfs2_recovery_map *rm = osb->recovery_map; - struct ocfs2_orphan_scan *os; + struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan; + int i, out = 0; out += snprintf(buf + out, len - out, "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", @@ -234,20 +233,24 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount", osb->s_mount_opt, osb->s_atime_quantum); - out += snprintf(buf + out, len - out, - "%10s => Stack: %s Name: %*s Version: %d.%d\n", - "Cluster", - (*osb->osb_cluster_stack == '\0' ? - "o2cb" : osb->osb_cluster_stack), - cconn->cc_namelen, cconn->cc_name, - cconn->cc_version.pv_major, cconn->cc_version.pv_minor); + if (cconn) { + out += snprintf(buf + out, len - out, + "%10s => Stack: %s Name: %*s " + "Version: %d.%d\n", "Cluster", + (*osb->osb_cluster_stack == '\0' ? + "o2cb" : osb->osb_cluster_stack), + cconn->cc_namelen, cconn->cc_name, + cconn->cc_version.pv_major, + cconn->cc_version.pv_minor); + } spin_lock(&osb->dc_task_lock); out += snprintf(buf + out, len - out, "%10s => Pid: %d Count: %lu WakeSeq: %lu " "WorkSeq: %lu\n", "DownCnvt", - task_pid_nr(osb->dc_task), osb->blocked_lock_count, - osb->dc_wake_sequence, osb->dc_work_sequence); + (osb->dc_task ? task_pid_nr(osb->dc_task) : -1), + osb->blocked_lock_count, osb->dc_wake_sequence, + osb->dc_work_sequence); spin_unlock(&osb->dc_task_lock); spin_lock(&osb->osb_lock); @@ -267,14 +270,15 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) out += snprintf(buf + out, len - out, "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", - task_pid_nr(osb->commit_task), osb->osb_commit_interval, + (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), + osb->osb_commit_interval, atomic_read(&osb->needs_checkpoint)); out += snprintf(buf + out, len - out, - "%10s => State: %d NumTxns: %d TxnId: %lu\n", + "%10s => State: %d TxnId: %lu NumTxns: %d\n", "Journal", osb->journal->j_state, - atomic_read(&osb->journal->j_num_trans), - osb->journal->j_trans_id); + osb->journal->j_trans_id, + atomic_read(&osb->journal->j_num_trans)); out += snprintf(buf + out, len - out, "%10s => GlobalAllocs: %d LocalAllocs: %d " @@ -300,9 +304,18 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) atomic_read(&osb->s_num_inodes_stolen)); spin_unlock(&osb->osb_lock); + out += snprintf(buf + out, len - out, "OrphanScan => "); + out += snprintf(buf + out, len - out, "Local: %u Global: %u ", + os->os_count, os->os_seqno); + out += snprintf(buf + out, len - out, " Last Scan: "); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) + out += snprintf(buf + out, len - out, "Disabled\n"); + else + out += snprintf(buf + out, len - out, "%lu seconds ago\n", + (get_seconds() - os->os_scantime.tv_sec)); + out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", "Slots", "Num", "RecoGen"); - for (i = 0; i < osb->max_slots; ++i) { out += snprintf(buf + out, len - out, "%10s %c %3d %10d\n", @@ -311,13 +324,6 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) i, osb->slot_recovery_generations[i]); } - os = &osb->osb_orphan_scan; - out += snprintf(buf + out, len - out, "Orphan Scan=> "); - out += snprintf(buf + out, len - out, "Local: %u Global: %u ", - os->os_count, os->os_seqno); - out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n", - (get_seconds() - os->os_scantime.tv_sec)); - return out; } @@ -552,7 +558,7 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits, */ #if BITS_PER_LONG == 32 -# if defined(CONFIG_LBD) +# if defined(CONFIG_LBDAF) BUILD_BUG_ON(sizeof(sector_t) != 8); /* * We might be limited by page cache size. @@ -1175,6 +1181,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS); wake_up(&osb->osb_mount_event); + /* Start this when the mount is almost sure of being successful */ + ocfs2_orphan_scan_init(osb); + mlog_exit(status); return status; @@ -1810,14 +1819,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) debugfs_remove(osb->osb_ctxt); + /* Orphan scan should be stopped as early as possible */ + ocfs2_orphan_scan_stop(osb); + ocfs2_disable_quotas(osb); ocfs2_shutdown_local_alloc(osb); ocfs2_truncate_log_shutdown(osb); - ocfs2_orphan_scan_stop(osb); - /* This will disable recovery and flush any recovery work. */ ocfs2_recovery_exit(osb); @@ -1978,13 +1988,6 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - status = ocfs2_orphan_scan_init(osb); - if (status) { - mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n"); - mlog_errno(status); - goto bail; - } - init_waitqueue_head(&osb->checkpoint_event); atomic_set(&osb->needs_checkpoint, 0); diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index ab713ebdd54..40e53702948 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c @@ -50,6 +50,10 @@ static inline int is_in_system_inode_array(struct ocfs2_super *osb, int type, u32 slot); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; +#endif + static inline int is_global_system_inode(int type) { return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE && @@ -118,6 +122,21 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, inode = NULL; goto bail; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (type == LOCAL_USER_QUOTA_SYSTEM_INODE || + type == LOCAL_GROUP_QUOTA_SYSTEM_INODE || + type == JOURNAL_SYSTEM_INODE) { + /* Ignore inode lock on these inodes as the lock does not + * really belong to any process and lockdep cannot handle + * that */ + OCFS2_I(inode)->ip_inode_lockres.l_lockdep_map.key = NULL; + } else { + lockdep_init_map(&OCFS2_I(inode)->ip_inode_lockres. + l_lockdep_map, + ocfs2_system_inodes[type].si_name, + &ocfs2_sysfile_cluster_lock_key[type], 0); + } +#endif bail: return inode; diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 63d965193b2..11a7b5c6815 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -18,6 +18,7 @@ proc-y += meminfo.o proc-y += stat.o proc-y += uptime.o proc-y += version.o +proc-y += softirqs.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index fc6c3025bef..7ba79a54948 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -195,20 +195,20 @@ void proc_device_tree_add_node(struct device_node *np, p = fixup_name(np, de, p); ent = proc_mkdir(p, de); - if (ent == 0) + if (ent == NULL) break; proc_device_tree_add_node(child, ent); } of_node_put(child); - for (pp = np->properties; pp != 0; pp = pp->next) { + for (pp = np->properties; pp != NULL; pp = pp->next) { p = pp->name; if (duplicate_name(de, p)) p = fixup_name(np, de, p); ent = __proc_device_tree_add_prop(de, pp, p); - if (ent == 0) + if (ent == NULL) break; } } @@ -221,10 +221,10 @@ void __init proc_device_tree_init(void) struct device_node *root; proc_device_tree = proc_mkdir("device-tree", NULL); - if (proc_device_tree == 0) + if (proc_device_tree == NULL) return; root = of_find_node_by_path("/"); - if (root == 0) { + if (root == NULL) { printk(KERN_ERR "/proc/device-tree: can't find root\n"); return; } diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c new file mode 100644 index 00000000000..1807c2419f1 --- /dev/null +++ b/fs/proc/softirqs.c @@ -0,0 +1,44 @@ +#include <linux/init.h> +#include <linux/kernel_stat.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> + +/* + * /proc/softirqs ... display the number of softirqs + */ +static int show_softirqs(struct seq_file *p, void *v) +{ + int i, j; + + seq_printf(p, " "); + for_each_possible_cpu(i) + seq_printf(p, "CPU%-8d", i); + seq_printf(p, "\n"); + + for (i = 0; i < NR_SOFTIRQS; i++) { + seq_printf(p, "%8s:", softirq_to_name[i]); + for_each_possible_cpu(j) + seq_printf(p, " %10u", kstat_softirqs_cpu(i, j)); + seq_printf(p, "\n"); + } + return 0; +} + +static int softirqs_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_softirqs, NULL); +} + +static const struct file_operations proc_softirqs_operations = { + .open = softirqs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init proc_softirqs_init(void) +{ + proc_create("softirqs", 0, NULL, &proc_softirqs_operations); + return 0; +} +module_init(proc_softirqs_init); diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 81e4eb60972..7cc726c6d70 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -29,6 +29,8 @@ static int show_stat(struct seq_file *p, void *v) cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; cputime64_t guest; u64 sum = 0; + u64 sum_softirq = 0; + unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; struct timespec boottime; unsigned int per_irq_sum; @@ -53,6 +55,13 @@ static int show_stat(struct seq_file *p, void *v) sum += kstat_irqs_cpu(j, i); } sum += arch_irq_stat_cpu(i); + + for (j = 0; j < NR_SOFTIRQS; j++) { + unsigned int softirq_stat = kstat_softirqs_cpu(j, i); + + per_softirq_sums[j] += softirq_stat; + sum_softirq += softirq_stat; + } } sum += arch_irq_stat(); @@ -115,6 +124,12 @@ static int show_stat(struct seq_file *p, void *v) nr_running(), nr_iowait()); + seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); + + for (i = 0; i < NR_SOFTIRQS; i++) + seq_printf(p, " %u", per_softirq_sums[i]); + seq_printf(p, "\n"); + return 0; } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 5edcc3f92ba..0872afa58d3 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -166,12 +166,7 @@ static const struct file_operations proc_vmcore_operations = { static struct vmcore* __init get_new_element(void) { - struct vmcore *p; - - p = kmalloc(sizeof(*p), GFP_KERNEL); - if (p) - memset(p, 0, sizeof(*p)); - return p; + return kzalloc(sizeof(struct vmcore), GFP_KERNEL); } static u64 __init get_vmcore_size_elf64(char *elfptr) diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c index 4beb964a2a3..128d3f7c8aa 100644 --- a/fs/reiserfs/do_balan.c +++ b/fs/reiserfs/do_balan.c @@ -1270,9 +1270,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h RFALSE(ih, "PAP-12210: ih must be 0"); - if (is_direntry_le_ih - (aux_ih = - B_N_PITEM_HEAD(tbS0, item_pos))) { + aux_ih = B_N_PITEM_HEAD(tbS0, item_pos); + if (is_direntry_le_ih(aux_ih)) { /* we append to directory item */ int entry_count; diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c index 381750a155f..03d85cbf90b 100644 --- a/fs/reiserfs/lbalance.c +++ b/fs/reiserfs/lbalance.c @@ -390,7 +390,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, if (last_first == FIRST_TO_LAST) { /* if ( if item in position item_num in buffer SOURCE is directory item ) */ - if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) + ih = B_N_PITEM_HEAD(src, item_num); + if (is_direntry_le_ih(ih)) leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, item_num, 0, cpy_bytes); else { @@ -418,7 +419,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, } } else { /* if ( if item in position item_num in buffer SOURCE is directory item ) */ - if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) + ih = B_N_PITEM_HEAD(src, item_num); + if (is_direntry_le_ih(ih)) leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, item_num, I_ENTRY_COUNT(ih) - cpy_bytes, @@ -774,8 +776,8 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first, leaf_delete_items_entirely(cur_bi, first + 1, del_num - 1); - if (is_direntry_le_ih - (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1))) + ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1); + if (is_direntry_le_ih(ih)) /* the last item is directory */ /* len = numbers of directory entries in this item */ len = ih_entry_count(ih); diff --git a/fs/seq_file.c b/fs/seq_file.c index 7f40f30c55c..6c959275f2d 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -640,6 +640,26 @@ int seq_puts(struct seq_file *m, const char *s) } EXPORT_SYMBOL(seq_puts); +/** + * seq_write - write arbitrary data to buffer + * @seq: seq_file identifying the buffer to which data should be written + * @data: data address + * @len: number of bytes + * + * Return 0 on success, non-zero otherwise. + */ +int seq_write(struct seq_file *seq, const void *data, size_t len) +{ + if (seq->count + len < seq->size) { + memcpy(seq->buf + seq->count, data, len); + seq->count += len; + return 0; + } + seq->count = seq->size; + return -1; +} +EXPORT_SYMBOL(seq_write); + struct list_head *seq_list_start(struct list_head *head, loff_t pos) { struct list_head *lh; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 3d2512c21f0..7cf33379fd4 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -56,9 +56,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off UFSD("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks); - if (i_block < 0) { - ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0"); - } else if (i_block < direct_blocks) { + if (i_block < direct_blocks) { offsets[n++] = i_block; } else if ((i_block -= direct_blocks) < indirect_blocks) { offsets[n++] = UFS_IND_BLOCK; @@ -440,8 +438,6 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head lock_kernel(); UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); - if (fragment < 0) - goto abort_negative; if (fragment > ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb) << uspi->s_fpbshift)) @@ -504,10 +500,6 @@ abort: unlock_kernel(); return err; -abort_negative: - ufs_warning(sb, "ufs_get_block", "block < 0"); - goto abort; - abort_too_big: ufs_warning(sb, "ufs_get_block", "block > big"); goto abort; diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index f65a53f8752..6127e24062d 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -24,7 +24,7 @@ * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set. */ -#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) +#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64) # define XFS_BIG_BLKNOS 1 # define XFS_BIG_INUMS 1 #else diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 2e09efbca8d..a220d36f789 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -616,7 +616,7 @@ xfs_max_file_offset( */ #if BITS_PER_LONG == 32 -# if defined(CONFIG_LBD) +# if defined(CONFIG_LBDAF) ASSERT(sizeof(sector_t) == 8); pagefactor = PAGE_CACHE_SIZE; bitshift = BITS_PER_LONG; |