diff options
Diffstat (limited to 'fs')
66 files changed, 2113 insertions, 1683 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 977ef208c05..3662dd44896 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -58,8 +58,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = { * of the file * * @name: [in] name of the "class" of the new file - * @fops [in] file operations for the new file - * @priv [in] private data for the new file (will be file's private_data) + * @fops: [in] file operations for the new file + * @priv: [in] private data for the new file (will be file's private_data) + * @flags: [in] flags * * Creates a new file by hooking it on a single inode. This is useful for files * that do not need to have a full-fledged inode in order to operate correctly. @@ -68,7 +69,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = { * setup. Returns new descriptor or -error. */ int anon_inode_getfd(const char *name, const struct file_operations *fops, - void *priv) + void *priv, int flags) { struct qstr this; struct dentry *dentry; @@ -78,7 +79,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, if (IS_ERR(anon_inode_inode)) return -ENODEV; - error = get_unused_fd(); + error = get_unused_fd_flags(flags); if (error < 0) return error; fd = error; @@ -115,7 +116,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, file->f_mapping = anon_inode_inode->i_mapping; file->f_pos = 0; - file->f_flags = O_RDWR; + file->f_flags = O_RDWR | (flags & O_NONBLOCK); file->f_version = 0; file->private_data = priv; diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index c3d352d7fa9..69a2f5c9231 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -52,7 +52,10 @@ struct autofs_info { int flags; - struct list_head rehash; + struct completion expire_complete; + + struct list_head active; + struct list_head expiring; struct autofs_sb_info *sbi; unsigned long last_used; @@ -68,15 +71,14 @@ struct autofs_info { }; #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ +#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */ struct autofs_wait_queue { wait_queue_head_t queue; struct autofs_wait_queue *next; autofs_wqt_t wait_queue_token; /* We use the following to see what we are waiting for */ - unsigned int hash; - unsigned int len; - char *name; + struct qstr name; u32 dev; u64 ino; uid_t uid; @@ -85,7 +87,7 @@ struct autofs_wait_queue { pid_t tgid; /* This is for status reporting upon return */ int status; - atomic_t wait_ctr; + unsigned int wait_ctr; }; #define AUTOFS_SBI_MAGIC 0x6d4a556d @@ -112,8 +114,9 @@ struct autofs_sb_info { struct mutex wq_mutex; spinlock_t fs_lock; struct autofs_wait_queue *queues; /* Wait queue pointer */ - spinlock_t rehash_lock; - struct list_head rehash_list; + spinlock_t lookup_lock; + struct list_head active_list; + struct list_head expiring_list; }; static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) @@ -138,18 +141,14 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) { static inline int autofs4_ispending(struct dentry *dentry) { struct autofs_info *inf = autofs4_dentry_ino(dentry); - int pending = 0; if (dentry->d_flags & DCACHE_AUTOFS_PENDING) return 1; - if (inf) { - spin_lock(&inf->sbi->fs_lock); - pending = inf->flags & AUTOFS_INF_EXPIRING; - spin_unlock(&inf->sbi->fs_lock); - } + if (inf->flags & AUTOFS_INF_EXPIRING) + return 1; - return pending; + return 0; } static inline void autofs4_copy_atime(struct file *src, struct file *dst) @@ -164,6 +163,7 @@ void autofs4_free_ino(struct autofs_info *); /* Expiration */ int is_autofs4_dentry(struct dentry *); +int autofs4_expire_wait(struct dentry *dentry); int autofs4_expire_run(struct super_block *, struct vfsmount *, struct autofs_sb_info *, struct autofs_packet_expire __user *); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 894fee54d4d..cdabb796ff0 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -259,13 +259,15 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb, now = jiffies; timeout = sbi->exp_timeout; - /* Lock the tree as we must expire as a whole */ spin_lock(&sbi->fs_lock); if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { struct autofs_info *ino = autofs4_dentry_ino(root); - - /* Set this flag early to catch sys_chdir and the like */ + if (d_mountpoint(root)) { + ino->flags |= AUTOFS_INF_MOUNTPOINT; + root->d_mounted--; + } ino->flags |= AUTOFS_INF_EXPIRING; + init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); return root; } @@ -292,6 +294,8 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, struct list_head *next; int do_now = how & AUTOFS_EXP_IMMEDIATE; int exp_leaves = how & AUTOFS_EXP_LEAVES; + struct autofs_info *ino; + unsigned int ino_count; if (!root) return NULL; @@ -316,6 +320,9 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, dentry = dget(dentry); spin_unlock(&dcache_lock); + spin_lock(&sbi->fs_lock); + ino = autofs4_dentry_ino(dentry); + /* * Case 1: (i) indirect mount or top level pseudo direct mount * (autofs-4.1). @@ -326,6 +333,11 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, DPRINTK("checking mountpoint %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); + /* Path walk currently on this dentry? */ + ino_count = atomic_read(&ino->count) + 2; + if (atomic_read(&dentry->d_count) > ino_count) + goto next; + /* Can we umount this guy */ if (autofs4_mount_busy(mnt, dentry)) goto next; @@ -343,23 +355,25 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, /* Case 2: tree mount, expire iff entire tree is not busy */ if (!exp_leaves) { - /* Lock the tree as we must expire as a whole */ - spin_lock(&sbi->fs_lock); - if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { - struct autofs_info *inf = autofs4_dentry_ino(dentry); + /* Path walk currently on this dentry? */ + ino_count = atomic_read(&ino->count) + 1; + if (atomic_read(&dentry->d_count) > ino_count) + goto next; - /* Set this flag early to catch sys_chdir and the like */ - inf->flags |= AUTOFS_INF_EXPIRING; - spin_unlock(&sbi->fs_lock); + if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { expired = dentry; goto found; } - spin_unlock(&sbi->fs_lock); /* * Case 3: pseudo direct mount, expire individual leaves * (autofs-4.1). */ } else { + /* Path walk currently on this dentry? */ + ino_count = atomic_read(&ino->count) + 1; + if (atomic_read(&dentry->d_count) > ino_count) + goto next; + expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); if (expired) { dput(dentry); @@ -367,6 +381,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, } } next: + spin_unlock(&sbi->fs_lock); dput(dentry); spin_lock(&dcache_lock); next = next->next; @@ -377,12 +392,45 @@ next: found: DPRINTK("returning %p %.*s", expired, (int)expired->d_name.len, expired->d_name.name); + ino = autofs4_dentry_ino(expired); + ino->flags |= AUTOFS_INF_EXPIRING; + init_completion(&ino->expire_complete); + spin_unlock(&sbi->fs_lock); spin_lock(&dcache_lock); list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); spin_unlock(&dcache_lock); return expired; } +int autofs4_expire_wait(struct dentry *dentry) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + int status; + + /* Block on any pending expire */ + spin_lock(&sbi->fs_lock); + if (ino->flags & AUTOFS_INF_EXPIRING) { + spin_unlock(&sbi->fs_lock); + + DPRINTK("waiting for expire %p name=%.*s", + dentry, dentry->d_name.len, dentry->d_name.name); + + status = autofs4_wait(sbi, dentry, NFY_NONE); + wait_for_completion(&ino->expire_complete); + + DPRINTK("expire done status=%d", status); + + if (d_unhashed(dentry)) + return -EAGAIN; + + return status; + } + spin_unlock(&sbi->fs_lock); + + return 0; +} + /* Perform an expiry operation */ int autofs4_expire_run(struct super_block *sb, struct vfsmount *mnt, @@ -390,7 +438,9 @@ int autofs4_expire_run(struct super_block *sb, struct autofs_packet_expire __user *pkt_p) { struct autofs_packet_expire pkt; + struct autofs_info *ino; struct dentry *dentry; + int ret = 0; memset(&pkt,0,sizeof pkt); @@ -406,9 +456,15 @@ int autofs4_expire_run(struct super_block *sb, dput(dentry); if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) - return -EFAULT; + ret = -EFAULT; - return 0; + spin_lock(&sbi->fs_lock); + ino = autofs4_dentry_ino(dentry); + ino->flags &= ~AUTOFS_INF_EXPIRING; + complete_all(&ino->expire_complete); + spin_unlock(&sbi->fs_lock); + + return ret; } /* Call repeatedly until it returns -EAGAIN, meaning there's nothing @@ -433,9 +489,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, /* This is synchronous because it makes the daemon a little easier */ - ino->flags |= AUTOFS_INF_EXPIRING; ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); + + spin_lock(&sbi->fs_lock); + if (ino->flags & AUTOFS_INF_MOUNTPOINT) { + sb->s_root->d_mounted++; + ino->flags &= ~AUTOFS_INF_MOUNTPOINT; + } ino->flags &= ~AUTOFS_INF_EXPIRING; + complete_all(&ino->expire_complete); + spin_unlock(&sbi->fs_lock); dput(dentry); } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 2fdcf5e1d23..7bb3e5ba053 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -24,8 +24,10 @@ static void ino_lnkfree(struct autofs_info *ino) { - kfree(ino->u.symlink); - ino->u.symlink = NULL; + if (ino->u.symlink) { + kfree(ino->u.symlink); + ino->u.symlink = NULL; + } } struct autofs_info *autofs4_init_ino(struct autofs_info *ino, @@ -41,16 +43,18 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, if (ino == NULL) return NULL; - ino->flags = 0; - ino->mode = mode; - ino->inode = NULL; - ino->dentry = NULL; - ino->size = 0; - - INIT_LIST_HEAD(&ino->rehash); + if (!reinit) { + ino->flags = 0; + ino->inode = NULL; + ino->dentry = NULL; + ino->size = 0; + INIT_LIST_HEAD(&ino->active); + INIT_LIST_HEAD(&ino->expiring); + atomic_set(&ino->count, 0); + } + ino->mode = mode; ino->last_used = jiffies; - atomic_set(&ino->count, 0); ino->sbi = sbi; @@ -159,8 +163,8 @@ void autofs4_kill_sb(struct super_block *sb) if (!sbi) goto out_kill_sb; - if (!sbi->catatonic) - autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */ + /* Free wait queues, close pipe */ + autofs4_catatonic_mode(sbi); /* Clean up and release dangling references */ autofs4_force_release(sbi); @@ -338,8 +342,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) mutex_init(&sbi->wq_mutex); spin_lock_init(&sbi->fs_lock); sbi->queues = NULL; - spin_lock_init(&sbi->rehash_lock); - INIT_LIST_HEAD(&sbi->rehash_list); + spin_lock_init(&sbi->lookup_lock); + INIT_LIST_HEAD(&sbi->active_list); + INIT_LIST_HEAD(&sbi->expiring_list); s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = AUTOFS_SUPER_MAGIC; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index edf5b6bddb5..bcfb2dc0a61 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -25,25 +25,25 @@ static int autofs4_dir_rmdir(struct inode *,struct dentry *); static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); static int autofs4_dir_open(struct inode *inode, struct file *file); -static int autofs4_dir_close(struct inode *inode, struct file *file); -static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir); -static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir); static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); static void *autofs4_follow_link(struct dentry *, struct nameidata *); +#define TRIGGER_FLAGS (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) +#define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE) + const struct file_operations autofs4_root_operations = { .open = dcache_dir_open, .release = dcache_dir_close, .read = generic_read_dir, - .readdir = autofs4_root_readdir, + .readdir = dcache_readdir, .ioctl = autofs4_root_ioctl, }; const struct file_operations autofs4_dir_operations = { .open = autofs4_dir_open, - .release = autofs4_dir_close, + .release = dcache_dir_close, .read = generic_read_dir, - .readdir = autofs4_dir_readdir, + .readdir = dcache_readdir, }; const struct inode_operations autofs4_indirect_root_inode_operations = { @@ -70,42 +70,10 @@ const struct inode_operations autofs4_dir_inode_operations = { .rmdir = autofs4_dir_rmdir, }; -static int autofs4_root_readdir(struct file *file, void *dirent, - filldir_t filldir) -{ - struct autofs_sb_info *sbi = autofs4_sbi(file->f_path.dentry->d_sb); - int oz_mode = autofs4_oz_mode(sbi); - - DPRINTK("called, filp->f_pos = %lld", file->f_pos); - - /* - * Don't set reghost flag if: - * 1) f_pos is larger than zero -- we've already been here. - * 2) we haven't even enabled reghosting in the 1st place. - * 3) this is the daemon doing a readdir - */ - if (oz_mode && file->f_pos == 0 && sbi->reghost_enabled) - sbi->needs_reghost = 1; - - DPRINTK("needs_reghost = %d", sbi->needs_reghost); - - return dcache_readdir(file, dirent, filldir); -} - static int autofs4_dir_open(struct inode *inode, struct file *file) { struct dentry *dentry = file->f_path.dentry; - struct vfsmount *mnt = file->f_path.mnt; struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); - struct dentry *cursor; - int status; - - status = dcache_dir_open(inode, file); - if (status) - goto out; - - cursor = file->private_data; - cursor->d_fsdata = NULL; DPRINTK("file=%p dentry=%p %.*s", file, dentry, dentry->d_name.len, dentry->d_name.name); @@ -113,159 +81,32 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) if (autofs4_oz_mode(sbi)) goto out; - if (autofs4_ispending(dentry)) { - DPRINTK("dentry busy"); - dcache_dir_close(inode, file); - status = -EBUSY; - goto out; - } - - status = -ENOENT; - if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) { - struct nameidata nd; - int empty, ret; - - /* In case there are stale directory dentrys from a failed mount */ - spin_lock(&dcache_lock); - empty = list_empty(&dentry->d_subdirs); + /* + * An empty directory in an autofs file system is always a + * mount point. The daemon must have failed to mount this + * during lookup so it doesn't exist. This can happen, for + * example, if user space returns an incorrect status for a + * mount request. Otherwise we're doing a readdir on the + * autofs file system so just let the libfs routines handle + * it. + */ + spin_lock(&dcache_lock); + if (!d_mountpoint(dentry) && __simple_empty(dentry)) { spin_unlock(&dcache_lock); - - if (!empty) - d_invalidate(dentry); - - nd.flags = LOOKUP_DIRECTORY; - ret = (dentry->d_op->d_revalidate)(dentry, &nd); - - if (ret <= 0) { - if (ret < 0) - status = ret; - dcache_dir_close(inode, file); - goto out; - } + return -ENOENT; } + spin_unlock(&dcache_lock); - if (d_mountpoint(dentry)) { - struct file *fp = NULL; - struct path fp_path = { .dentry = dentry, .mnt = mnt }; - - path_get(&fp_path); - - if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) { - path_put(&fp_path); - dcache_dir_close(inode, file); - goto out; - } - - fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags); - status = PTR_ERR(fp); - if (IS_ERR(fp)) { - dcache_dir_close(inode, file); - goto out; - } - cursor->d_fsdata = fp; - } - return 0; -out: - return status; -} - -static int autofs4_dir_close(struct inode *inode, struct file *file) -{ - struct dentry *dentry = file->f_path.dentry; - struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); - struct dentry *cursor = file->private_data; - int status = 0; - - DPRINTK("file=%p dentry=%p %.*s", - file, dentry, dentry->d_name.len, dentry->d_name.name); - - if (autofs4_oz_mode(sbi)) - goto out; - - if (autofs4_ispending(dentry)) { - DPRINTK("dentry busy"); - status = -EBUSY; - goto out; - } - - if (d_mountpoint(dentry)) { - struct file *fp = cursor->d_fsdata; - if (!fp) { - status = -ENOENT; - goto out; - } - filp_close(fp, current->files); - } -out: - dcache_dir_close(inode, file); - return status; -} - -static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir) -{ - struct dentry *dentry = file->f_path.dentry; - struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); - struct dentry *cursor = file->private_data; - int status; - - DPRINTK("file=%p dentry=%p %.*s", - file, dentry, dentry->d_name.len, dentry->d_name.name); - - if (autofs4_oz_mode(sbi)) - goto out; - - if (autofs4_ispending(dentry)) { - DPRINTK("dentry busy"); - return -EBUSY; - } - - if (d_mountpoint(dentry)) { - struct file *fp = cursor->d_fsdata; - - if (!fp) - return -ENOENT; - - if (!fp->f_op || !fp->f_op->readdir) - goto out; - - status = vfs_readdir(fp, filldir, dirent); - file->f_pos = fp->f_pos; - if (status) - autofs4_copy_atime(file, fp); - return status; - } out: - return dcache_readdir(file, dirent, filldir); + return dcache_dir_open(inode, file); } static int try_to_fill_dentry(struct dentry *dentry, int flags) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); - struct dentry *new; int status; - /* Block on any pending expiry here; invalidate the dentry - when expiration is done to trigger mount request with a new - dentry */ - if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { - DPRINTK("waiting for expire %p name=%.*s", - dentry, dentry->d_name.len, dentry->d_name.name); - - status = autofs4_wait(sbi, dentry, NFY_NONE); - - DPRINTK("expire done status=%d", status); - - /* - * If the directory still exists the mount request must - * continue otherwise it can't be followed at the right - * time during the walk. - */ - status = d_invalidate(dentry); - if (status != -EBUSY) - return -EAGAIN; - } - DPRINTK("dentry=%p %.*s ino=%p", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); @@ -292,7 +133,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) return status; } /* Trigger mount for path component or follow link */ - } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) || + } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING || + flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) || current->link_count) { DPRINTK("waiting for mount name=%.*s", dentry->d_name.len, dentry->d_name.name); @@ -320,26 +162,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - /* - * The dentry that is passed in from lookup may not be the one - * we end up using, as mkdir can create a new one. If this - * happens, and another process tries the lookup at the same time, - * it will set the PENDING flag on this new dentry, but add itself - * to our waitq. Then, if after the lookup succeeds, the first - * process that requested the mount performs another lookup of the - * same directory, it will show up as still pending! So, we need - * to redo the lookup here and clear pending on that dentry. - */ - if (d_unhashed(dentry)) { - new = d_lookup(dentry->d_parent, &dentry->d_name); - if (new) { - spin_lock(&new->d_lock); - new->d_flags &= ~DCACHE_AUTOFS_PENDING; - spin_unlock(&new->d_lock); - dput(new); - } - } - return 0; } @@ -355,51 +177,63 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d", dentry, dentry->d_name.len, dentry->d_name.name, oz_mode, nd->flags); - - /* If it's our master or we shouldn't trigger a mount we're done */ - lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY); - if (oz_mode || !lookup_type) + /* + * For an expire of a covered direct or offset mount we need + * to beeak out of follow_down() at the autofs mount trigger + * (d_mounted--), so we can see the expiring flag, and manage + * the blocking and following here until the expire is completed. + */ + if (oz_mode) { + spin_lock(&sbi->fs_lock); + if (ino->flags & AUTOFS_INF_EXPIRING) { + spin_unlock(&sbi->fs_lock); + /* Follow down to our covering mount. */ + if (!follow_down(&nd->path.mnt, &nd->path.dentry)) + goto done; + goto follow; + } + spin_unlock(&sbi->fs_lock); goto done; + } - /* If an expire request is pending wait for it. */ - if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { - DPRINTK("waiting for active request %p name=%.*s", - dentry, dentry->d_name.len, dentry->d_name.name); - - status = autofs4_wait(sbi, dentry, NFY_NONE); + /* If an expire request is pending everyone must wait. */ + autofs4_expire_wait(dentry); - DPRINTK("request done status=%d", status); - } + /* We trigger a mount for almost all flags */ + lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS); + if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING)) + goto follow; /* - * If the dentry contains directories then it is an - * autofs multi-mount with no root mount offset. So - * don't try to mount it again. + * If the dentry contains directories then it is an autofs + * multi-mount with no root mount offset. So don't try to + * mount it again. */ spin_lock(&dcache_lock); - if (!d_mountpoint(dentry) && __simple_empty(dentry)) { + if (dentry->d_flags & DCACHE_AUTOFS_PENDING || + (!d_mountpoint(dentry) && __simple_empty(dentry))) { spin_unlock(&dcache_lock); status = try_to_fill_dentry(dentry, 0); if (status) goto out_error; - /* - * The mount succeeded but if there is no root mount - * it must be an autofs multi-mount with no root offset - * so we don't need to follow the mount. - */ - if (d_mountpoint(dentry)) { - if (!autofs4_follow_mount(&nd->path.mnt, - &nd->path.dentry)) { - status = -ENOENT; - goto out_error; - } - } - - goto done; + goto follow; } spin_unlock(&dcache_lock); +follow: + /* + * If there is no root mount it must be an autofs + * multi-mount with no root offset so we don't need + * to follow it. + */ + if (d_mountpoint(dentry)) { + if (!autofs4_follow_mount(&nd->path.mnt, + &nd->path.dentry)) { + status = -ENOENT; + goto out_error; + } + } done: return NULL; @@ -424,12 +258,23 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) int status = 1; /* Pending dentry */ + spin_lock(&sbi->fs_lock); if (autofs4_ispending(dentry)) { /* The daemon never causes a mount to trigger */ + spin_unlock(&sbi->fs_lock); + if (oz_mode) return 1; /* + * If the directory has gone away due to an expire + * we have been called as ->d_revalidate() and so + * we need to return false and proceed to ->lookup(). + */ + if (autofs4_expire_wait(dentry) == -EAGAIN) + return 0; + + /* * A zero status is success otherwise we have a * negative error code. */ @@ -437,17 +282,9 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) if (status == 0) return 1; - /* - * A status of EAGAIN here means that the dentry has gone - * away while waiting for an expire to complete. If we are - * racing with expire lookup will wait for it so this must - * be a revalidate and we need to send it to lookup. - */ - if (status == -EAGAIN) - return 0; - return status; } + spin_unlock(&sbi->fs_lock); /* Negative dentry.. invalidate if "old" */ if (dentry->d_inode == NULL) @@ -461,6 +298,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); spin_unlock(&dcache_lock); + /* The daemon never causes a mount to trigger */ if (oz_mode) return 1; @@ -493,10 +331,12 @@ void autofs4_dentry_release(struct dentry *de) struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); if (sbi) { - spin_lock(&sbi->rehash_lock); - if (!list_empty(&inf->rehash)) - list_del(&inf->rehash); - spin_unlock(&sbi->rehash_lock); + spin_lock(&sbi->lookup_lock); + if (!list_empty(&inf->active)) + list_del(&inf->active); + if (!list_empty(&inf->expiring)) + list_del(&inf->expiring); + spin_unlock(&sbi->lookup_lock); } inf->dentry = NULL; @@ -518,7 +358,7 @@ static struct dentry_operations autofs4_dentry_operations = { .d_release = autofs4_dentry_release, }; -static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) +static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) { unsigned int len = name->len; unsigned int hash = name->hash; @@ -526,14 +366,66 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct struct list_head *p, *head; spin_lock(&dcache_lock); - spin_lock(&sbi->rehash_lock); - head = &sbi->rehash_list; + spin_lock(&sbi->lookup_lock); + head = &sbi->active_list; list_for_each(p, head) { struct autofs_info *ino; struct dentry *dentry; struct qstr *qstr; - ino = list_entry(p, struct autofs_info, rehash); + ino = list_entry(p, struct autofs_info, active); + dentry = ino->dentry; + + spin_lock(&dentry->d_lock); + + /* Already gone? */ + if (atomic_read(&dentry->d_count) == 0) + goto next; + + qstr = &dentry->d_name; + + if (dentry->d_name.hash != hash) + goto next; + if (dentry->d_parent != parent) + goto next; + + if (qstr->len != len) + goto next; + if (memcmp(qstr->name, str, len)) + goto next; + + if (d_unhashed(dentry)) { + dget(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&sbi->lookup_lock); + spin_unlock(&dcache_lock); + return dentry; + } +next: + spin_unlock(&dentry->d_lock); + } + spin_unlock(&sbi->lookup_lock); + spin_unlock(&dcache_lock); + + return NULL; +} + +static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) +{ + unsigned int len = name->len; + unsigned int hash = name->hash; + const unsigned char *str = name->name; + struct list_head *p, *head; + + spin_lock(&dcache_lock); + spin_lock(&sbi->lookup_lock); + head = &sbi->expiring_list; + list_for_each(p, head) { + struct autofs_info *ino; + struct dentry *dentry; + struct qstr *qstr; + + ino = list_entry(p, struct autofs_info, expiring); dentry = ino->dentry; spin_lock(&dentry->d_lock); @@ -555,33 +447,16 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct goto next; if (d_unhashed(dentry)) { - struct inode *inode = dentry->d_inode; - - ino = autofs4_dentry_ino(dentry); - list_del_init(&ino->rehash); dget(dentry); - /* - * Make the rehashed dentry negative so the VFS - * behaves as it should. - */ - if (inode) { - dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); - spin_unlock(&dentry->d_lock); - spin_unlock(&sbi->rehash_lock); - spin_unlock(&dcache_lock); - iput(inode); - return dentry; - } spin_unlock(&dentry->d_lock); - spin_unlock(&sbi->rehash_lock); + spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); return dentry; } next: spin_unlock(&dentry->d_lock); } - spin_unlock(&sbi->rehash_lock); + spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); return NULL; @@ -591,7 +466,8 @@ next: static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct autofs_sb_info *sbi; - struct dentry *unhashed; + struct autofs_info *ino; + struct dentry *expiring, *unhashed; int oz_mode; DPRINTK("name = %.*s", @@ -607,8 +483,26 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); - unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name); - if (!unhashed) { + expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name); + if (expiring) { + /* + * If we are racing with expire the request might not + * be quite complete but the directory has been removed + * so it must have been successful, so just wait for it. + */ + ino = autofs4_dentry_ino(expiring); + autofs4_expire_wait(expiring); + spin_lock(&sbi->lookup_lock); + if (!list_empty(&ino->expiring)) + list_del_init(&ino->expiring); + spin_unlock(&sbi->lookup_lock); + dput(expiring); + } + + unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name); + if (unhashed) + dentry = unhashed; + else { /* * Mark the dentry incomplete but don't hash it. We do this * to serialize our inode creation operations (symlink and @@ -622,39 +516,34 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s */ dentry->d_op = &autofs4_root_dentry_operations; - dentry->d_fsdata = NULL; - d_instantiate(dentry, NULL); - } else { - struct autofs_info *ino = autofs4_dentry_ino(unhashed); - DPRINTK("rehash %p with %p", dentry, unhashed); /* - * If we are racing with expire the request might not - * be quite complete but the directory has been removed - * so it must have been successful, so just wait for it. - * We need to ensure the AUTOFS_INF_EXPIRING flag is clear - * before continuing as revalidate may fail when calling - * try_to_fill_dentry (returning EAGAIN) if we don't. + * And we need to ensure that the same dentry is used for + * all following lookup calls until it is hashed so that + * the dentry flags are persistent throughout the request. */ - while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { - DPRINTK("wait for incomplete expire %p name=%.*s", - unhashed, unhashed->d_name.len, - unhashed->d_name.name); - autofs4_wait(sbi, unhashed, NFY_NONE); - DPRINTK("request completed"); - } - dentry = unhashed; + ino = autofs4_init_ino(NULL, sbi, 0555); + if (!ino) + return ERR_PTR(-ENOMEM); + + dentry->d_fsdata = ino; + ino->dentry = dentry; + + spin_lock(&sbi->lookup_lock); + list_add(&ino->active, &sbi->active_list); + spin_unlock(&sbi->lookup_lock); + + d_instantiate(dentry, NULL); } if (!oz_mode) { spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - } - - if (dentry->d_op && dentry->d_op->d_revalidate) { - mutex_unlock(&dir->i_mutex); - (dentry->d_op->d_revalidate)(dentry, nd); - mutex_lock(&dir->i_mutex); + if (dentry->d_op && dentry->d_op->d_revalidate) { + mutex_unlock(&dir->i_mutex); + (dentry->d_op->d_revalidate)(dentry, nd); + mutex_lock(&dir->i_mutex); + } } /* @@ -673,9 +562,11 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s return ERR_PTR(-ERESTARTNOINTR); } } - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; - spin_unlock(&dentry->d_lock); + if (!oz_mode) { + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; + spin_unlock(&dentry->d_lock); + } } /* @@ -706,7 +597,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s } if (unhashed) - return dentry; + return unhashed; return NULL; } @@ -728,20 +619,31 @@ static int autofs4_dir_symlink(struct inode *dir, return -EACCES; ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555); - if (ino == NULL) - return -ENOSPC; + if (!ino) + return -ENOMEM; - ino->size = strlen(symname); - ino->u.symlink = cp = kmalloc(ino->size + 1, GFP_KERNEL); + spin_lock(&sbi->lookup_lock); + if (!list_empty(&ino->active)) + list_del_init(&ino->active); + spin_unlock(&sbi->lookup_lock); - if (cp == NULL) { - kfree(ino); - return -ENOSPC; + ino->size = strlen(symname); + cp = kmalloc(ino->size + 1, GFP_KERNEL); + if (!cp) { + if (!dentry->d_fsdata) + kfree(ino); + return -ENOMEM; } strcpy(cp, symname); inode = autofs4_get_inode(dir->i_sb, ino); + if (!inode) { + kfree(cp); + if (!dentry->d_fsdata) + kfree(ino); + return -ENOMEM; + } d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) @@ -757,6 +659,7 @@ static int autofs4_dir_symlink(struct inode *dir, atomic_inc(&p_ino->count); ino->inode = inode; + ino->u.symlink = cp; dir->i_mtime = CURRENT_TIME; return 0; @@ -769,9 +672,8 @@ static int autofs4_dir_symlink(struct inode *dir, * that the file no longer exists. However, doing that means that the * VFS layer can turn the dentry into a negative dentry. We don't want * this, because the unlink is probably the result of an expire. - * We simply d_drop it and add it to a rehash candidates list in the - * super block, which allows the dentry lookup to reuse it retaining - * the flags, such as expire in progress, in case we're racing with expire. + * We simply d_drop it and add it to a expiring list in the super block, + * which allows the dentry lookup to check for an incomplete expire. * * If a process is blocked on the dentry waiting for the expire to finish, * it will invalidate the dentry and try to mount with a new one. @@ -801,9 +703,10 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) dir->i_mtime = CURRENT_TIME; spin_lock(&dcache_lock); - spin_lock(&sbi->rehash_lock); - list_add(&ino->rehash, &sbi->rehash_list); - spin_unlock(&sbi->rehash_lock); + spin_lock(&sbi->lookup_lock); + if (list_empty(&ino->expiring)) + list_add(&ino->expiring, &sbi->expiring_list); + spin_unlock(&sbi->lookup_lock); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); @@ -829,9 +732,10 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) spin_unlock(&dcache_lock); return -ENOTEMPTY; } - spin_lock(&sbi->rehash_lock); - list_add(&ino->rehash, &sbi->rehash_list); - spin_unlock(&sbi->rehash_lock); + spin_lock(&sbi->lookup_lock); + if (list_empty(&ino->expiring)) + list_add(&ino->expiring, &sbi->expiring_list); + spin_unlock(&sbi->lookup_lock); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); @@ -866,10 +770,20 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) dentry, dentry->d_name.len, dentry->d_name.name); ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555); - if (ino == NULL) - return -ENOSPC; + if (!ino) + return -ENOMEM; + + spin_lock(&sbi->lookup_lock); + if (!list_empty(&ino->active)) + list_del_init(&ino->active); + spin_unlock(&sbi->lookup_lock); inode = autofs4_get_inode(dir->i_sb, ino); + if (!inode) { + if (!dentry->d_fsdata) + kfree(ino); + return -ENOMEM; + } d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) @@ -922,44 +836,6 @@ static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user } /* - * Tells the daemon whether we need to reghost or not. Also, clears - * the reghost_needed flag. - */ -static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int __user *p) -{ - int status; - - DPRINTK("returning %d", sbi->needs_reghost); - - status = put_user(sbi->needs_reghost, p); - if (status) - return status; - - sbi->needs_reghost = 0; - return 0; -} - -/* - * Enable / Disable reghosting ioctl() operation - */ -static inline int autofs4_toggle_reghost(struct autofs_sb_info *sbi, int __user *p) -{ - int status; - int val; - - status = get_user(val, p); - - DPRINTK("reghost = %d", val); - - if (status) - return status; - - /* turn on/off reghosting, with the val */ - sbi->reghost_enabled = val; - return 0; -} - -/* * Tells the daemon whether it can umount the autofs mount. */ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) @@ -1023,11 +899,6 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp, case AUTOFS_IOC_SETTIMEOUT: return autofs4_get_set_timeout(sbi, p); - case AUTOFS_IOC_TOGGLEREGHOST: - return autofs4_toggle_reghost(sbi, p); - case AUTOFS_IOC_ASKREGHOST: - return autofs4_ask_reghost(sbi, p); - case AUTOFS_IOC_ASKUMOUNT: return autofs4_ask_umount(filp->f_path.mnt, p); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 75e5955c3f6..35216d18d8b 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -28,6 +28,12 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) { struct autofs_wait_queue *wq, *nwq; + mutex_lock(&sbi->wq_mutex); + if (sbi->catatonic) { + mutex_unlock(&sbi->wq_mutex); + return; + } + DPRINTK("entering catatonic mode"); sbi->catatonic = 1; @@ -36,13 +42,18 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) while (wq) { nwq = wq->next; wq->status = -ENOENT; /* Magic is gone - report failure */ - kfree(wq->name); - wq->name = NULL; + if (wq->name.name) { + kfree(wq->name.name); + wq->name.name = NULL; + } + wq->wait_ctr--; wake_up_interruptible(&wq->queue); wq = nwq; } fput(sbi->pipe); /* Close the pipe */ sbi->pipe = NULL; + sbi->pipefd = -1; + mutex_unlock(&sbi->wq_mutex); } static int autofs4_write(struct file *file, const void *addr, int bytes) @@ -89,10 +100,11 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, union autofs_packet_union v4_pkt; union autofs_v5_packet_union v5_pkt; } pkt; + struct file *pipe = NULL; size_t pktsz; DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d", - wq->wait_queue_token, wq->len, wq->name, type); + wq->wait_queue_token, wq->name.len, wq->name.name, type); memset(&pkt,0,sizeof pkt); /* For security reasons */ @@ -107,9 +119,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pktsz = sizeof(*mp); mp->wait_queue_token = wq->wait_queue_token; - mp->len = wq->len; - memcpy(mp->name, wq->name, wq->len); - mp->name[wq->len] = '\0'; + mp->len = wq->name.len; + memcpy(mp->name, wq->name.name, wq->name.len); + mp->name[wq->name.len] = '\0'; break; } case autofs_ptype_expire_multi: @@ -119,9 +131,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pktsz = sizeof(*ep); ep->wait_queue_token = wq->wait_queue_token; - ep->len = wq->len; - memcpy(ep->name, wq->name, wq->len); - ep->name[wq->len] = '\0'; + ep->len = wq->name.len; + memcpy(ep->name, wq->name.name, wq->name.len); + ep->name[wq->name.len] = '\0'; break; } /* @@ -138,9 +150,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pktsz = sizeof(*packet); packet->wait_queue_token = wq->wait_queue_token; - packet->len = wq->len; - memcpy(packet->name, wq->name, wq->len); - packet->name[wq->len] = '\0'; + packet->len = wq->name.len; + memcpy(packet->name, wq->name.name, wq->name.len); + packet->name[wq->name.len] = '\0'; packet->dev = wq->dev; packet->ino = wq->ino; packet->uid = wq->uid; @@ -154,8 +166,19 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, return; } - if (autofs4_write(sbi->pipe, &pkt, pktsz)) - autofs4_catatonic_mode(sbi); + /* Check if we have become catatonic */ + mutex_lock(&sbi->wq_mutex); + if (!sbi->catatonic) { + pipe = sbi->pipe; + get_file(pipe); + } + mutex_unlock(&sbi->wq_mutex); + + if (pipe) { + if (autofs4_write(pipe, &pkt, pktsz)) + autofs4_catatonic_mode(sbi); + fput(pipe); + } } static int autofs4_getpath(struct autofs_sb_info *sbi, @@ -191,58 +214,55 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, } static struct autofs_wait_queue * -autofs4_find_wait(struct autofs_sb_info *sbi, - char *name, unsigned int hash, unsigned int len) +autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr) { struct autofs_wait_queue *wq; for (wq = sbi->queues; wq; wq = wq->next) { - if (wq->hash == hash && - wq->len == len && - wq->name && !memcmp(wq->name, name, len)) + if (wq->name.hash == qstr->hash && + wq->name.len == qstr->len && + wq->name.name && + !memcmp(wq->name.name, qstr->name, qstr->len)) break; } return wq; } -int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, - enum autofs_notify notify) +/* + * Check if we have a valid request. + * Returns + * 1 if the request should continue. + * In this case we can return an autofs_wait_queue entry if one is + * found or NULL to idicate a new wait needs to be created. + * 0 or a negative errno if the request shouldn't continue. + */ +static int validate_request(struct autofs_wait_queue **wait, + struct autofs_sb_info *sbi, + struct qstr *qstr, + struct dentry*dentry, enum autofs_notify notify) { - struct autofs_info *ino; struct autofs_wait_queue *wq; - char *name; - unsigned int len = 0; - unsigned int hash = 0; - int status, type; - - /* In catatonic mode, we don't wait for nobody */ - if (sbi->catatonic) - return -ENOENT; - - name = kmalloc(NAME_MAX + 1, GFP_KERNEL); - if (!name) - return -ENOMEM; + struct autofs_info *ino; - /* If this is a direct mount request create a dummy name */ - if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) - len = sprintf(name, "%p", dentry); - else { - len = autofs4_getpath(sbi, dentry, &name); - if (!len) { - kfree(name); - return -ENOENT; - } + /* Wait in progress, continue; */ + wq = autofs4_find_wait(sbi, qstr); + if (wq) { + *wait = wq; + return 1; } - hash = full_name_hash(name, len); - if (mutex_lock_interruptible(&sbi->wq_mutex)) { - kfree(name); - return -EINTR; - } + *wait = NULL; - wq = autofs4_find_wait(sbi, name, hash, len); + /* If we don't yet have any info this is a new request */ ino = autofs4_dentry_ino(dentry); - if (!wq && ino && notify == NFY_NONE) { + if (!ino) + return 1; + + /* + * If we've been asked to wait on an existing expire (NFY_NONE) + * but there is no wait in the queue ... + */ + if (notify == NFY_NONE) { /* * Either we've betean the pending expire to post it's * wait or it finished while we waited on the mutex. @@ -253,13 +273,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, while (ino->flags & AUTOFS_INF_EXPIRING) { mutex_unlock(&sbi->wq_mutex); schedule_timeout_interruptible(HZ/10); - if (mutex_lock_interruptible(&sbi->wq_mutex)) { - kfree(name); + if (mutex_lock_interruptible(&sbi->wq_mutex)) return -EINTR; + + wq = autofs4_find_wait(sbi, qstr); + if (wq) { + *wait = wq; + return 1; } - wq = autofs4_find_wait(sbi, name, hash, len); - if (wq) - break; } /* @@ -267,18 +288,96 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, * cases where we wait on NFY_NONE neither depend on the * return status of the wait. */ - if (!wq) { + return 0; + } + + /* + * If we've been asked to trigger a mount and the request + * completed while we waited on the mutex ... + */ + if (notify == NFY_MOUNT) { + /* + * If the dentry isn't hashed just go ahead and try the + * mount again with a new wait (not much else we can do). + */ + if (!d_unhashed(dentry)) { + /* + * But if the dentry is hashed, that means that we + * got here through the revalidate path. Thus, we + * need to check if the dentry has been mounted + * while we waited on the wq_mutex. If it has, + * simply return success. + */ + if (d_mountpoint(dentry)) + return 0; + } + } + + return 1; +} + +int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, + enum autofs_notify notify) +{ + struct autofs_wait_queue *wq; + struct qstr qstr; + char *name; + int status, ret, type; + + /* In catatonic mode, we don't wait for nobody */ + if (sbi->catatonic) + return -ENOENT; + + if (!dentry->d_inode) { + /* + * A wait for a negative dentry is invalid for certain + * cases. A direct or offset mount "always" has its mount + * point directory created and so the request dentry must + * be positive or the map key doesn't exist. The situation + * is very similar for indirect mounts except only dentrys + * in the root of the autofs file system may be negative. + */ + if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)) + return -ENOENT; + else if (!IS_ROOT(dentry->d_parent)) + return -ENOENT; + } + + name = kmalloc(NAME_MAX + 1, GFP_KERNEL); + if (!name) + return -ENOMEM; + + /* If this is a direct mount request create a dummy name */ + if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) + qstr.len = sprintf(name, "%p", dentry); + else { + qstr.len = autofs4_getpath(sbi, dentry, &name); + if (!qstr.len) { kfree(name); - mutex_unlock(&sbi->wq_mutex); - return 0; + return -ENOENT; } } + qstr.name = name; + qstr.hash = full_name_hash(name, qstr.len); + + if (mutex_lock_interruptible(&sbi->wq_mutex)) { + kfree(qstr.name); + return -EINTR; + } + + ret = validate_request(&wq, sbi, &qstr, dentry, notify); + if (ret <= 0) { + if (ret == 0) + mutex_unlock(&sbi->wq_mutex); + kfree(qstr.name); + return ret; + } if (!wq) { /* Create a new wait queue */ wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); if (!wq) { - kfree(name); + kfree(qstr.name); mutex_unlock(&sbi->wq_mutex); return -ENOMEM; } @@ -289,9 +388,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->next = sbi->queues; sbi->queues = wq; init_waitqueue_head(&wq->queue); - wq->hash = hash; - wq->name = name; - wq->len = len; + memcpy(&wq->name, &qstr, sizeof(struct qstr)); wq->dev = autofs4_get_dev(sbi); wq->ino = autofs4_get_ino(sbi); wq->uid = current->uid; @@ -299,7 +396,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->pid = current->pid; wq->tgid = current->tgid; wq->status = -EINTR; /* Status return if interrupted */ - atomic_set(&wq->wait_ctr, 2); + wq->wait_ctr = 2; mutex_unlock(&sbi->wq_mutex); if (sbi->version < 5) { @@ -319,28 +416,25 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, } DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", - (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); + (unsigned long) wq->wait_queue_token, wq->name.len, + wq->name.name, notify); /* autofs4_notify_daemon() may block */ autofs4_notify_daemon(sbi, wq, type); } else { - atomic_inc(&wq->wait_ctr); + wq->wait_ctr++; mutex_unlock(&sbi->wq_mutex); - kfree(name); + kfree(qstr.name); DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", - (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); - } - - /* wq->name is NULL if and only if the lock is already released */ - - if (sbi->catatonic) { - /* We might have slept, so check again for catatonic mode */ - wq->status = -ENOENT; - kfree(wq->name); - wq->name = NULL; + (unsigned long) wq->wait_queue_token, wq->name.len, + wq->name.name, notify); } - if (wq->name) { + /* + * wq->name.name is NULL iff the lock is already released + * or the mount has been made catatonic. + */ + if (wq->name.name) { /* Block all but "shutdown" signals while waiting */ sigset_t oldset; unsigned long irqflags; @@ -351,7 +445,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, recalc_sigpending(); spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); - wait_event_interruptible(wq->queue, wq->name == NULL); + wait_event_interruptible(wq->queue, wq->name.name == NULL); spin_lock_irqsave(¤t->sighand->siglock, irqflags); current->blocked = oldset; @@ -364,8 +458,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, status = wq->status; /* Are we the last process to need status? */ - if (atomic_dec_and_test(&wq->wait_ctr)) + mutex_lock(&sbi->wq_mutex); + if (!--wq->wait_ctr) kfree(wq); + mutex_unlock(&sbi->wq_mutex); return status; } @@ -387,16 +483,13 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok } *wql = wq->next; /* Unlink from chain */ - mutex_unlock(&sbi->wq_mutex); - kfree(wq->name); - wq->name = NULL; /* Do not wait on this queue */ - + kfree(wq->name.name); + wq->name.name = NULL; /* Do not wait on this queue */ wq->status = status; - - if (atomic_dec_and_test(&wq->wait_ctr)) /* Is anyone still waiting for this guy? */ + wake_up_interruptible(&wq->queue); + if (!--wq->wait_ctr) kfree(wq); - else - wake_up_interruptible(&wq->queue); + mutex_unlock(&sbi->wq_mutex); return 0; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d48ff5f370f..639d2d8b571 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -204,6 +204,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_GID, tsk->gid); NEW_AUX_ENT(AT_EGID, tsk->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); + NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 7191306367c..756205314c2 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -27,6 +27,7 @@ #include <linux/namei.h> #include <linux/mount.h> #include <linux/syscalls.h> +#include <linux/fs.h> #include <asm/uaccess.h> @@ -535,31 +536,16 @@ static ssize_t bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) { Node *e = file->f_path.dentry->d_inode->i_private; - loff_t pos = *ppos; ssize_t res; char *page; - int len; if (!(page = (char*) __get_free_page(GFP_KERNEL))) return -ENOMEM; entry_status(e, page); - len = strlen(page); - res = -EINVAL; - if (pos < 0) - goto out; - res = 0; - if (pos >= len) - goto out; - if (len < pos + nbytes) - nbytes = len - pos; - res = -EFAULT; - if (copy_to_user(buf, page + pos, nbytes)) - goto out; - *ppos = pos + nbytes; - res = nbytes; -out: + res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page)); + free_page((unsigned long) page); return res; } diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index e3eb3556622..40c36f7352a 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -362,8 +362,9 @@ static int init_coda_psdev(void) goto out_chrdev; } for (i = 0; i < MAX_CODADEVS; i++) - device_create(coda_psdev_class, NULL, - MKDEV(CODA_PSDEV_MAJOR,i), "cfs%d", i); + device_create_drvdata(coda_psdev_class, NULL, + MKDEV(CODA_PSDEV_MAJOR, i), + NULL, "cfs%d", i); coda_sysctl_init(); goto out; diff --git a/fs/compat.c b/fs/compat.c index ed43e17a5dc..106eba28ec5 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -197,8 +197,8 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * { if (sizeof ubuf->f_blocks == 4) { - if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) & - 0xffffffff00000000ULL) + if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail | + kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) return -EOVERFLOW; /* f_files and f_ffree may be -1; it's okay * to stuff that into 32 bits */ @@ -271,8 +271,8 @@ out: static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) { if (sizeof ubuf->f_blocks == 4) { - if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) & - 0xffffffff00000000ULL) + if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail | + kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) return -EOVERFLOW; /* f_files and f_ffree may be -1; it's okay * to stuff that into 32 bits */ @@ -2131,9 +2131,9 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, #ifdef CONFIG_SIGNALFD -asmlinkage long compat_sys_signalfd(int ufd, - const compat_sigset_t __user *sigmask, - compat_size_t sigsetsize) +asmlinkage long compat_sys_signalfd4(int ufd, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize, int flags) { compat_sigset_t ss32; sigset_t tmp; @@ -2148,9 +2148,15 @@ asmlinkage long compat_sys_signalfd(int ufd, if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t))) return -EFAULT; - return sys_signalfd(ufd, ksigmask, sizeof(sigset_t)); + return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags); } +asmlinkage long compat_sys_signalfd(int ufd, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize) +{ + return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0); +} #endif /* CONFIG_SIGNALFD */ #ifdef CONFIG_TIMERFD diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c54eaab71a1..18e2c548161 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -58,7 +58,6 @@ #include <linux/syscalls.h> #include <linux/i2c.h> #include <linux/i2c-dev.h> -#include <linux/wireless.h> #include <linux/atalk.h> #include <linux/loop.h> @@ -1759,64 +1758,6 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a return sys_ioctl(fd, cmd, (unsigned long)tdata); } -struct compat_iw_point { - compat_caddr_t pointer; - __u16 length; - __u16 flags; -}; - -static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - struct iwreq __user *iwr; - struct iwreq __user *iwr_u; - struct iw_point __user *iwp; - struct compat_iw_point __user *iwp_u; - compat_caddr_t pointer_u; - void __user *pointer; - __u16 length, flags; - int ret; - - iwr_u = compat_ptr(arg); - iwp_u = (struct compat_iw_point __user *) &iwr_u->u.data; - iwr = compat_alloc_user_space(sizeof(*iwr)); - if (iwr == NULL) - return -ENOMEM; - - iwp = &iwr->u.data; - - if (!access_ok(VERIFY_WRITE, iwr, sizeof(*iwr))) - return -EFAULT; - - if (__copy_in_user(&iwr->ifr_ifrn.ifrn_name[0], - &iwr_u->ifr_ifrn.ifrn_name[0], - sizeof(iwr->ifr_ifrn.ifrn_name))) - return -EFAULT; - - if (__get_user(pointer_u, &iwp_u->pointer) || - __get_user(length, &iwp_u->length) || - __get_user(flags, &iwp_u->flags)) - return -EFAULT; - - if (__put_user(compat_ptr(pointer_u), &iwp->pointer) || - __put_user(length, &iwp->length) || - __put_user(flags, &iwp->flags)) - return -EFAULT; - - ret = sys_ioctl(fd, cmd, (unsigned long) iwr); - - if (__get_user(pointer, &iwp->pointer) || - __get_user(length, &iwp->length) || - __get_user(flags, &iwp->flags)) - return -EFAULT; - - if (__put_user(ptr_to_compat(pointer), &iwp_u->pointer) || - __put_user(length, &iwp_u->length) || - __put_user(flags, &iwp_u->flags)) - return -EFAULT; - - return ret; -} - /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE * for some operations; this forces use of the newer bridge-utils that * use compatiable ioctls @@ -2356,8 +2297,6 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER) COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE) COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI) COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER) -COMPATIBLE_IOCTL(AUTOFS_IOC_ASKREGHOST) -COMPATIBLE_IOCTL(AUTOFS_IOC_TOGGLEREGHOST) COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT) /* Raw devices */ COMPATIBLE_IOCTL(RAW_SETBIND) @@ -2405,6 +2344,7 @@ COMPATIBLE_IOCTL(HCIGETDEVLIST) COMPATIBLE_IOCTL(HCIGETDEVINFO) COMPATIBLE_IOCTL(HCIGETCONNLIST) COMPATIBLE_IOCTL(HCIGETCONNINFO) +COMPATIBLE_IOCTL(HCIGETAUTHINFO) COMPATIBLE_IOCTL(HCISETRAW) COMPATIBLE_IOCTL(HCISETSCAN) COMPATIBLE_IOCTL(HCISETAUTH) @@ -2501,36 +2441,6 @@ COMPATIBLE_IOCTL(I2C_TENBIT) COMPATIBLE_IOCTL(I2C_PEC) COMPATIBLE_IOCTL(I2C_RETRIES) COMPATIBLE_IOCTL(I2C_TIMEOUT) -/* wireless */ -COMPATIBLE_IOCTL(SIOCSIWCOMMIT) -COMPATIBLE_IOCTL(SIOCGIWNAME) -COMPATIBLE_IOCTL(SIOCSIWNWID) -COMPATIBLE_IOCTL(SIOCGIWNWID) -COMPATIBLE_IOCTL(SIOCSIWFREQ) -COMPATIBLE_IOCTL(SIOCGIWFREQ) -COMPATIBLE_IOCTL(SIOCSIWMODE) -COMPATIBLE_IOCTL(SIOCGIWMODE) -COMPATIBLE_IOCTL(SIOCSIWSENS) -COMPATIBLE_IOCTL(SIOCGIWSENS) -COMPATIBLE_IOCTL(SIOCSIWRANGE) -COMPATIBLE_IOCTL(SIOCSIWPRIV) -COMPATIBLE_IOCTL(SIOCSIWSTATS) -COMPATIBLE_IOCTL(SIOCSIWAP) -COMPATIBLE_IOCTL(SIOCGIWAP) -COMPATIBLE_IOCTL(SIOCSIWRATE) -COMPATIBLE_IOCTL(SIOCGIWRATE) -COMPATIBLE_IOCTL(SIOCSIWRTS) -COMPATIBLE_IOCTL(SIOCGIWRTS) -COMPATIBLE_IOCTL(SIOCSIWFRAG) -COMPATIBLE_IOCTL(SIOCGIWFRAG) -COMPATIBLE_IOCTL(SIOCSIWTXPOW) -COMPATIBLE_IOCTL(SIOCGIWTXPOW) -COMPATIBLE_IOCTL(SIOCSIWRETRY) -COMPATIBLE_IOCTL(SIOCGIWRETRY) -COMPATIBLE_IOCTL(SIOCSIWPOWER) -COMPATIBLE_IOCTL(SIOCGIWPOWER) -COMPATIBLE_IOCTL(SIOCSIWAUTH) -COMPATIBLE_IOCTL(SIOCGIWAUTH) /* hiddev */ COMPATIBLE_IOCTL(HIDIOCGVERSION) COMPATIBLE_IOCTL(HIDIOCAPPLICATION) @@ -2761,29 +2671,7 @@ COMPATIBLE_IOCTL(USBDEVFS_IOCTL32) HANDLE_IOCTL(I2C_FUNCS, w_long) HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl) HANDLE_IOCTL(I2C_SMBUS, do_i2c_smbus_ioctl) -/* wireless */ -HANDLE_IOCTL(SIOCGIWRANGE, do_wireless_ioctl) -HANDLE_IOCTL(SIOCGIWPRIV, do_wireless_ioctl) -HANDLE_IOCTL(SIOCGIWSTATS, do_wireless_ioctl) -HANDLE_IOCTL(SIOCSIWSPY, do_wireless_ioctl) -HANDLE_IOCTL(SIOCGIWSPY, do_wireless_ioctl) -HANDLE_IOCTL(SIOCSIWTHRSPY, do_wireless_ioctl) -HANDLE_IOCTL(SIOCGIWTHRSPY, do_wireless_ioctl) -HANDLE_IOCTL(SIOCSIWMLME, do_wireless_ioctl) -HANDLE_IOCTL(SIOCGIWAPLIST, do_wireless_ioctl) -HANDLE_IOCTL(SIOCSIWSCAN, do_wireless_ioctl) -HANDLE_IOCTL(SIOCGIWSCAN, do_wireless_ioctl) -HANDLE_IOCTL(SIOCSIWESSID, do_wireless_ioctl) -HANDLE_IOCTL(SIOCGIWESSID, do_wireless_ioctl) -HANDLE_IOCTL(SIOCSIWNICKN, do_wireless_ioctl) -HANDLE_IOCTL(SIOCGIWNICKN, do_wireless_ioctl) -HANDLE_IOCTL(SIOCSIWENCODE, do_wireless_ioctl) -HANDLE_IOCTL(SIOCGIWENCODE, do_wireless_ioctl) -HANDLE_IOCTL(SIOCSIWGENIE, do_wireless_ioctl) -HANDLE_IOCTL(SIOCGIWGENIE, do_wireless_ioctl) -HANDLE_IOCTL(SIOCSIWENCODEEXT, do_wireless_ioctl) -HANDLE_IOCTL(SIOCGIWENCODEEXT, do_wireless_ioctl) -HANDLE_IOCTL(SIOCSIWPMKSA, do_wireless_ioctl) +/* bridge */ HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl) HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl) /* Not implemented in the native kernel */ diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 0e64312a084..179589be063 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1027,9 +1027,10 @@ EXPORT_SYMBOL(configfs_undepend_item); static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - int ret, module_got = 0; - struct config_group *group; - struct config_item *item; + int ret = 0; + int module_got = 0; + struct config_group *group = NULL; + struct config_item *item = NULL; struct config_item *parent_item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; @@ -1070,25 +1071,30 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); mutex_lock(&subsys->su_mutex); - group = NULL; - item = NULL; if (type->ct_group_ops->make_group) { - ret = type->ct_group_ops->make_group(to_config_group(parent_item), name, &group); - if (!ret) { + group = type->ct_group_ops->make_group(to_config_group(parent_item), name); + if (!group) + group = ERR_PTR(-ENOMEM); + if (!IS_ERR(group)) { link_group(to_config_group(parent_item), group); item = &group->cg_item; - } + } else + ret = PTR_ERR(group); } else { - ret = type->ct_group_ops->make_item(to_config_group(parent_item), name, &item); - if (!ret) + item = type->ct_group_ops->make_item(to_config_group(parent_item), name); + if (!item) + item = ERR_PTR(-ENOMEM); + if (!IS_ERR(item)) link_obj(parent_item, item); + else + ret = PTR_ERR(item); } mutex_unlock(&subsys->su_mutex); kfree(name); if (ret) { /* - * If ret != 0, then link_obj() was never called. + * If item == NULL, then link_obj() was never called. * There are no extra references to clean up. */ goto out_put; diff --git a/fs/dcache.c b/fs/dcache.c index 6068c25b393..3818d6ab76c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -61,7 +61,6 @@ static struct kmem_cache *dentry_cache __read_mostly; static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; static struct hlist_head *dentry_hashtable __read_mostly; -static LIST_HEAD(dentry_unused); /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { @@ -96,14 +95,6 @@ static void d_free(struct dentry *dentry) call_rcu(&dentry->d_u.d_rcu, d_callback); } -static void dentry_lru_remove(struct dentry *dentry) -{ - if (!list_empty(&dentry->d_lru)) { - list_del_init(&dentry->d_lru); - dentry_stat.nr_unused--; - } -} - /* * Release the dentry's inode, using the filesystem * d_iput() operation if defined. @@ -130,6 +121,41 @@ static void dentry_iput(struct dentry * dentry) } } +/* + * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held. + */ +static void dentry_lru_add(struct dentry *dentry) +{ + list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); + dentry->d_sb->s_nr_dentry_unused++; + dentry_stat.nr_unused++; +} + +static void dentry_lru_add_tail(struct dentry *dentry) +{ + list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); + dentry->d_sb->s_nr_dentry_unused++; + dentry_stat.nr_unused++; +} + +static void dentry_lru_del(struct dentry *dentry) +{ + if (!list_empty(&dentry->d_lru)) { + list_del(&dentry->d_lru); + dentry->d_sb->s_nr_dentry_unused--; + dentry_stat.nr_unused--; + } +} + +static void dentry_lru_del_init(struct dentry *dentry) +{ + if (likely(!list_empty(&dentry->d_lru))) { + list_del_init(&dentry->d_lru); + dentry->d_sb->s_nr_dentry_unused--; + dentry_stat.nr_unused--; + } +} + /** * d_kill - kill dentry and return parent * @dentry: dentry to kill @@ -212,8 +238,7 @@ repeat: goto kill_it; if (list_empty(&dentry->d_lru)) { dentry->d_flags |= DCACHE_REFERENCED; - list_add(&dentry->d_lru, &dentry_unused); - dentry_stat.nr_unused++; + dentry_lru_add(dentry); } spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); @@ -222,7 +247,8 @@ repeat: unhash_it: __d_drop(dentry); kill_it: - dentry_lru_remove(dentry); + /* if dentry was on the d_lru list delete it from there */ + dentry_lru_del(dentry); dentry = d_kill(dentry); if (dentry) goto repeat; @@ -290,7 +316,7 @@ int d_invalidate(struct dentry * dentry) static inline struct dentry * __dget_locked(struct dentry *dentry) { atomic_inc(&dentry->d_count); - dentry_lru_remove(dentry); + dentry_lru_del_init(dentry); return dentry; } @@ -406,133 +432,167 @@ static void prune_one_dentry(struct dentry * dentry) if (dentry->d_op && dentry->d_op->d_delete) dentry->d_op->d_delete(dentry); - dentry_lru_remove(dentry); + dentry_lru_del_init(dentry); __d_drop(dentry); dentry = d_kill(dentry); spin_lock(&dcache_lock); } } -/** - * prune_dcache - shrink the dcache - * @count: number of entries to try and free - * @sb: if given, ignore dentries for other superblocks - * which are being unmounted. - * - * Shrink the dcache. This is done when we need - * more memory, or simply when we need to unmount - * something (at which point we need to unuse - * all dentries). - * - * This function may fail to free any resources if - * all the dentries are in use. +/* + * Shrink the dentry LRU on a given superblock. + * @sb : superblock to shrink dentry LRU. + * @count: If count is NULL, we prune all dentries on superblock. + * @flags: If flags is non-zero, we need to do special processing based on + * which flags are set. This means we don't need to maintain multiple + * similar copies of this loop. */ - -static void prune_dcache(int count, struct super_block *sb) +static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) { - spin_lock(&dcache_lock); - for (; count ; count--) { - struct dentry *dentry; - struct list_head *tmp; - struct rw_semaphore *s_umount; - - cond_resched_lock(&dcache_lock); + LIST_HEAD(referenced); + LIST_HEAD(tmp); + struct dentry *dentry; + int cnt = 0; - tmp = dentry_unused.prev; - if (sb) { - /* Try to find a dentry for this sb, but don't try - * too hard, if they aren't near the tail they will - * be moved down again soon + BUG_ON(!sb); + BUG_ON((flags & DCACHE_REFERENCED) && count == NULL); + spin_lock(&dcache_lock); + if (count != NULL) + /* called from prune_dcache() and shrink_dcache_parent() */ + cnt = *count; +restart: + if (count == NULL) + list_splice_init(&sb->s_dentry_lru, &tmp); + else { + while (!list_empty(&sb->s_dentry_lru)) { + dentry = list_entry(sb->s_dentry_lru.prev, + struct dentry, d_lru); + BUG_ON(dentry->d_sb != sb); + + spin_lock(&dentry->d_lock); + /* + * If we are honouring the DCACHE_REFERENCED flag and + * the dentry has this flag set, don't free it. Clear + * the flag and put it back on the LRU. */ - int skip = count; - while (skip && tmp != &dentry_unused && - list_entry(tmp, struct dentry, d_lru)->d_sb != sb) { - skip--; - tmp = tmp->prev; + if ((flags & DCACHE_REFERENCED) + && (dentry->d_flags & DCACHE_REFERENCED)) { + dentry->d_flags &= ~DCACHE_REFERENCED; + list_move_tail(&dentry->d_lru, &referenced); + spin_unlock(&dentry->d_lock); + } else { + list_move_tail(&dentry->d_lru, &tmp); + spin_unlock(&dentry->d_lock); + cnt--; + if (!cnt) + break; } } - if (tmp == &dentry_unused) - break; - list_del_init(tmp); - prefetch(dentry_unused.prev); - dentry_stat.nr_unused--; - dentry = list_entry(tmp, struct dentry, d_lru); - - spin_lock(&dentry->d_lock); + } + while (!list_empty(&tmp)) { + dentry = list_entry(tmp.prev, struct dentry, d_lru); + dentry_lru_del_init(dentry); + spin_lock(&dentry->d_lock); /* * We found an inuse dentry which was not removed from - * dentry_unused because of laziness during lookup. Do not free - * it - just keep it off the dentry_unused list. + * the LRU because of laziness during lookup. Do not free + * it - just keep it off the LRU list. */ - if (atomic_read(&dentry->d_count)) { - spin_unlock(&dentry->d_lock); + if (atomic_read(&dentry->d_count)) { + spin_unlock(&dentry->d_lock); continue; } - /* If the dentry was recently referenced, don't free it. */ - if (dentry->d_flags & DCACHE_REFERENCED) { - dentry->d_flags &= ~DCACHE_REFERENCED; - list_add(&dentry->d_lru, &dentry_unused); - dentry_stat.nr_unused++; - spin_unlock(&dentry->d_lock); + prune_one_dentry(dentry); + /* dentry->d_lock was dropped in prune_one_dentry() */ + cond_resched_lock(&dcache_lock); + } + if (count == NULL && !list_empty(&sb->s_dentry_lru)) + goto restart; + if (count != NULL) + *count = cnt; + if (!list_empty(&referenced)) + list_splice(&referenced, &sb->s_dentry_lru); + spin_unlock(&dcache_lock); +} + +/** + * prune_dcache - shrink the dcache + * @count: number of entries to try to free + * + * Shrink the dcache. This is done when we need more memory, or simply when we + * need to unmount something (at which point we need to unuse all dentries). + * + * This function may fail to free any resources if all the dentries are in use. + */ +static void prune_dcache(int count) +{ + struct super_block *sb; + int w_count; + int unused = dentry_stat.nr_unused; + int prune_ratio; + int pruned; + + if (unused == 0 || count == 0) + return; + spin_lock(&dcache_lock); +restart: + if (count >= unused) + prune_ratio = 1; + else + prune_ratio = unused / count; + spin_lock(&sb_lock); + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_nr_dentry_unused == 0) continue; - } - /* - * If the dentry is not DCACHED_REFERENCED, it is time - * to remove it from the dcache, provided the super block is - * NULL (which means we are trying to reclaim memory) - * or this dentry belongs to the same super block that - * we want to shrink. - */ - /* - * If this dentry is for "my" filesystem, then I can prune it - * without taking the s_umount lock (I already hold it). + sb->s_count++; + /* Now, we reclaim unused dentrins with fairness. + * We reclaim them same percentage from each superblock. + * We calculate number of dentries to scan on this sb + * as follows, but the implementation is arranged to avoid + * overflows: + * number of dentries to scan on this sb = + * count * (number of dentries on this sb / + * number of dentries in the machine) */ - if (sb && dentry->d_sb == sb) { - prune_one_dentry(dentry); - continue; - } + spin_unlock(&sb_lock); + if (prune_ratio != 1) + w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1; + else + w_count = sb->s_nr_dentry_unused; + pruned = w_count; /* - * ...otherwise we need to be sure this filesystem isn't being - * unmounted, otherwise we could race with - * generic_shutdown_super(), and end up holding a reference to - * an inode while the filesystem is unmounted. - * So we try to get s_umount, and make sure s_root isn't NULL. - * (Take a local copy of s_umount to avoid a use-after-free of - * `dentry'). + * We need to be sure this filesystem isn't being unmounted, + * otherwise we could race with generic_shutdown_super(), and + * end up holding a reference to an inode while the filesystem + * is unmounted. So we try to get s_umount, and make sure + * s_root isn't NULL. */ - s_umount = &dentry->d_sb->s_umount; - if (down_read_trylock(s_umount)) { - if (dentry->d_sb->s_root != NULL) { - prune_one_dentry(dentry); - up_read(s_umount); - continue; + if (down_read_trylock(&sb->s_umount)) { + if ((sb->s_root != NULL) && + (!list_empty(&sb->s_dentry_lru))) { + spin_unlock(&dcache_lock); + __shrink_dcache_sb(sb, &w_count, + DCACHE_REFERENCED); + pruned -= w_count; + spin_lock(&dcache_lock); } - up_read(s_umount); + up_read(&sb->s_umount); } - spin_unlock(&dentry->d_lock); + spin_lock(&sb_lock); + count -= pruned; /* - * Insert dentry at the head of the list as inserting at the - * tail leads to a cycle. + * restart only when sb is no longer on the list and + * we have more work to do. */ - list_add(&dentry->d_lru, &dentry_unused); - dentry_stat.nr_unused++; + if (__put_super_and_need_restart(sb) && count > 0) { + spin_unlock(&sb_lock); + goto restart; + } } + spin_unlock(&sb_lock); spin_unlock(&dcache_lock); } -/* - * Shrink the dcache for the specified super block. - * This allows us to unmount a device without disturbing - * the dcache for the other devices. - * - * This implementation makes just two traversals of the - * unused list. On the first pass we move the selected - * dentries to the most recent end, and on the second - * pass we free them. The second pass must restart after - * each dput(), but since the target dentries are all at - * the end, it's really just a single traversal. - */ - /** * shrink_dcache_sb - shrink dcache for a superblock * @sb: superblock @@ -541,44 +601,9 @@ static void prune_dcache(int count, struct super_block *sb) * is used to free the dcache before unmounting a file * system */ - void shrink_dcache_sb(struct super_block * sb) { - struct list_head *tmp, *next; - struct dentry *dentry; - - /* - * Pass one ... move the dentries for the specified - * superblock to the most recent end of the unused list. - */ - spin_lock(&dcache_lock); - list_for_each_prev_safe(tmp, next, &dentry_unused) { - dentry = list_entry(tmp, struct dentry, d_lru); - if (dentry->d_sb != sb) - continue; - list_move_tail(tmp, &dentry_unused); - } - - /* - * Pass two ... free the dentries for this superblock. - */ -repeat: - list_for_each_prev_safe(tmp, next, &dentry_unused) { - dentry = list_entry(tmp, struct dentry, d_lru); - if (dentry->d_sb != sb) - continue; - dentry_stat.nr_unused--; - list_del_init(tmp); - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count)) { - spin_unlock(&dentry->d_lock); - continue; - } - prune_one_dentry(dentry); - cond_resched_lock(&dcache_lock); - goto repeat; - } - spin_unlock(&dcache_lock); + __shrink_dcache_sb(sb, NULL, 0); } /* @@ -595,7 +620,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* detach this root from the system */ spin_lock(&dcache_lock); - dentry_lru_remove(dentry); + dentry_lru_del_init(dentry); __d_drop(dentry); spin_unlock(&dcache_lock); @@ -609,7 +634,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_lock(&dcache_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { - dentry_lru_remove(loop); + dentry_lru_del_init(loop); __d_drop(loop); cond_resched_lock(&dcache_lock); } @@ -791,14 +816,13 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - dentry_lru_remove(dentry); + dentry_lru_del_init(dentry); /* * move only zero ref count dentries to the end * of the unused list for prune_dcache */ if (!atomic_read(&dentry->d_count)) { - list_add_tail(&dentry->d_lru, &dentry_unused); - dentry_stat.nr_unused++; + dentry_lru_add_tail(dentry); found++; } @@ -840,10 +864,11 @@ out: void shrink_dcache_parent(struct dentry * parent) { + struct super_block *sb = parent->d_sb; int found; while ((found = select_parent(parent)) != 0) - prune_dcache(found, parent->d_sb); + __shrink_dcache_sb(sb, &found, 0); } /* @@ -863,7 +888,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask) if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; - prune_dcache(nr, NULL); + prune_dcache(nr); } return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } @@ -1215,7 +1240,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while * lookup is going on. * - * dentry_unused list is not updated even if lookup finds the required dentry + * The dentry unused LRU is not updated even if lookup finds the required dentry * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, * select_parent and __dget_locked. This laziness saves lookup from dcache_lock * acquisition. diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index e9602d85c11..08e28c9bb41 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -309,6 +309,31 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, } EXPORT_SYMBOL_GPL(debugfs_create_symlink); +static void __debugfs_remove(struct dentry *dentry, struct dentry *parent) +{ + int ret = 0; + + if (debugfs_positive(dentry)) { + if (dentry->d_inode) { + dget(dentry); + switch (dentry->d_inode->i_mode & S_IFMT) { + case S_IFDIR: + ret = simple_rmdir(parent->d_inode, dentry); + break; + case S_IFLNK: + kfree(dentry->d_inode->i_private); + /* fall through */ + default: + simple_unlink(parent->d_inode, dentry); + break; + } + if (!ret) + d_delete(dentry); + dput(dentry); + } + } +} + /** * debugfs_remove - removes a file or directory from the debugfs filesystem * @dentry: a pointer to a the dentry of the file or directory to be @@ -325,7 +350,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_symlink); void debugfs_remove(struct dentry *dentry) { struct dentry *parent; - int ret = 0; if (!dentry) return; @@ -335,29 +359,83 @@ void debugfs_remove(struct dentry *dentry) return; mutex_lock(&parent->d_inode->i_mutex); - if (debugfs_positive(dentry)) { - if (dentry->d_inode) { - dget(dentry); - switch (dentry->d_inode->i_mode & S_IFMT) { - case S_IFDIR: - ret = simple_rmdir(parent->d_inode, dentry); - break; - case S_IFLNK: - kfree(dentry->d_inode->i_private); - /* fall through */ - default: - simple_unlink(parent->d_inode, dentry); + __debugfs_remove(dentry, parent); + mutex_unlock(&parent->d_inode->i_mutex); + simple_release_fs(&debugfs_mount, &debugfs_mount_count); +} +EXPORT_SYMBOL_GPL(debugfs_remove); + +/** + * debugfs_remove_recursive - recursively removes a directory + * @dentry: a pointer to a the dentry of the directory to be removed. + * + * This function recursively removes a directory tree in debugfs that + * was previously created with a call to another debugfs function + * (like debugfs_create_file() or variants thereof.) + * + * This function is required to be called in order for the file to be + * removed, no automatic cleanup of files will happen when a module is + * removed, you are responsible here. + */ +void debugfs_remove_recursive(struct dentry *dentry) +{ + struct dentry *child; + struct dentry *parent; + + if (!dentry) + return; + + parent = dentry->d_parent; + if (!parent || !parent->d_inode) + return; + + parent = dentry; + mutex_lock(&parent->d_inode->i_mutex); + + while (1) { + /* + * When all dentries under "parent" has been removed, + * walk up the tree until we reach our starting point. + */ + if (list_empty(&parent->d_subdirs)) { + mutex_unlock(&parent->d_inode->i_mutex); + if (parent == dentry) break; - } - if (!ret) - d_delete(dentry); - dput(dentry); + parent = parent->d_parent; + mutex_lock(&parent->d_inode->i_mutex); + } + child = list_entry(parent->d_subdirs.next, struct dentry, + d_u.d_child); + + /* + * If "child" isn't empty, walk down the tree and + * remove all its descendants first. + */ + if (!list_empty(&child->d_subdirs)) { + mutex_unlock(&parent->d_inode->i_mutex); + parent = child; + mutex_lock(&parent->d_inode->i_mutex); + continue; } + __debugfs_remove(child, parent); + if (parent->d_subdirs.next == &child->d_u.d_child) { + /* + * Avoid infinite loop if we fail to remove + * one dentry. + */ + mutex_unlock(&parent->d_inode->i_mutex); + break; + } + simple_release_fs(&debugfs_mount, &debugfs_mount_count); } + + parent = dentry->d_parent; + mutex_lock(&parent->d_inode->i_mutex); + __debugfs_remove(dentry, parent); mutex_unlock(&parent->d_inode->i_mutex); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } -EXPORT_SYMBOL_GPL(debugfs_remove); +EXPORT_SYMBOL_GPL(debugfs_remove_recursive); /** * debugfs_rename - rename a file/directory in the debugfs filesystem diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 492d8caaaf2..c4e7d721bd8 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -41,20 +41,16 @@ struct comm; struct nodes; struct node; -static int make_cluster(struct config_group *, const char *, - struct config_group **); +static struct config_group *make_cluster(struct config_group *, const char *); static void drop_cluster(struct config_group *, struct config_item *); static void release_cluster(struct config_item *); -static int make_space(struct config_group *, const char *, - struct config_group **); +static struct config_group *make_space(struct config_group *, const char *); static void drop_space(struct config_group *, struct config_item *); static void release_space(struct config_item *); -static int make_comm(struct config_group *, const char *, - struct config_item **); +static struct config_item *make_comm(struct config_group *, const char *); static void drop_comm(struct config_group *, struct config_item *); static void release_comm(struct config_item *); -static int make_node(struct config_group *, const char *, - struct config_item **); +static struct config_item *make_node(struct config_group *, const char *); static void drop_node(struct config_group *, struct config_item *); static void release_node(struct config_item *); @@ -396,8 +392,8 @@ static struct node *to_node(struct config_item *i) return i ? container_of(i, struct node, item) : NULL; } -static int make_cluster(struct config_group *g, const char *name, - struct config_group **new_g) +static struct config_group *make_cluster(struct config_group *g, + const char *name) { struct cluster *cl = NULL; struct spaces *sps = NULL; @@ -435,15 +431,14 @@ static int make_cluster(struct config_group *g, const char *name, space_list = &sps->ss_group; comm_list = &cms->cs_group; - *new_g = &cl->group; - return 0; + return &cl->group; fail: kfree(cl); kfree(gps); kfree(sps); kfree(cms); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } static void drop_cluster(struct config_group *g, struct config_item *i) @@ -471,8 +466,7 @@ static void release_cluster(struct config_item *i) kfree(cl); } -static int make_space(struct config_group *g, const char *name, - struct config_group **new_g) +static struct config_group *make_space(struct config_group *g, const char *name) { struct space *sp = NULL; struct nodes *nds = NULL; @@ -495,14 +489,13 @@ static int make_space(struct config_group *g, const char *name, INIT_LIST_HEAD(&sp->members); mutex_init(&sp->members_lock); sp->members_count = 0; - *new_g = &sp->group; - return 0; + return &sp->group; fail: kfree(sp); kfree(gps); kfree(nds); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } static void drop_space(struct config_group *g, struct config_item *i) @@ -529,21 +522,19 @@ static void release_space(struct config_item *i) kfree(sp); } -static int make_comm(struct config_group *g, const char *name, - struct config_item **new_i) +static struct config_item *make_comm(struct config_group *g, const char *name) { struct comm *cm; cm = kzalloc(sizeof(struct comm), GFP_KERNEL); if (!cm) - return -ENOMEM; + return ERR_PTR(-ENOMEM); config_item_init_type_name(&cm->item, name, &comm_type); cm->nodeid = -1; cm->local = 0; cm->addr_count = 0; - *new_i = &cm->item; - return 0; + return &cm->item; } static void drop_comm(struct config_group *g, struct config_item *i) @@ -563,15 +554,14 @@ static void release_comm(struct config_item *i) kfree(cm); } -static int make_node(struct config_group *g, const char *name, - struct config_item **new_i) +static struct config_item *make_node(struct config_group *g, const char *name) { struct space *sp = to_space(g->cg_item.ci_parent); struct node *nd; nd = kzalloc(sizeof(struct node), GFP_KERNEL); if (!nd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); config_item_init_type_name(&nd->item, name, &node_type); nd->nodeid = -1; @@ -583,8 +573,7 @@ static int make_node(struct config_group *g, const char *name, sp->members_count++; mutex_unlock(&sp->members_lock); - *new_i = &nd->item; - return 0; + return &nd->item; } static void drop_node(struct config_group *g, struct config_item *i) diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index 1e34a7fd488..b4755a85996 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o -ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o +ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index e2832bc7869..7b99917ffad 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -33,6 +33,7 @@ #include <linux/crypto.h> #include <linux/file.h> #include <linux/scatterlist.h> +#include <asm/unaligned.h> #include "ecryptfs_kernel.h" static int @@ -1032,10 +1033,8 @@ static int contains_ecryptfs_marker(char *data) { u32 m_1, m_2; - memcpy(&m_1, data, 4); - m_1 = be32_to_cpu(m_1); - memcpy(&m_2, (data + 4), 4); - m_2 = be32_to_cpu(m_2); + m_1 = get_unaligned_be32(data); + m_2 = get_unaligned_be32(data + 4); if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2) return 1; ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; " @@ -1073,8 +1072,7 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat, int i; u32 flags; - memcpy(&flags, page_virt, 4); - flags = be32_to_cpu(flags); + flags = get_unaligned_be32(page_virt); for (i = 0; i < ((sizeof(ecryptfs_flag_map) / sizeof(struct ecryptfs_flag_map_elem))); i++) if (flags & ecryptfs_flag_map[i].file_flag) { @@ -1100,11 +1098,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written) get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER); - m_1 = cpu_to_be32(m_1); - memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); - m_2 = cpu_to_be32(m_2); - memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2, - (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); + put_unaligned_be32(m_1, page_virt); + page_virt += (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2); + put_unaligned_be32(m_2, page_virt); (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; } @@ -1121,8 +1117,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, flags |= ecryptfs_flag_map[i].file_flag; /* Version is in top 8 bits of the 32-bit flag vector */ flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000); - flags = cpu_to_be32(flags); - memcpy(page_virt, &flags, 4); + put_unaligned_be32(flags, page_virt); (*written) = 4; } @@ -1238,11 +1233,9 @@ ecryptfs_write_header_metadata(char *virt, num_header_extents_at_front = (u16)(crypt_stat->num_header_bytes_at_front / crypt_stat->extent_size); - header_extent_size = cpu_to_be32(header_extent_size); - memcpy(virt, &header_extent_size, 4); + put_unaligned_be32(header_extent_size, virt); virt += 4; - num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front); - memcpy(virt, &num_header_extents_at_front, 2); + put_unaligned_be16(num_header_extents_at_front, virt); (*written) = 6; } @@ -1410,15 +1403,13 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, u32 header_extent_size; u16 num_header_extents_at_front; - memcpy(&header_extent_size, virt, sizeof(u32)); - header_extent_size = be32_to_cpu(header_extent_size); - virt += sizeof(u32); - memcpy(&num_header_extents_at_front, virt, sizeof(u16)); - num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front); + header_extent_size = get_unaligned_be32(virt); + virt += sizeof(__be32); + num_header_extents_at_front = get_unaligned_be16(virt); crypt_stat->num_header_bytes_at_front = (((size_t)num_header_extents_at_front * (size_t)header_extent_size)); - (*bytes_read) = (sizeof(u32) + sizeof(u16)); + (*bytes_read) = (sizeof(__be32) + sizeof(__be16)); if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) && (crypt_stat->num_header_bytes_at_front < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index c15c25745e0..b73fb752c5f 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -559,10 +559,25 @@ extern struct kmem_cache *ecryptfs_key_record_cache; extern struct kmem_cache *ecryptfs_key_sig_cache; extern struct kmem_cache *ecryptfs_global_auth_tok_cache; extern struct kmem_cache *ecryptfs_key_tfm_cache; +extern struct kmem_cache *ecryptfs_open_req_cache; +struct ecryptfs_open_req { +#define ECRYPTFS_REQ_PROCESSED 0x00000001 +#define ECRYPTFS_REQ_DROPPED 0x00000002 +#define ECRYPTFS_REQ_ZOMBIE 0x00000004 + u32 flags; + struct file **lower_file; + struct dentry *lower_dentry; + struct vfsmount *lower_mnt; + wait_queue_head_t wait; + struct mutex mux; + struct list_head kthread_ctl_list; +}; + +#define ECRYPTFS_INTERPOSE_FLAG_D_ADD 0x00000001 int ecryptfs_interpose(struct dentry *hidden_dentry, struct dentry *this_dentry, struct super_block *sb, - int flag); + u32 flags); int ecryptfs_fill_zeros(struct file *file, loff_t new_length); int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat, const char *name, int length, @@ -690,5 +705,11 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx); int ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, struct user_namespace *user_ns, struct pid *pid); +int ecryptfs_init_kthread(void); +void ecryptfs_destroy_kthread(void); +int ecryptfs_privileged_open(struct file **lower_file, + struct dentry *lower_dentry, + struct vfsmount *lower_mnt); +int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 24749bf0668..9244d653743 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -192,6 +192,23 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | ECRYPTFS_ENCRYPTED); } mutex_unlock(&crypt_stat->cs_mutex); + if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY) + && !(file->f_flags & O_RDONLY)) { + rc = -EPERM; + printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " + "file must hence be opened RO\n", __func__); + goto out; + } + if (!ecryptfs_inode_to_private(inode)->lower_file) { + rc = ecryptfs_init_persistent_file(ecryptfs_dentry); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize " + "the persistent file for the dentry with name " + "[%s]; rc = [%d]\n", __func__, + ecryptfs_dentry->d_name.name, rc); + goto out; + } + } ecryptfs_set_file_lower( file, ecryptfs_inode_to_private(inode)->lower_file); if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index c92cc1c00aa..d755455e3bf 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -31,6 +31,7 @@ #include <linux/mount.h> #include <linux/crypto.h> #include <linux/fs_stack.h> +#include <asm/unaligned.h> #include "ecryptfs_kernel.h" static struct dentry *lock_parent(struct dentry *dentry) @@ -188,6 +189,16 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) "context; rc = [%d]\n", rc); goto out; } + if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) { + rc = ecryptfs_init_persistent_file(ecryptfs_dentry); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize " + "the persistent file for the dentry with name " + "[%s]; rc = [%d]\n", __func__, + ecryptfs_dentry->d_name.name, rc); + goto out; + } + } rc = ecryptfs_write_metadata(ecryptfs_dentry); if (rc) { printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc); @@ -307,10 +318,11 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, d_add(dentry, NULL); goto out; } - rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1); + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, + ECRYPTFS_INTERPOSE_FLAG_D_ADD); if (rc) { ecryptfs_printk(KERN_ERR, "Error interposing\n"); - goto out_dput; + goto out; } if (S_ISDIR(lower_inode->i_mode)) { ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n"); @@ -336,11 +348,21 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Cannot ecryptfs_kmalloc a page\n"); - goto out_dput; + goto out; } crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) ecryptfs_set_default_sizes(crypt_stat); + if (!ecryptfs_inode_to_private(dentry->d_inode)->lower_file) { + rc = ecryptfs_init_persistent_file(dentry); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize " + "the persistent file for the dentry with name " + "[%s]; rc = [%d]\n", __func__, + dentry->d_name.name, rc); + goto out; + } + } rc = ecryptfs_read_and_validate_header_region(page_virt, dentry->d_inode); if (rc) { @@ -364,8 +386,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, else file_size = i_size_read(lower_dentry->d_inode); } else { - memcpy(&file_size, page_virt, sizeof(file_size)); - file_size = be64_to_cpu(file_size); + file_size = get_unaligned_be64(page_virt); } i_size_write(dentry->d_inode, (loff_t)file_size); kmem_cache_free(ecryptfs_header_cache_2, page_virt); diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index e82b457180b..f5b76a331b9 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -44,15 +44,15 @@ static int process_request_key_err(long err_code) int rc = 0; switch (err_code) { - case ENOKEY: + case -ENOKEY: ecryptfs_printk(KERN_WARNING, "No key\n"); rc = -ENOENT; break; - case EKEYEXPIRED: + case -EKEYEXPIRED: ecryptfs_printk(KERN_WARNING, "Key expired\n"); rc = -ETIME; break; - case EKEYREVOKED: + case -EKEYREVOKED: ecryptfs_printk(KERN_WARNING, "Key revoked\n"); rc = -EINVAL; break; @@ -963,8 +963,7 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { printk(KERN_ERR "Could not find key with description: [%s]\n", sig); - process_request_key_err(PTR_ERR(*auth_tok_key)); - rc = -EINVAL; + rc = process_request_key_err(PTR_ERR(*auth_tok_key)); goto out; } (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key); diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c new file mode 100644 index 00000000000..c440c6b58b2 --- /dev/null +++ b/fs/ecryptfs/kthread.c @@ -0,0 +1,203 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 2008 International Business Machines Corp. + * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <linux/kthread.h> +#include <linux/freezer.h> +#include <linux/wait.h> +#include <linux/mount.h> +#include "ecryptfs_kernel.h" + +struct kmem_cache *ecryptfs_open_req_cache; + +static struct ecryptfs_kthread_ctl { +#define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001 + u32 flags; + struct mutex mux; + struct list_head req_list; + wait_queue_head_t wait; +} ecryptfs_kthread_ctl; + +static struct task_struct *ecryptfs_kthread; + +/** + * ecryptfs_threadfn + * @ignored: ignored + * + * The eCryptfs kernel thread that has the responsibility of getting + * the lower persistent file with RW permissions. + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_threadfn(void *ignored) +{ + set_freezable(); + while (1) { + struct ecryptfs_open_req *req; + + wait_event_freezable( + ecryptfs_kthread_ctl.wait, + (!list_empty(&ecryptfs_kthread_ctl.req_list) + || kthread_should_stop())); + mutex_lock(&ecryptfs_kthread_ctl.mux); + if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) { + mutex_unlock(&ecryptfs_kthread_ctl.mux); + goto out; + } + while (!list_empty(&ecryptfs_kthread_ctl.req_list)) { + req = list_first_entry(&ecryptfs_kthread_ctl.req_list, + struct ecryptfs_open_req, + kthread_ctl_list); + mutex_lock(&req->mux); + list_del(&req->kthread_ctl_list); + if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) { + dget(req->lower_dentry); + mntget(req->lower_mnt); + (*req->lower_file) = dentry_open( + req->lower_dentry, req->lower_mnt, + (O_RDWR | O_LARGEFILE)); + req->flags |= ECRYPTFS_REQ_PROCESSED; + } + wake_up(&req->wait); + mutex_unlock(&req->mux); + } + mutex_unlock(&ecryptfs_kthread_ctl.mux); + } +out: + return 0; +} + +int ecryptfs_init_kthread(void) +{ + int rc = 0; + + mutex_init(&ecryptfs_kthread_ctl.mux); + init_waitqueue_head(&ecryptfs_kthread_ctl.wait); + INIT_LIST_HEAD(&ecryptfs_kthread_ctl.req_list); + ecryptfs_kthread = kthread_run(&ecryptfs_threadfn, NULL, + "ecryptfs-kthread"); + if (IS_ERR(ecryptfs_kthread)) { + rc = PTR_ERR(ecryptfs_kthread); + printk(KERN_ERR "%s: Failed to create kernel thread; rc = [%d]" + "\n", __func__, rc); + } + return rc; +} + +void ecryptfs_destroy_kthread(void) +{ + struct ecryptfs_open_req *req; + + mutex_lock(&ecryptfs_kthread_ctl.mux); + ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE; + list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list, + kthread_ctl_list) { + mutex_lock(&req->mux); + req->flags |= ECRYPTFS_REQ_ZOMBIE; + wake_up(&req->wait); + mutex_unlock(&req->mux); + } + mutex_unlock(&ecryptfs_kthread_ctl.mux); + kthread_stop(ecryptfs_kthread); + wake_up(&ecryptfs_kthread_ctl.wait); +} + +/** + * ecryptfs_privileged_open + * @lower_file: Result of dentry_open by root on lower dentry + * @lower_dentry: Lower dentry for file to open + * @lower_mnt: Lower vfsmount for file to open + * + * This function gets a r/w file opened againt the lower dentry. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_privileged_open(struct file **lower_file, + struct dentry *lower_dentry, + struct vfsmount *lower_mnt) +{ + struct ecryptfs_open_req *req; + int rc = 0; + + /* Corresponding dput() and mntput() are done when the + * persistent file is fput() when the eCryptfs inode is + * destroyed. */ + dget(lower_dentry); + mntget(lower_mnt); + (*lower_file) = dentry_open(lower_dentry, lower_mnt, + (O_RDWR | O_LARGEFILE)); + if (!IS_ERR(*lower_file)) + goto out; + req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL); + if (!req) { + rc = -ENOMEM; + goto out; + } + mutex_init(&req->mux); + req->lower_file = lower_file; + req->lower_dentry = lower_dentry; + req->lower_mnt = lower_mnt; + init_waitqueue_head(&req->wait); + req->flags = 0; + mutex_lock(&ecryptfs_kthread_ctl.mux); + if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) { + rc = -EIO; + mutex_unlock(&ecryptfs_kthread_ctl.mux); + printk(KERN_ERR "%s: We are in the middle of shutting down; " + "aborting privileged request to open lower file\n", + __func__); + goto out_free; + } + list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list); + mutex_unlock(&ecryptfs_kthread_ctl.mux); + wake_up(&ecryptfs_kthread_ctl.wait); + wait_event(req->wait, (req->flags != 0)); + mutex_lock(&req->mux); + BUG_ON(req->flags == 0); + if (req->flags & ECRYPTFS_REQ_DROPPED + || req->flags & ECRYPTFS_REQ_ZOMBIE) { + rc = -EIO; + printk(KERN_WARNING "%s: Privileged open request dropped\n", + __func__); + goto out_unlock; + } + if (IS_ERR(*req->lower_file)) { + rc = PTR_ERR(*req->lower_file); + dget(lower_dentry); + mntget(lower_mnt); + (*lower_file) = dentry_open(lower_dentry, lower_mnt, + (O_RDONLY | O_LARGEFILE)); + if (IS_ERR(*lower_file)) { + rc = PTR_ERR(*req->lower_file); + (*lower_file) = NULL; + printk(KERN_WARNING "%s: Error attempting privileged " + "open of lower file with either RW or RO " + "perms; rc = [%d]. Giving up.\n", + __func__, rc); + } + } +out_unlock: + mutex_unlock(&req->mux); +out_free: + kmem_cache_free(ecryptfs_open_req_cache, req); +out: + return rc; +} diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index d603631601e..6f403cfba14 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -117,7 +117,7 @@ void __ecryptfs_printk(const char *fmt, ...) * * Returns zero on success; non-zero otherwise */ -static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) +int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) { struct ecryptfs_inode_info *inode_info = ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); @@ -130,26 +130,12 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); - /* Corresponding dput() and mntput() are done when the - * persistent file is fput() when the eCryptfs inode - * is destroyed. */ - dget(lower_dentry); - mntget(lower_mnt); - inode_info->lower_file = dentry_open(lower_dentry, - lower_mnt, - (O_RDWR | O_LARGEFILE)); - if (IS_ERR(inode_info->lower_file)) { - dget(lower_dentry); - mntget(lower_mnt); - inode_info->lower_file = dentry_open(lower_dentry, - lower_mnt, - (O_RDONLY - | O_LARGEFILE)); - } - if (IS_ERR(inode_info->lower_file)) { + rc = ecryptfs_privileged_open(&inode_info->lower_file, + lower_dentry, lower_mnt); + if (rc || IS_ERR(inode_info->lower_file)) { printk(KERN_ERR "Error opening lower persistent file " - "for lower_dentry [0x%p] and lower_mnt [0x%p]\n", - lower_dentry, lower_mnt); + "for lower_dentry [0x%p] and lower_mnt [0x%p]; " + "rc = [%d]\n", lower_dentry, lower_mnt, rc); rc = PTR_ERR(inode_info->lower_file); inode_info->lower_file = NULL; } @@ -163,14 +149,14 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) * @lower_dentry: Existing dentry in the lower filesystem * @dentry: ecryptfs' dentry * @sb: ecryptfs's super_block - * @flag: If set to true, then d_add is called, else d_instantiate is called + * @flags: flags to govern behavior of interpose procedure * * Interposes upper and lower dentries. * * Returns zero on success; non-zero otherwise */ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, - struct super_block *sb, int flag) + struct super_block *sb, u32 flags) { struct inode *lower_inode; struct inode *inode; @@ -207,7 +193,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, init_special_inode(inode, lower_inode->i_mode, lower_inode->i_rdev); dentry->d_op = &ecryptfs_dops; - if (flag) + if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) d_add(dentry, inode); else d_instantiate(dentry, inode); @@ -215,13 +201,6 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, /* This size will be overwritten for real files w/ headers and * other metadata */ fsstack_copy_inode_size(inode, lower_inode); - rc = ecryptfs_init_persistent_file(dentry); - if (rc) { - printk(KERN_ERR "%s: Error attempting to initialize the " - "persistent file for the dentry with name [%s]; " - "rc = [%d]\n", __func__, dentry->d_name.name, rc); - goto out; - } out: return rc; } @@ -262,10 +241,11 @@ static int ecryptfs_init_global_auth_toks( "session keyring for sig specified in mount " "option: [%s]\n", global_auth_tok->sig); global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID; - rc = 0; + goto out; } else global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID; } +out: return rc; } @@ -314,7 +294,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) char *cipher_name_dst; char *cipher_name_src; char *cipher_key_bytes_src; - int cipher_name_len; if (!options) { rc = -EINVAL; @@ -395,17 +374,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) goto out; } if (!cipher_name_set) { - cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); - if (unlikely(cipher_name_len - >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) { - rc = -EINVAL; - BUG(); - goto out; - } - memcpy(mount_crypt_stat->global_default_cipher_name, - ECRYPTFS_DEFAULT_CIPHER, cipher_name_len); - mount_crypt_stat->global_default_cipher_name[cipher_name_len] - = '\0'; + int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); + + BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE); + + strcpy(mount_crypt_stat->global_default_cipher_name, + ECRYPTFS_DEFAULT_CIPHER); } if (!cipher_key_bytes_set) { mount_crypt_stat->global_default_cipher_key_size = 0; @@ -430,7 +404,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) printk(KERN_WARNING "One or more global auth toks could not " "properly register; rc = [%d]\n", rc); } - rc = 0; out: return rc; } @@ -679,6 +652,11 @@ static struct ecryptfs_cache_info { .name = "ecryptfs_key_tfm_cache", .size = sizeof(struct ecryptfs_key_tfm), }, + { + .cache = &ecryptfs_open_req_cache, + .name = "ecryptfs_open_req_cache", + .size = sizeof(struct ecryptfs_open_req), + }, }; static void ecryptfs_free_kmem_caches(void) @@ -795,11 +773,17 @@ static int __init ecryptfs_init(void) printk(KERN_ERR "sysfs registration failed\n"); goto out_unregister_filesystem; } + rc = ecryptfs_init_kthread(); + if (rc) { + printk(KERN_ERR "%s: kthread initialization failed; " + "rc = [%d]\n", __func__, rc); + goto out_do_sysfs_unregistration; + } rc = ecryptfs_init_messaging(ecryptfs_transport); if (rc) { - ecryptfs_printk(KERN_ERR, "Failure occured while attempting to " + printk(KERN_ERR "Failure occured while attempting to " "initialize the eCryptfs netlink socket\n"); - goto out_do_sysfs_unregistration; + goto out_destroy_kthread; } rc = ecryptfs_init_crypto(); if (rc) { @@ -814,6 +798,8 @@ static int __init ecryptfs_init(void) goto out; out_release_messaging: ecryptfs_release_messaging(ecryptfs_transport); +out_destroy_kthread: + ecryptfs_destroy_kthread(); out_do_sysfs_unregistration: do_sysfs_unregistration(); out_unregister_filesystem: @@ -833,6 +819,7 @@ static void __exit ecryptfs_exit(void) printk(KERN_ERR "Failure whilst attempting to destroy crypto; " "rc = [%d]\n", rc); ecryptfs_release_messaging(ecryptfs_transport); + ecryptfs_destroy_kthread(); do_sysfs_unregistration(); unregister_filesystem(&ecryptfs_fs_type); ecryptfs_free_kmem_caches(); diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 09a4522f65e..b484792a099 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -358,46 +358,6 @@ out_unlock_daemon: } /** - * ecryptfs_miscdev_helo - * @euid: effective user id of miscdevess sending helo packet - * @user_ns: The namespace in which @euid applies - * @pid: miscdevess id of miscdevess sending helo packet - * - * Returns zero on success; non-zero otherwise - */ -static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns, - struct pid *pid) -{ - int rc; - - rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns, - pid); - if (rc) - printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); - return rc; -} - -/** - * ecryptfs_miscdev_quit - * @euid: effective user id of miscdevess sending quit packet - * @user_ns: The namespace in which @euid applies - * @pid: miscdevess id of miscdevess sending quit packet - * - * Returns zero on success; non-zero otherwise - */ -static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns, - struct pid *pid) -{ - int rc; - - rc = ecryptfs_process_quit(euid, user_ns, pid); - if (rc) - printk(KERN_WARNING - "Error processing QUIT message; rc = [%d]\n", rc); - return rc; -} - -/** * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon * @data: Bytes comprising struct ecryptfs_message * @data_size: sizeof(struct ecryptfs_message) + data len @@ -512,26 +472,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, __func__, rc); break; case ECRYPTFS_MSG_HELO: - rc = ecryptfs_miscdev_helo(current->euid, - current->nsproxy->user_ns, - task_pid(current)); - if (rc) { - printk(KERN_ERR "%s: Error attempting to process " - "helo from pid [0x%p]; rc = [%d]\n", __func__, - task_pid(current), rc); - goto out_free; - } - break; case ECRYPTFS_MSG_QUIT: - rc = ecryptfs_miscdev_quit(current->euid, - current->nsproxy->user_ns, - task_pid(current)); - if (rc) { - printk(KERN_ERR "%s: Error attempting to process " - "quit from pid [0x%p]; rc = [%d]\n", __func__, - task_pid(current), rc); - goto out_free; - } break; default: ecryptfs_printk(KERN_WARNING, "Dropping miscdev " diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 2b6fe1e6e8b..245c2dc02d5 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -32,6 +32,7 @@ #include <linux/file.h> #include <linux/crypto.h> #include <linux/scatterlist.h> +#include <asm/unaligned.h> #include "ecryptfs_kernel.h" /** @@ -372,7 +373,6 @@ out: */ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) { - u64 file_size; char *file_size_virt; int rc; @@ -381,9 +381,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) rc = -ENOMEM; goto out; } - file_size = (u64)i_size_read(ecryptfs_inode); - file_size = cpu_to_be64(file_size); - memcpy(file_size_virt, &file_size, sizeof(u64)); + put_unaligned_be64(i_size_read(ecryptfs_inode), file_size_virt); rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0, sizeof(u64)); kfree(file_size_virt); @@ -403,7 +401,6 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode) struct dentry *lower_dentry = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry; struct inode *lower_inode = lower_dentry->d_inode; - u64 file_size; int rc; if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) { @@ -424,9 +421,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode) xattr_virt, PAGE_CACHE_SIZE); if (size < 0) size = 8; - file_size = (u64)i_size_read(ecryptfs_inode); - file_size = cpu_to_be64(file_size); - memcpy(xattr_virt, &file_size, sizeof(u64)); + put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt); rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME, xattr_virt, size, 0); mutex_unlock(&lower_inode->i_mutex); diff --git a/fs/eventfd.c b/fs/eventfd.c index 343942deeec..08bf558d040 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -198,11 +198,18 @@ struct file *eventfd_fget(int fd) return file; } -asmlinkage long sys_eventfd(unsigned int count) +asmlinkage long sys_eventfd2(unsigned int count, int flags) { int fd; struct eventfd_ctx *ctx; + /* Check the EFD_* constants for consistency. */ + BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); + + if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK)) + return -EINVAL; + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; @@ -214,9 +221,15 @@ asmlinkage long sys_eventfd(unsigned int count) * When we call this, the initialization must be complete, since * anon_inode_getfd() will install the fd. */ - fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx); + fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, + flags & (O_CLOEXEC | O_NONBLOCK)); if (fd < 0) kfree(ctx); return fd; } +asmlinkage long sys_eventfd(unsigned int count) +{ + return sys_eventfd2(count, 0); +} + diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 990c01d2d66..0c87474f791 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1046,20 +1046,25 @@ retry: * RB tree. With the current implementation, the "size" parameter is ignored * (besides sanity checks). */ -asmlinkage long sys_epoll_create(int size) +asmlinkage long sys_epoll_create1(int flags) { int error, fd = -1; struct eventpoll *ep; + /* Check the EPOLL_* constant for consistency. */ + BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); + + if (flags & ~EPOLL_CLOEXEC) + return -EINVAL; + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", - current, size)); + current, flags)); /* - * Sanity check on the size parameter, and create the internal data - * structure ( "struct eventpoll" ). + * Create the internal data structure ( "struct eventpoll" ). */ - error = -EINVAL; - if (size <= 0 || (error = ep_alloc(&ep)) < 0) { + error = ep_alloc(&ep); + if (error < 0) { fd = error; goto error_return; } @@ -1068,17 +1073,26 @@ asmlinkage long sys_epoll_create(int size) * Creates all the items needed to setup an eventpoll file. That is, * a file structure and a free file descriptor. */ - fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep); + fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, + flags & O_CLOEXEC); if (fd < 0) ep_free(ep); error_return: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", - current, size, fd)); + current, flags, fd)); return fd; } +asmlinkage long sys_epoll_create(int size) +{ + if (size < 0) + return -EINVAL; + + return sys_epoll_create1(0); +} + /* * The following function implements the controller interface for * the eventpoll file that enables the insertion/removal/change of diff --git a/fs/exec.c b/fs/exec.c index fd9234379e8..190ed1f9277 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -541,7 +541,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) /* * when the old and new regions overlap clear from new_end. */ - free_pgd_range(&tlb, new_end, old_end, new_end, + free_pgd_range(tlb, new_end, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } else { /* @@ -550,7 +550,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * have constraints on va-space that make this illegal (IA64) - * for the others its just a little faster. */ - free_pgd_range(&tlb, old_start, old_end, new_end, + free_pgd_range(tlb, old_start, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } tlb_finish_mmu(tlb, new_end, old_end); diff --git a/fs/fcntl.c b/fs/fcntl.c index 330a7d78259..9679fcbdeaa 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -125,13 +125,16 @@ static int dupfd(struct file *file, unsigned int start, int cloexec) return fd; } -asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) +asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { int err = -EBADF; struct file * file, *tofree; struct files_struct * files = current->files; struct fdtable *fdt; + if ((flags & ~O_CLOEXEC) != 0) + return -EINVAL; + spin_lock(&files->file_lock); if (!(file = fcheck(oldfd))) goto out_unlock; @@ -163,7 +166,10 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) rcu_assign_pointer(fdt->fd[newfd], file); FD_SET(newfd, fdt->open_fds); - FD_CLR(newfd, fdt->close_on_exec); + if (flags & O_CLOEXEC) + FD_SET(newfd, fdt->close_on_exec); + else + FD_CLR(newfd, fdt->close_on_exec); spin_unlock(&files->file_lock); if (tofree) @@ -181,6 +187,11 @@ out_fput: goto out; } +asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) +{ + return sys_dup3(oldfd, newfd, 0); +} + asmlinkage long sys_dup(unsigned int fildes) { int ret = -EBADF; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index aeabf80f81a..dbd01d262ca 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group; enum { Opt_size, Opt_nr_inodes, Opt_mode, Opt_uid, Opt_gid, + Opt_pagesize, Opt_err, }; @@ -62,6 +63,7 @@ static match_table_t tokens = { {Opt_mode, "mode=%o"}, {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, + {Opt_pagesize, "pagesize=%s"}, {Opt_err, NULL}, }; @@ -80,6 +82,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file->f_path.dentry->d_inode; loff_t len, vma_len; int ret; + struct hstate *h = hstate_file(file); /* * vma address alignment (but not the pgoff alignment) has @@ -92,7 +95,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_RESERVED; vma->vm_ops = &hugetlb_vm_ops; - if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT)) + if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; vma_len = (loff_t)(vma->vm_end - vma->vm_start); @@ -103,9 +106,9 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) ret = -ENOMEM; len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - if (vma->vm_flags & VM_MAYSHARE && - hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), - len >> HPAGE_SHIFT)) + if (hugetlb_reserve_pages(inode, + vma->vm_pgoff >> huge_page_order(h), + len >> huge_page_shift(h), vma)) goto out; ret = 0; @@ -130,20 +133,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long start_addr; + struct hstate *h = hstate_file(file); - if (len & ~HPAGE_MASK) + if (len & ~huge_page_mask(h)) return -EINVAL; if (len > TASK_SIZE) return -ENOMEM; if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len)) + if (prepare_hugepage_range(file, addr, len)) return -EINVAL; return addr; } if (addr) { - addr = ALIGN(addr, HPAGE_SIZE); + addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && (!vma || addr + len <= vma->vm_start)) @@ -156,7 +160,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, start_addr = TASK_UNMAPPED_BASE; full_search: - addr = ALIGN(start_addr, HPAGE_SIZE); + addr = ALIGN(start_addr, huge_page_size(h)); for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ @@ -174,7 +178,7 @@ full_search: if (!vma || addr + len <= vma->vm_start) return addr; - addr = ALIGN(vma->vm_end, HPAGE_SIZE); + addr = ALIGN(vma->vm_end, huge_page_size(h)); } } #endif @@ -225,10 +229,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset, static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { + struct hstate *h = hstate_file(filp); struct address_space *mapping = filp->f_mapping; struct inode *inode = mapping->host; - unsigned long index = *ppos >> HPAGE_SHIFT; - unsigned long offset = *ppos & ~HPAGE_MASK; + unsigned long index = *ppos >> huge_page_shift(h); + unsigned long offset = *ppos & ~huge_page_mask(h); unsigned long end_index; loff_t isize; ssize_t retval = 0; @@ -243,17 +248,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, if (!isize) goto out; - end_index = (isize - 1) >> HPAGE_SHIFT; + end_index = (isize - 1) >> huge_page_shift(h); for (;;) { struct page *page; - int nr, ret; + unsigned long nr, ret; /* nr is the maximum number of bytes to copy from this page */ - nr = HPAGE_SIZE; + nr = huge_page_size(h); if (index >= end_index) { if (index > end_index) goto out; - nr = ((isize - 1) & ~HPAGE_MASK) + 1; + nr = ((isize - 1) & ~huge_page_mask(h)) + 1; if (nr <= offset) { goto out; } @@ -287,8 +292,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, offset += ret; retval += ret; len -= ret; - index += offset >> HPAGE_SHIFT; - offset &= ~HPAGE_MASK; + index += offset >> huge_page_shift(h); + offset &= ~huge_page_mask(h); if (page) page_cache_release(page); @@ -298,7 +303,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, break; } out: - *ppos = ((loff_t)index << HPAGE_SHIFT) + offset; + *ppos = ((loff_t)index << huge_page_shift(h)) + offset; mutex_unlock(&inode->i_mutex); return retval; } @@ -339,8 +344,9 @@ static void truncate_huge_page(struct page *page) static void truncate_hugepages(struct inode *inode, loff_t lstart) { + struct hstate *h = hstate_inode(inode); struct address_space *mapping = &inode->i_data; - const pgoff_t start = lstart >> HPAGE_SHIFT; + const pgoff_t start = lstart >> huge_page_shift(h); struct pagevec pvec; pgoff_t next; int i, freed = 0; @@ -441,7 +447,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) v_offset = 0; __unmap_hugepage_range(vma, - vma->vm_start + v_offset, vma->vm_end); + vma->vm_start + v_offset, vma->vm_end, NULL); } } @@ -449,8 +455,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) { pgoff_t pgoff; struct address_space *mapping = inode->i_mapping; + struct hstate *h = hstate_inode(inode); - BUG_ON(offset & ~HPAGE_MASK); + BUG_ON(offset & ~huge_page_mask(h)); pgoff = offset >> PAGE_SHIFT; i_size_write(inode, offset); @@ -465,6 +472,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; + struct hstate *h = hstate_inode(inode); int error; unsigned int ia_valid = attr->ia_valid; @@ -476,7 +484,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) if (ia_valid & ATTR_SIZE) { error = -EINVAL; - if (!(attr->ia_size & ~HPAGE_MASK)) + if (!(attr->ia_size & ~huge_page_mask(h))) error = hugetlb_vmtruncate(inode, attr->ia_size); if (error) goto out; @@ -610,9 +618,10 @@ static int hugetlbfs_set_page_dirty(struct page *page) static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); + struct hstate *h = hstate_inode(dentry->d_inode); buf->f_type = HUGETLBFS_MAGIC; - buf->f_bsize = HPAGE_SIZE; + buf->f_bsize = huge_page_size(h); if (sbinfo) { spin_lock(&sbinfo->stat_lock); /* If no limits set, just report 0 for max/free/used @@ -743,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) char *p, *rest; substring_t args[MAX_OPT_ARGS]; int option; + unsigned long long size = 0; + enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE; if (!options) return 0; @@ -773,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) break; case Opt_size: { - unsigned long long size; /* memparse() will accept a K/M/G without a digit */ if (!isdigit(*args[0].from)) goto bad_val; size = memparse(args[0].from, &rest); - if (*rest == '%') { - size <<= HPAGE_SHIFT; - size *= max_huge_pages; - do_div(size, 100); - } - pconfig->nr_blocks = (size >> HPAGE_SHIFT); + setsize = SIZE_STD; + if (*rest == '%') + setsize = SIZE_PERCENT; break; } @@ -794,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) pconfig->nr_inodes = memparse(args[0].from, &rest); break; + case Opt_pagesize: { + unsigned long ps; + ps = memparse(args[0].from, &rest); + pconfig->hstate = size_to_hstate(ps); + if (!pconfig->hstate) { + printk(KERN_ERR + "hugetlbfs: Unsupported page size %lu MB\n", + ps >> 20); + return -EINVAL; + } + break; + } + default: printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", p); @@ -801,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) break; } } + + /* Do size after hstate is set up */ + if (setsize > NO_SIZE) { + struct hstate *h = pconfig->hstate; + if (setsize == SIZE_PERCENT) { + size <<= huge_page_shift(h); + size *= h->max_huge_pages; + do_div(size, 100); + } + pconfig->nr_blocks = (size >> huge_page_shift(h)); + } + return 0; bad_val: @@ -825,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) config.uid = current->fsuid; config.gid = current->fsgid; config.mode = 0755; + config.hstate = &default_hstate; ret = hugetlbfs_parse_options(data, &config); if (ret) return ret; @@ -833,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) if (!sbinfo) return -ENOMEM; sb->s_fs_info = sbinfo; + sbinfo->hstate = config.hstate; spin_lock_init(&sbinfo->stat_lock); sbinfo->max_blocks = config.nr_blocks; sbinfo->free_blocks = config.nr_blocks; sbinfo->max_inodes = config.nr_inodes; sbinfo->free_inodes = config.nr_inodes; sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = HPAGE_SIZE; - sb->s_blocksize_bits = HPAGE_SHIFT; + sb->s_blocksize = huge_page_size(config.hstate); + sb->s_blocksize_bits = huge_page_shift(config.hstate); sb->s_magic = HUGETLBFS_MAGIC; sb->s_op = &hugetlbfs_ops; sb->s_time_gran = 1; @@ -942,7 +976,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size) goto out_dentry; error = -ENOMEM; - if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) + if (hugetlb_reserve_pages(inode, 0, + size >> huge_page_shift(hstate_inode(inode)), NULL)) goto out_inode; d_instantiate(dentry, inode); diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 6676c06bb7c..fe79c25d95d 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -566,7 +566,7 @@ static const struct inotify_operations inotify_user_ops = { .destroy_watch = free_inotify_user_watch, }; -asmlinkage long sys_inotify_init(void) +asmlinkage long sys_inotify_init1(int flags) { struct inotify_device *dev; struct inotify_handle *ih; @@ -574,7 +574,14 @@ asmlinkage long sys_inotify_init(void) struct file *filp; int fd, ret; - fd = get_unused_fd(); + /* Check the IN_* constants for consistency. */ + BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK); + + if (flags & ~(IN_CLOEXEC | IN_NONBLOCK)) + return -EINVAL; + + fd = get_unused_fd_flags(flags & O_CLOEXEC); if (fd < 0) return fd; @@ -610,7 +617,7 @@ asmlinkage long sys_inotify_init(void) filp->f_path.dentry = dget(inotify_mnt->mnt_root); filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; filp->f_mode = FMODE_READ; - filp->f_flags = O_RDONLY; + filp->f_flags = O_RDONLY | (flags & O_NONBLOCK); filp->private_data = dev; INIT_LIST_HEAD(&dev->events); @@ -638,6 +645,11 @@ out_put_fd: return ret; } +asmlinkage long sys_inotify_init(void) +{ + return sys_inotify_init1(0); +} + asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) { struct inode *inode; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 2169af4d545..5bd9bf0fa9d 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -50,7 +50,7 @@ EXPORT_SYMBOL(nlmsvc_ops); static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; static struct task_struct *nlmsvc_task; -static struct svc_serv *nlmsvc_serv; +static struct svc_rqst *nlmsvc_rqst; int nlmsvc_grace_period; unsigned long nlmsvc_timeout; @@ -194,20 +194,11 @@ lockd(void *vrqstp) svc_process(rqstp); } - flush_signals(current); if (nlmsvc_ops) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); - unlock_kernel(); - - nlmsvc_task = NULL; - nlmsvc_serv = NULL; - - /* Exit the RPC thread */ - svc_exit_thread(rqstp); - return 0; } @@ -254,16 +245,15 @@ int lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ { struct svc_serv *serv; - struct svc_rqst *rqstp; int error = 0; mutex_lock(&nlmsvc_mutex); /* * Check whether we're already up and running. */ - if (nlmsvc_serv) { + if (nlmsvc_rqst) { if (proto) - error = make_socks(nlmsvc_serv, proto); + error = make_socks(nlmsvc_rqst->rq_server, proto); goto out; } @@ -288,9 +278,10 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ /* * Create the kernel thread and wait for it to start. */ - rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); - if (IS_ERR(rqstp)) { - error = PTR_ERR(rqstp); + nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(nlmsvc_rqst)) { + error = PTR_ERR(nlmsvc_rqst); + nlmsvc_rqst = NULL; printk(KERN_WARNING "lockd_up: svc_rqst allocation failed, error=%d\n", error); @@ -298,16 +289,15 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ } svc_sock_update_bufs(serv); - nlmsvc_serv = rqstp->rq_server; - nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name); + nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name); if (IS_ERR(nlmsvc_task)) { error = PTR_ERR(nlmsvc_task); + svc_exit_thread(nlmsvc_rqst); nlmsvc_task = NULL; - nlmsvc_serv = NULL; + nlmsvc_rqst = NULL; printk(KERN_WARNING "lockd_up: kthread_run failed, error=%d\n", error); - svc_exit_thread(rqstp); goto destroy_and_out; } @@ -346,6 +336,9 @@ lockd_down(void) BUG(); } kthread_stop(nlmsvc_task); + svc_exit_thread(nlmsvc_rqst); + nlmsvc_task = NULL; + nlmsvc_rqst = NULL; out: mutex_unlock(&nlmsvc_mutex); } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 2e27176ff42..39944463933 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -58,8 +58,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, return 0; no_locks: - if (host) - nlm_release_host(host); + nlm_release_host(host); if (error) return error; return nlm_lck_denied_nolocks; @@ -100,7 +99,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Now check for conflicting locks */ - resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie); + resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie); if (resp->status == nlm_drop_reply) rc = rpc_drop_reply; else @@ -146,7 +145,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, #endif /* Now try to lock the file */ - resp->status = nlmsvc_lock(rqstp, file, &argp->lock, + resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, argp->block, &argp->cookie); if (resp->status == nlm_drop_reply) rc = rpc_drop_reply; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 56a08ab9a4c..821b9acdfb6 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -129,9 +129,9 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock) static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b) { - if(a->len != b->len) + if (a->len != b->len) return 0; - if(memcmp(a->data,b->data,a->len)) + if (memcmp(a->data, b->data, a->len)) return 0; return 1; } @@ -180,6 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host, struct nlm_block *block; struct nlm_rqst *call = NULL; + nlm_get_host(host); call = nlm_alloc_call(host); if (call == NULL) return NULL; @@ -358,10 +359,10 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block) */ __be32 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, - struct nlm_lock *lock, int wait, struct nlm_cookie *cookie) + struct nlm_host *host, struct nlm_lock *lock, int wait, + struct nlm_cookie *cookie) { struct nlm_block *block = NULL; - struct nlm_host *host; int error; __be32 ret; @@ -373,11 +374,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, (long long)lock->fl.fl_end, wait); - /* Create host handle for callback */ - host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len); - if (host == NULL) - return nlm_lck_denied_nolocks; - /* Lock file against concurrent access */ mutex_lock(&file->f_mutex); /* Get existing block (in case client is busy-waiting) @@ -385,8 +381,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, */ block = nlmsvc_lookup_block(file, lock); if (block == NULL) { - block = nlmsvc_create_block(rqstp, nlm_get_host(host), file, - lock, cookie); + block = nlmsvc_create_block(rqstp, host, file, lock, cookie); ret = nlm_lck_denied_nolocks; if (block == NULL) goto out; @@ -417,7 +412,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, lock->fl.fl_flags &= ~FL_SLEEP; dprintk("lockd: vfs_lock_file returned %d\n", error); - switch(error) { + switch (error) { case 0: ret = nlm_granted; goto out; @@ -450,7 +445,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, out: mutex_unlock(&file->f_mutex); nlmsvc_release_block(block); - nlm_release_host(host); dprintk("lockd: nlmsvc_lock returned %u\n", ret); return ret; } @@ -460,8 +454,8 @@ out: */ __be32 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, - struct nlm_lock *lock, struct nlm_lock *conflock, - struct nlm_cookie *cookie) + struct nlm_host *host, struct nlm_lock *lock, + struct nlm_lock *conflock, struct nlm_cookie *cookie) { struct nlm_block *block = NULL; int error; @@ -479,16 +473,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, if (block == NULL) { struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL); - struct nlm_host *host; if (conf == NULL) return nlm_granted; - /* Create host handle for callback */ - host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len); - if (host == NULL) { - kfree(conf); - return nlm_lck_denied_nolocks; - } block = nlmsvc_create_block(rqstp, host, file, lock, cookie); if (block == NULL) { kfree(conf); @@ -897,7 +884,7 @@ nlmsvc_retry_blocked(void) if (block->b_when == NLM_NEVER) break; - if (time_after(block->b_when,jiffies)) { + if (time_after(block->b_when, jiffies)) { timeout = block->b_when - jiffies; break; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index ce6952b50a7..76019d2ff72 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -87,8 +87,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, return 0; no_locks: - if (host) - nlm_release_host(host); + nlm_release_host(host); if (error) return error; return nlm_lck_denied_nolocks; @@ -129,7 +128,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Now check for conflicting locks */ - resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie)); + resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie)); if (resp->status == nlm_drop_reply) rc = rpc_drop_reply; else @@ -176,7 +175,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, #endif /* Now try to lock the file */ - resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock, + resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, argp->block, &argp->cookie)); if (resp->status == nlm_drop_reply) rc = rpc_drop_reply; diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index d1c48b539df..198b4e55b37 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -373,13 +373,16 @@ nlmsvc_free_host_resources(struct nlm_host *host) } } -/* - * Remove all locks held for clients +/** + * nlmsvc_invalidate_all - remove all locks held for clients + * + * Release all locks held by NFS clients. + * */ void nlmsvc_invalidate_all(void) { - /* Release all locks held by NFS clients. + /* * Previously, the code would call * nlmsvc_free_host_resources for each client in * turn, which is about as inefficient as it gets. @@ -396,6 +399,12 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file) return sb == file->f_file->f_path.mnt->mnt_sb; } +/** + * nlmsvc_unlock_all_by_sb - release locks held on this file system + * @sb: super block + * + * Release all locks held by clients accessing this file system. + */ int nlmsvc_unlock_all_by_sb(struct super_block *sb) { @@ -409,17 +418,22 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); static int nlmsvc_match_ip(void *datap, struct nlm_host *host) { - __be32 *server_addr = datap; - - return host->h_saddr.sin_addr.s_addr == *server_addr; + return nlm_cmp_addr(&host->h_saddr, datap); } +/** + * nlmsvc_unlock_all_by_ip - release local locks by IP address + * @server_addr: server's IP address as seen by clients + * + * Release all locks held by clients accessing this host + * via the passed in IP address. + */ int -nlmsvc_unlock_all_by_ip(__be32 server_addr) +nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr) { int ret; - ret = nlm_traverse_files(&server_addr, nlmsvc_match_ip, NULL); - return ret ? -EIO : 0; + ret = nlm_traverse_files(server_addr, nlmsvc_match_ip, NULL); + return ret ? -EIO : 0; } EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_ip); diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 9e4a568a501..6b6225ac492 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -35,7 +35,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) fh.fh_export = NULL; exp_readlock(); - nfserr = nfsd_open(rqstp, &fh, S_IFREG, MAY_LOCK, filp); + nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp); fh_put(&fh); rqstp->rq_client = NULL; exp_readunlock(); diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 1c3b7654e96..4e3219e8411 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -40,7 +40,8 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); fh = fh_copy(&resp->fh, &argp->fh); - if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (nfserr) RETURN_STATUS(nfserr); if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) @@ -107,7 +108,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); fh = fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); if (!nfserr) { nfserr = nfserrno( nfsd_set_posix_acl( @@ -134,7 +135,7 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp, dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - return fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); } /* diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index b647f2f872d..9981dbb377a 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -36,7 +36,8 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, __be32 nfserr = 0; fh = fh_copy(&resp->fh, &argp->fh); - if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (nfserr) RETURN_STATUS(nfserr); if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) @@ -101,7 +102,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, __be32 nfserr = 0; fh = fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); if (!nfserr) { nfserr = nfserrno( nfsd_set_posix_acl( diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index c721a1e6e9d..4d617ea28cf 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -63,7 +63,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); if (nfserr) RETURN_STATUS(nfserr); @@ -242,7 +242,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, attr = &argp->attrs; /* Get the directory inode */ - nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_CREATE); + nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE); if (nfserr) RETURN_STATUS(nfserr); @@ -558,7 +558,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, resp->f_maxfilesize = ~(u32) 0; resp->f_properties = NFS3_FSF_DEFAULT; - nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP); + nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); /* Check special features of the file system. May request * different read/write sizes for file systems known to have @@ -597,7 +597,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, resp->p_case_insensitive = 0; resp->p_case_preserving = 1; - nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP); + nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); if (nfserr == 0) { struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c309c881bd4..eef1629806f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -71,11 +71,11 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs return nfserr_inval; if (open->op_share_access & NFS4_SHARE_ACCESS_READ) - accmode |= MAY_READ; + accmode |= NFSD_MAY_READ; if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - accmode |= (MAY_WRITE | MAY_TRUNC); + accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC); if (open->op_share_deny & NFS4_SHARE_DENY_WRITE) - accmode |= MAY_WRITE; + accmode |= NFSD_MAY_WRITE; status = fh_verify(rqstp, current_fh, S_IFREG, accmode); @@ -126,7 +126,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size); if (!created) - status = do_open_permission(rqstp, current_fh, open, MAY_NOP); + status = do_open_permission(rqstp, current_fh, open, + NFSD_MAY_NOP); out: fh_put(&resfh); @@ -157,7 +158,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && (open->op_iattr.ia_size == 0); - status = do_open_permission(rqstp, current_fh, open, MAY_OWNER_OVERRIDE); + status = do_open_permission(rqstp, current_fh, open, + NFSD_MAY_OWNER_OVERRIDE); return status; } @@ -186,7 +188,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len; memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh, rp->rp_openfh_len); - status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); if (status) dprintk("nfsd4_open: replay failed" " restoring previous filehandle\n"); @@ -285,7 +287,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen); - return fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); + return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); } static __be32 @@ -363,7 +365,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fh_init(&resfh, NFS4_FHSIZE); - status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, MAY_CREATE); + status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, + NFSD_MAY_CREATE); if (status == nfserr_symlink) status = nfserr_notdir; if (status) @@ -445,7 +448,7 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; - status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); if (status) return status; @@ -730,7 +733,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, int count; __be32 status; - status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); if (status) return status; @@ -843,10 +846,13 @@ struct nfsd4_operation { #define ALLOWED_WITHOUT_FH 1 /* GETATTR and ops not listed as returning NFS4ERR_MOVED: */ #define ALLOWED_ON_ABSENT_FS 2 + char *op_name; }; static struct nfsd4_operation nfsd4_ops[]; +static inline char *nfsd4_op_name(unsigned opnum); + /* * COMPOUND call. */ @@ -888,7 +894,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; - dprintk("nfsv4 compound op #%d: %d\n", resp->opcnt, op->opnum); + dprintk("nfsv4 compound op #%d/%d: %d (%s)\n", + resp->opcnt, args->opcnt, op->opnum, + nfsd4_op_name(op->opnum)); /* * The XDR decode routines may have pre-set op->status; @@ -952,126 +960,170 @@ encode_op: out: nfsd4_release_compoundargs(args); cstate_free(cstate); + dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { [OP_ACCESS] = { .op_func = (nfsd4op_func)nfsd4_access, + .op_name = "OP_ACCESS", }, [OP_CLOSE] = { .op_func = (nfsd4op_func)nfsd4_close, + .op_name = "OP_CLOSE", }, [OP_COMMIT] = { .op_func = (nfsd4op_func)nfsd4_commit, + .op_name = "OP_COMMIT", }, [OP_CREATE] = { .op_func = (nfsd4op_func)nfsd4_create, + .op_name = "OP_CREATE", }, [OP_DELEGRETURN] = { .op_func = (nfsd4op_func)nfsd4_delegreturn, + .op_name = "OP_DELEGRETURN", }, [OP_GETATTR] = { .op_func = (nfsd4op_func)nfsd4_getattr, .op_flags = ALLOWED_ON_ABSENT_FS, + .op_name = "OP_GETATTR", }, [OP_GETFH] = { .op_func = (nfsd4op_func)nfsd4_getfh, + .op_name = "OP_GETFH", }, [OP_LINK] = { .op_func = (nfsd4op_func)nfsd4_link, + .op_name = "OP_LINK", }, [OP_LOCK] = { .op_func = (nfsd4op_func)nfsd4_lock, + .op_name = "OP_LOCK", }, [OP_LOCKT] = { .op_func = (nfsd4op_func)nfsd4_lockt, + .op_name = "OP_LOCKT", }, [OP_LOCKU] = { .op_func = (nfsd4op_func)nfsd4_locku, + .op_name = "OP_LOCKU", }, [OP_LOOKUP] = { .op_func = (nfsd4op_func)nfsd4_lookup, + .op_name = "OP_LOOKUP", }, [OP_LOOKUPP] = { .op_func = (nfsd4op_func)nfsd4_lookupp, + .op_name = "OP_LOOKUPP", }, [OP_NVERIFY] = { .op_func = (nfsd4op_func)nfsd4_nverify, + .op_name = "OP_NVERIFY", }, [OP_OPEN] = { .op_func = (nfsd4op_func)nfsd4_open, + .op_name = "OP_OPEN", }, [OP_OPEN_CONFIRM] = { .op_func = (nfsd4op_func)nfsd4_open_confirm, + .op_name = "OP_OPEN_CONFIRM", }, [OP_OPEN_DOWNGRADE] = { .op_func = (nfsd4op_func)nfsd4_open_downgrade, + .op_name = "OP_OPEN_DOWNGRADE", }, [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_name = "OP_PUTFH", }, [OP_PUTPUBFH] = { - /* unsupported; just for future reference: */ + /* unsupported, just for future reference: */ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_name = "OP_PUTPUBFH", }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_name = "OP_PUTROOTFH", }, [OP_READ] = { .op_func = (nfsd4op_func)nfsd4_read, + .op_name = "OP_READ", }, [OP_READDIR] = { .op_func = (nfsd4op_func)nfsd4_readdir, + .op_name = "OP_READDIR", }, [OP_READLINK] = { .op_func = (nfsd4op_func)nfsd4_readlink, + .op_name = "OP_READLINK", }, [OP_REMOVE] = { .op_func = (nfsd4op_func)nfsd4_remove, + .op_name = "OP_REMOVE", }, [OP_RENAME] = { + .op_name = "OP_RENAME", .op_func = (nfsd4op_func)nfsd4_rename, }, [OP_RENEW] = { .op_func = (nfsd4op_func)nfsd4_renew, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_name = "OP_RENEW", }, [OP_RESTOREFH] = { .op_func = (nfsd4op_func)nfsd4_restorefh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_name = "OP_RESTOREFH", }, [OP_SAVEFH] = { .op_func = (nfsd4op_func)nfsd4_savefh, + .op_name = "OP_SAVEFH", }, [OP_SECINFO] = { .op_func = (nfsd4op_func)nfsd4_secinfo, + .op_name = "OP_SECINFO", }, [OP_SETATTR] = { .op_func = (nfsd4op_func)nfsd4_setattr, + .op_name = "OP_SETATTR", }, [OP_SETCLIENTID] = { .op_func = (nfsd4op_func)nfsd4_setclientid, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_name = "OP_SETCLIENTID", }, [OP_SETCLIENTID_CONFIRM] = { .op_func = (nfsd4op_func)nfsd4_setclientid_confirm, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_name = "OP_SETCLIENTID_CONFIRM", }, [OP_VERIFY] = { .op_func = (nfsd4op_func)nfsd4_verify, + .op_name = "OP_VERIFY", }, [OP_WRITE] = { .op_func = (nfsd4op_func)nfsd4_write, + .op_name = "OP_WRITE", }, [OP_RELEASE_LOCKOWNER] = { .op_func = (nfsd4op_func)nfsd4_release_lockowner, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_name = "OP_RELEASE_LOCKOWNER", }, }; +static inline char * +nfsd4_op_name(unsigned opnum) +{ + if (opnum < ARRAY_SIZE(nfsd4_ops)) + return nfsd4_ops[opnum].op_name; + return "unknown_operation"; +} + #define nfs4svc_decode_voidargs NULL #define nfs4svc_release_void NULL #define nfsd4_voidres nfsd4_voidargs diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8799b870818..1578d7a2667 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1173,6 +1173,24 @@ static inline int deny_valid(u32 x) return x <= NFS4_SHARE_DENY_BOTH; } +/* + * We store the NONE, READ, WRITE, and BOTH bits separately in the + * st_{access,deny}_bmap field of the stateid, in order to track not + * only what share bits are currently in force, but also what + * combinations of share bits previous opens have used. This allows us + * to enforce the recommendation of rfc 3530 14.2.19 that the server + * return an error if the client attempt to downgrade to a combination + * of share bits not explicable by closing some of its previous opens. + * + * XXX: This enforcement is actually incomplete, since we don't keep + * track of access/deny bit combinations; so, e.g., we allow: + * + * OPEN allow read, deny write + * OPEN allow both, deny none + * DOWNGRADE allow read, deny none + * + * which we should reject. + */ static void set_access(unsigned int *access, unsigned long bmap) { int i; @@ -1570,6 +1588,10 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta int err = get_write_access(inode); if (err) return nfserrno(err); + err = mnt_want_write(cur_fh->fh_export->ex_path.mnt); + if (err) + return nfserrno(err); + file_take_write(filp); } status = nfsd4_truncate(rqstp, cur_fh, open); if (status) { @@ -1579,8 +1601,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta } /* remember the open */ filp->f_mode |= open->op_share_access; - set_bit(open->op_share_access, &stp->st_access_bmap); - set_bit(open->op_share_deny, &stp->st_deny_bmap); + __set_bit(open->op_share_access, &stp->st_access_bmap); + __set_bit(open->op_share_deny, &stp->st_deny_bmap); return nfs_ok; } @@ -1722,9 +1744,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf /* Stateid was not found, this is a new OPEN */ int flags = 0; if (open->op_share_access & NFS4_SHARE_ACCESS_READ) - flags |= MAY_READ; + flags |= NFSD_MAY_READ; if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - flags |= MAY_WRITE; + flags |= NFSD_MAY_WRITE; status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags); if (status) goto out; @@ -2610,7 +2632,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_inval; if ((status = fh_verify(rqstp, &cstate->current_fh, - S_IFREG, MAY_LOCK))) { + S_IFREG, NFSD_MAY_LOCK))) { dprintk("NFSD: nfsd4_lock: permission denied!\n"); return status; } @@ -3249,12 +3271,14 @@ nfs4_state_shutdown(void) nfs4_unlock_state(); } +/* + * user_recovery_dirname is protected by the nfsd_mutex since it's only + * accessed when nfsd is starting. + */ static void nfs4_set_recdir(char *recdir) { - nfs4_lock_state(); strcpy(user_recovery_dirname, recdir); - nfs4_unlock_state(); } /* @@ -3278,6 +3302,12 @@ nfs4_reset_recoverydir(char *recdir) return status; } +char * +nfs4_recoverydir(void) +{ + return user_recovery_dirname; +} + /* * Called when leasetime is changed. * @@ -3286,11 +3316,12 @@ nfs4_reset_recoverydir(char *recdir) * we start to register any changes in lease time. If the administrator * really wants to change the lease time *now*, they can go ahead and bring * nfsd down and then back up again after changing the lease time. + * + * user_lease_time is protected by nfsd_mutex since it's only really accessed + * when nfsd is starting */ void nfs4_reset_lease(time_t leasetime) { - lock_kernel(); user_lease_time = leasetime; - unlock_kernel(); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c513bbdf2d3..14ba4d9b285 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -986,10 +986,74 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel } static __be32 +nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) +{ + return nfs_ok; +} + +static __be32 +nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) +{ + return nfserr_opnotsupp; +} + +typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); + +static nfsd4_dec nfsd4_dec_ops[] = { + [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, + [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, + [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, + [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, + [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, + [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, + [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, + [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, + [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, + [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, + [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, + [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, + [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, + [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, + [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, + [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_READ] = (nfsd4_dec)nfsd4_decode_read, + [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, + [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, + [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, + [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, + [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew, + [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, + [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, + [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid, + [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm, + [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, +}; + +struct nfsd4_minorversion_ops { + nfsd4_dec *decoders; + int nops; +}; + +static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { + [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, +}; + +static __be32 nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { DECODE_HEAD; struct nfsd4_op *op; + struct nfsd4_minorversion_ops *ops; int i; /* @@ -1019,6 +1083,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) } } + if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion)) + argp->opcnt = 0; + + ops = &nfsd4_minorversion[argp->minorversion]; for (i = 0; i < argp->opcnt; i++) { op = &argp->ops[i]; op->replay = NULL; @@ -1056,120 +1124,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) } op->opnum = ntohl(*argp->p++); - switch (op->opnum) { - case 2: /* Reserved operation */ - op->opnum = OP_ILLEGAL; - if (argp->minorversion == 0) - op->status = nfserr_op_illegal; - else - op->status = nfserr_minor_vers_mismatch; - break; - case OP_ACCESS: - op->status = nfsd4_decode_access(argp, &op->u.access); - break; - case OP_CLOSE: - op->status = nfsd4_decode_close(argp, &op->u.close); - break; - case OP_COMMIT: - op->status = nfsd4_decode_commit(argp, &op->u.commit); - break; - case OP_CREATE: - op->status = nfsd4_decode_create(argp, &op->u.create); - break; - case OP_DELEGRETURN: - op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn); - break; - case OP_GETATTR: - op->status = nfsd4_decode_getattr(argp, &op->u.getattr); - break; - case OP_GETFH: - op->status = nfs_ok; - break; - case OP_LINK: - op->status = nfsd4_decode_link(argp, &op->u.link); - break; - case OP_LOCK: - op->status = nfsd4_decode_lock(argp, &op->u.lock); - break; - case OP_LOCKT: - op->status = nfsd4_decode_lockt(argp, &op->u.lockt); - break; - case OP_LOCKU: - op->status = nfsd4_decode_locku(argp, &op->u.locku); - break; - case OP_LOOKUP: - op->status = nfsd4_decode_lookup(argp, &op->u.lookup); - break; - case OP_LOOKUPP: - op->status = nfs_ok; - break; - case OP_NVERIFY: - op->status = nfsd4_decode_verify(argp, &op->u.nverify); - break; - case OP_OPEN: - op->status = nfsd4_decode_open(argp, &op->u.open); - break; - case OP_OPEN_CONFIRM: - op->status = nfsd4_decode_open_confirm(argp, &op->u.open_confirm); - break; - case OP_OPEN_DOWNGRADE: - op->status = nfsd4_decode_open_downgrade(argp, &op->u.open_downgrade); - break; - case OP_PUTFH: - op->status = nfsd4_decode_putfh(argp, &op->u.putfh); - break; - case OP_PUTROOTFH: - op->status = nfs_ok; - break; - case OP_READ: - op->status = nfsd4_decode_read(argp, &op->u.read); - break; - case OP_READDIR: - op->status = nfsd4_decode_readdir(argp, &op->u.readdir); - break; - case OP_READLINK: - op->status = nfs_ok; - break; - case OP_REMOVE: - op->status = nfsd4_decode_remove(argp, &op->u.remove); - break; - case OP_RENAME: - op->status = nfsd4_decode_rename(argp, &op->u.rename); - break; - case OP_RESTOREFH: - op->status = nfs_ok; - break; - case OP_RENEW: - op->status = nfsd4_decode_renew(argp, &op->u.renew); - break; - case OP_SAVEFH: - op->status = nfs_ok; - break; - case OP_SECINFO: - op->status = nfsd4_decode_secinfo(argp, &op->u.secinfo); - break; - case OP_SETATTR: - op->status = nfsd4_decode_setattr(argp, &op->u.setattr); - break; - case OP_SETCLIENTID: - op->status = nfsd4_decode_setclientid(argp, &op->u.setclientid); - break; - case OP_SETCLIENTID_CONFIRM: - op->status = nfsd4_decode_setclientid_confirm(argp, &op->u.setclientid_confirm); - break; - case OP_VERIFY: - op->status = nfsd4_decode_verify(argp, &op->u.verify); - break; - case OP_WRITE: - op->status = nfsd4_decode_write(argp, &op->u.write); - break; - case OP_RELEASE_LOCKOWNER: - op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner); - break; - default: + if (op->opnum >= OP_ACCESS && op->opnum < ops->nops) + op->status = ops->decoders[op->opnum](argp, &op->u); + else { op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; - break; } if (op->status) { @@ -1201,11 +1160,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) *p++ = htonl((u32)((n) >> 32)); \ *p++ = htonl((u32)(n)); \ } while (0) -#define WRITEMEM(ptr,nbytes) do { \ +#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \ *(p + XDR_QUADLEN(nbytes) -1) = 0; \ memcpy(p, ptr, nbytes); \ p += XDR_QUADLEN(nbytes); \ -} while (0) +}} while (0) #define WRITECINFO(c) do { \ *p++ = htonl(c.atomic); \ *p++ = htonl(c.before_ctime_sec); \ @@ -1991,7 +1950,7 @@ fail: return -EINVAL; } -static void +static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { ENCODE_HEAD; @@ -2002,9 +1961,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ WRITE32(access->ac_resp_access); ADJUST_ARGS(); } + return nfserr; } -static void +static __be32 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { ENCODE_SEQID_OP_HEAD; @@ -2016,10 +1976,11 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c ADJUST_ARGS(); } ENCODE_SEQID_OP_TAIL(close->cl_stateowner); + return nfserr; } -static void +static __be32 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { ENCODE_HEAD; @@ -2029,9 +1990,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ WRITEMEM(commit->co_verf.data, 8); ADJUST_ARGS(); } + return nfserr; } -static void +static __be32 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { ENCODE_HEAD; @@ -2044,6 +2006,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ WRITE32(create->cr_bmval[1]); ADJUST_ARGS(); } + return nfserr; } static __be32 @@ -2064,9 +2027,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return nfserr; } -static void -nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh *fhp) +static __be32 +nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) { + struct svc_fh *fhp = *fhpp; unsigned int len; ENCODE_HEAD; @@ -2077,6 +2041,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh WRITEMEM(&fhp->fh_handle.fh_base, len); ADJUST_ARGS(); } + return nfserr; } /* @@ -2104,7 +2069,7 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie ADJUST_ARGS(); } -static void +static __be32 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) { ENCODE_SEQID_OP_HEAD; @@ -2118,16 +2083,18 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo nfsd4_encode_lock_denied(resp, &lock->lk_denied); ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); + return nfserr; } -static void +static __be32 nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) { if (nfserr == nfserr_denied) nfsd4_encode_lock_denied(resp, &lockt->lt_denied); + return nfserr; } -static void +static __be32 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { ENCODE_SEQID_OP_HEAD; @@ -2140,10 +2107,11 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l } ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); + return nfserr; } -static void +static __be32 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { ENCODE_HEAD; @@ -2153,10 +2121,11 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li WRITECINFO(link->li_cinfo); ADJUST_ARGS(); } + return nfserr; } -static void +static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { ENCODE_SEQID_OP_HEAD; @@ -2219,9 +2188,10 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op /* XXX save filehandle here */ out: ENCODE_SEQID_OP_TAIL(open->op_stateowner); + return nfserr; } -static void +static __be32 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { ENCODE_SEQID_OP_HEAD; @@ -2234,9 +2204,10 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct } ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); + return nfserr; } -static void +static __be32 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { ENCODE_SEQID_OP_HEAD; @@ -2249,6 +2220,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc } ENCODE_SEQID_OP_TAIL(od->od_stateowner); + return nfserr; } static __be32 @@ -2443,7 +2415,7 @@ err_no_verf: return nfserr; } -static void +static __be32 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { ENCODE_HEAD; @@ -2453,9 +2425,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ WRITECINFO(remove->rm_cinfo); ADJUST_ARGS(); } + return nfserr; } -static void +static __be32 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { ENCODE_HEAD; @@ -2466,9 +2439,10 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ WRITECINFO(rename->rn_tinfo); ADJUST_ARGS(); } + return nfserr; } -static void +static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo *secinfo) { @@ -2532,13 +2506,14 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, out: if (exp) exp_put(exp); + return nfserr; } /* * The SETATTR encode routine is special -- it always encodes a bitmap, * regardless of the error status. */ -static void +static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { ENCODE_HEAD; @@ -2555,9 +2530,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 WRITE32(setattr->sa_bmval[1]); } ADJUST_ARGS(); + return nfserr; } -static void +static __be32 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { ENCODE_HEAD; @@ -2574,9 +2550,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n WRITE32(0); ADJUST_ARGS(); } + return nfserr; } -static void +static __be32 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { ENCODE_HEAD; @@ -2588,8 +2565,56 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w WRITEMEM(write->wr_verifier.data, 8); ADJUST_ARGS(); } + return nfserr; } +static __be32 +nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) +{ + return nfserr; +} + +typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); + +static nfsd4_enc nfsd4_enc_ops[] = { + [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, + [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, + [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, + [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create, + [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr, + [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh, + [OP_LINK] = (nfsd4_enc)nfsd4_encode_link, + [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock, + [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt, + [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku, + [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop, + [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open, + [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm, + [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade, + [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_READ] = (nfsd4_enc)nfsd4_encode_read, + [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir, + [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink, + [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove, + [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename, + [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop, + [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo, + [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr, + [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid, + [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop, + [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, +}; + void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { @@ -2601,101 +2626,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) statp = p++; /* to be backfilled at the end */ ADJUST_ARGS(); - switch (op->opnum) { - case OP_ACCESS: - nfsd4_encode_access(resp, op->status, &op->u.access); - break; - case OP_CLOSE: - nfsd4_encode_close(resp, op->status, &op->u.close); - break; - case OP_COMMIT: - nfsd4_encode_commit(resp, op->status, &op->u.commit); - break; - case OP_CREATE: - nfsd4_encode_create(resp, op->status, &op->u.create); - break; - case OP_DELEGRETURN: - break; - case OP_GETATTR: - op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr); - break; - case OP_GETFH: - nfsd4_encode_getfh(resp, op->status, op->u.getfh); - break; - case OP_LINK: - nfsd4_encode_link(resp, op->status, &op->u.link); - break; - case OP_LOCK: - nfsd4_encode_lock(resp, op->status, &op->u.lock); - break; - case OP_LOCKT: - nfsd4_encode_lockt(resp, op->status, &op->u.lockt); - break; - case OP_LOCKU: - nfsd4_encode_locku(resp, op->status, &op->u.locku); - break; - case OP_LOOKUP: - break; - case OP_LOOKUPP: - break; - case OP_NVERIFY: - break; - case OP_OPEN: - nfsd4_encode_open(resp, op->status, &op->u.open); - break; - case OP_OPEN_CONFIRM: - nfsd4_encode_open_confirm(resp, op->status, &op->u.open_confirm); - break; - case OP_OPEN_DOWNGRADE: - nfsd4_encode_open_downgrade(resp, op->status, &op->u.open_downgrade); - break; - case OP_PUTFH: - break; - case OP_PUTROOTFH: - break; - case OP_READ: - op->status = nfsd4_encode_read(resp, op->status, &op->u.read); - break; - case OP_READDIR: - op->status = nfsd4_encode_readdir(resp, op->status, &op->u.readdir); - break; - case OP_READLINK: - op->status = nfsd4_encode_readlink(resp, op->status, &op->u.readlink); - break; - case OP_REMOVE: - nfsd4_encode_remove(resp, op->status, &op->u.remove); - break; - case OP_RENAME: - nfsd4_encode_rename(resp, op->status, &op->u.rename); - break; - case OP_RENEW: - break; - case OP_RESTOREFH: - break; - case OP_SAVEFH: - break; - case OP_SECINFO: - nfsd4_encode_secinfo(resp, op->status, &op->u.secinfo); - break; - case OP_SETATTR: - nfsd4_encode_setattr(resp, op->status, &op->u.setattr); - break; - case OP_SETCLIENTID: - nfsd4_encode_setclientid(resp, op->status, &op->u.setclientid); - break; - case OP_SETCLIENTID_CONFIRM: - break; - case OP_VERIFY: - break; - case OP_WRITE: - nfsd4_encode_write(resp, op->status, &op->u.write); - break; - case OP_RELEASE_LOCKOWNER: - break; - default: - break; - } - + if (op->opnum == OP_ILLEGAL) + goto status; + BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || + !nfsd4_enc_ops[op->opnum]); + op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); +status: /* * Note: We write the status directly, instead of using WRITE32(), * since it is already in network byte order. diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 5ac00c4fee9..1955a2702e6 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -310,9 +310,12 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size) { - __be32 server_ip; - char *fo_path, c; + struct sockaddr_in sin = { + .sin_family = AF_INET, + }; int b1, b2, b3, b4; + char c; + char *fo_path; /* sanity check */ if (size == 0) @@ -326,11 +329,13 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size) return -EINVAL; /* get ipv4 address */ - if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) + if (sscanf(fo_path, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) != 4) return -EINVAL; - server_ip = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); + if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255) + return -EINVAL; + sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4); - return nlmsvc_unlock_all_by_ip(server_ip); + return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin); } static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) @@ -450,22 +455,26 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int i; int rv; int len; - int npools = nfsd_nrpools(); + int npools; int *nthreads; + mutex_lock(&nfsd_mutex); + npools = nfsd_nrpools(); if (npools == 0) { /* * NFS is shut down. The admin can start it by * writing to the threads file but NOT the pool_threads * file, sorry. Report zero threads. */ + mutex_unlock(&nfsd_mutex); strcpy(buf, "0\n"); return strlen(buf); } nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL); + rv = -ENOMEM; if (nthreads == NULL) - return -ENOMEM; + goto out_free; if (size > 0) { for (i = 0; i < npools; i++) { @@ -496,14 +505,16 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) mesg += len; } + mutex_unlock(&nfsd_mutex); return (mesg-buf); out_free: kfree(nthreads); + mutex_unlock(&nfsd_mutex); return rv; } -static ssize_t write_versions(struct file *file, char *buf, size_t size) +static ssize_t __write_versions(struct file *file, char *buf, size_t size) { /* * Format: @@ -566,14 +577,23 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) return len; } -static ssize_t write_ports(struct file *file, char *buf, size_t size) +static ssize_t write_versions(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __write_versions(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + +static ssize_t __write_ports(struct file *file, char *buf, size_t size) { if (size == 0) { int len = 0; - lock_kernel(); + if (nfsd_serv) len = svc_xprt_names(nfsd_serv, buf, 0); - unlock_kernel(); return len; } /* Either a single 'fd' number is written, in which @@ -603,9 +623,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) /* Decrease the count, but don't shutdown the * the service */ - lock_kernel(); nfsd_serv->sv_nrthreads--; - unlock_kernel(); } return err < 0 ? err : 0; } @@ -614,10 +632,8 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) int len = 0; if (!toclose) return -ENOMEM; - lock_kernel(); if (nfsd_serv) len = svc_sock_names(buf, nfsd_serv, toclose); - unlock_kernel(); if (len >= 0) lockd_down(); kfree(toclose); @@ -655,7 +671,6 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { if (port == 0) return -EINVAL; - lock_kernel(); if (nfsd_serv) { xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); @@ -666,13 +681,23 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) } else err = -ENOTCONN; } - unlock_kernel(); return err < 0 ? err : 0; } } return -EINVAL; } +static ssize_t write_ports(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __write_ports(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + + int nfsd_max_blksize; static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) @@ -691,13 +716,13 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) if (bsize > NFSSVC_MAXBLKSIZE) bsize = NFSSVC_MAXBLKSIZE; bsize &= ~(1024-1); - lock_kernel(); + mutex_lock(&nfsd_mutex); if (nfsd_serv && nfsd_serv->sv_nrthreads) { - unlock_kernel(); + mutex_unlock(&nfsd_mutex); return -EBUSY; } nfsd_max_blksize = bsize; - unlock_kernel(); + mutex_unlock(&nfsd_mutex); } return sprintf(buf, "%d\n", nfsd_max_blksize); } @@ -705,16 +730,17 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) #ifdef CONFIG_NFSD_V4 extern time_t nfs4_leasetime(void); -static ssize_t write_leasetime(struct file *file, char *buf, size_t size) +static ssize_t __write_leasetime(struct file *file, char *buf, size_t size) { /* if size > 10 seconds, call * nfs4_reset_lease() then write out the new lease (seconds) as reply */ char *mesg = buf; - int rv; + int rv, lease; if (size > 0) { - int lease; + if (nfsd_serv) + return -EBUSY; rv = get_int(&mesg, &lease); if (rv) return rv; @@ -726,24 +752,52 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) return strlen(buf); } -static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) +static ssize_t write_leasetime(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __write_leasetime(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + +extern char *nfs4_recoverydir(void); + +static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) { char *mesg = buf; char *recdir; int len, status; - if (size == 0 || size > PATH_MAX || buf[size-1] != '\n') - return -EINVAL; - buf[size-1] = 0; + if (size > 0) { + if (nfsd_serv) + return -EBUSY; + if (size > PATH_MAX || buf[size-1] != '\n') + return -EINVAL; + buf[size-1] = 0; - recdir = mesg; - len = qword_get(&mesg, recdir, size); - if (len <= 0) - return -EINVAL; + recdir = mesg; + len = qword_get(&mesg, recdir, size); + if (len <= 0) + return -EINVAL; - status = nfs4_reset_recoverydir(recdir); + status = nfs4_reset_recoverydir(recdir); + } + sprintf(buf, "%s\n", nfs4_recoverydir()); return strlen(buf); } + +static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __write_recoverydir(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + #endif /*----------------------------------------------------------------------------*/ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 100ae564116..f45451eb1e3 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -176,9 +176,24 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (IS_ERR(exp)) return nfserrno(PTR_ERR(exp)); - error = nfsd_setuser_and_check_port(rqstp, exp); - if (error) - goto out; + if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) { + /* Elevate privileges so that the lack of 'r' or 'x' + * permission on some parent directory will + * not stop exportfs_decode_fh from being able + * to reconnect a directory into the dentry cache. + * The same problem can affect "SUBTREECHECK" exports, + * but as nfsd_acceptable depends on correct + * access control settings being in effect, we cannot + * fix that case easily. + */ + current->cap_effective = + cap_raise_nfsd_set(current->cap_effective, + current->cap_permitted); + } else { + error = nfsd_setuser_and_check_port(rqstp, exp); + if (error) + goto out; + } /* * Look up the dentry using the NFS file handle. @@ -215,6 +230,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) goto out; } + if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) { + error = nfsd_setuser_and_check_port(rqstp, exp); + if (error) { + dput(dentry); + goto out; + } + } + if (S_ISDIR(dentry->d_inode->i_mode) && (dentry->d_flags & DCACHE_DISCONNECTED)) { printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", @@ -279,7 +302,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) if (error) goto out; - if (!(access & MAY_LOCK)) { + if (!(access & NFSD_MAY_LOCK)) { /* * pseudoflavor restrictions are not enforced on NLM, * which clients virtually always use auth_sys for, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 6cfc96a1248..0766f95d236 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -65,7 +65,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); return nfsd_return_attrs(nfserr, resp); } @@ -215,11 +215,11 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, SVCFH_fmt(dirfhp), argp->len, argp->name); /* First verify the parent file handle */ - nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_EXEC); + nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC); if (nfserr) goto done; /* must fh_put dirfhp even on error */ - /* Check for MAY_WRITE in nfsd_create if necessary */ + /* Check for NFSD_MAY_WRITE in nfsd_create if necessary */ nfserr = nfserr_acces; if (!argp->len) @@ -281,7 +281,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, nfserr = nfsd_permission(rqstp, newfhp->fh_export, newfhp->fh_dentry, - MAY_WRITE|MAY_LOCAL_ACCESS); + NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS); if (nfserr && nfserr != nfserr_rofs) goto out_unlock; } @@ -614,6 +614,7 @@ nfserrno (int errno) #endif { nfserr_stale, -ESTALE }, { nfserr_jukebox, -ETIMEDOUT }, + { nfserr_jukebox, -ERESTARTSYS }, { nfserr_dropit, -EAGAIN }, { nfserr_dropit, -ENOMEM }, { nfserr_badname, -ESRCH }, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 941041f4b13..80292ff5e92 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -21,6 +21,7 @@ #include <linux/smp_lock.h> #include <linux/freezer.h> #include <linux/fs_struct.h> +#include <linux/kthread.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/stats.h> @@ -36,28 +37,38 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC -/* these signals will be delivered to an nfsd thread - * when handling a request - */ -#define ALLOWED_SIGS (sigmask(SIGKILL)) -/* these signals will be delivered to an nfsd thread - * when not handling a request. i.e. when waiting - */ -#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT)) -/* if the last thread dies with SIGHUP, then the exports table is - * left unchanged ( like 2.4-{0-9} ). Any other signal will clear - * the exports table (like 2.2). - */ -#define SIG_NOCLEAN SIGHUP - extern struct svc_program nfsd_program; -static void nfsd(struct svc_rqst *rqstp); +static int nfsd(void *vrqstp); struct timeval nfssvc_boot; - struct svc_serv *nfsd_serv; static atomic_t nfsd_busy; static unsigned long nfsd_last_call; static DEFINE_SPINLOCK(nfsd_call_lock); +/* + * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members + * of the svc_serv struct. In particular, ->sv_nrthreads but also to some + * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt + * + * If (out side the lock) nfsd_serv is non-NULL, then it must point to a + * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number + * of nfsd threads must exist and each must listed in ->sp_all_threads in each + * entry of ->sv_pools[]. + * + * Transitions of the thread count between zero and non-zero are of particular + * interest since the svc_serv needs to be created and initialized at that + * point, or freed. + * + * Finally, the nfsd_mutex also protects some of the global variables that are + * accessed when nfsd starts and that are settable via the write_* routines in + * nfsctl.c. In particular: + * + * user_recovery_dirname + * user_lease_time + * nfsd_versions + */ +DEFINE_MUTEX(nfsd_mutex); +struct svc_serv *nfsd_serv; + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static struct svc_version * nfsd_acl_version[] = { @@ -145,13 +156,14 @@ int nfsd_vers(int vers, enum vers_op change) int nfsd_nrthreads(void) { - if (nfsd_serv == NULL) - return 0; - else - return nfsd_serv->sv_nrthreads; + int rv = 0; + mutex_lock(&nfsd_mutex); + if (nfsd_serv) + rv = nfsd_serv->sv_nrthreads; + mutex_unlock(&nfsd_mutex); + return rv; } -static int killsig; /* signal that was used to kill last nfsd */ static void nfsd_last_thread(struct svc_serv *serv) { /* When last nfsd thread exits we need to do some clean-up */ @@ -162,11 +174,9 @@ static void nfsd_last_thread(struct svc_serv *serv) nfsd_racache_shutdown(); nfs4_state_shutdown(); - printk(KERN_WARNING "nfsd: last server has exited\n"); - if (killsig != SIG_NOCLEAN) { - printk(KERN_WARNING "nfsd: unexporting all filesystems\n"); - nfsd_export_flush(); - } + printk(KERN_WARNING "nfsd: last server has exited, flushing export " + "cache\n"); + nfsd_export_flush(); } void nfsd_reset_versions(void) @@ -190,13 +200,14 @@ void nfsd_reset_versions(void) } } + int nfsd_create_serv(void) { int err = 0; - lock_kernel(); + + WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nfsd_serv) { svc_get(nfsd_serv); - unlock_kernel(); return 0; } if (nfsd_max_blksize == 0) { @@ -217,13 +228,11 @@ int nfsd_create_serv(void) } atomic_set(&nfsd_busy, 0); - nfsd_serv = svc_create_pooled(&nfsd_program, - nfsd_max_blksize, - nfsd_last_thread, - nfsd, SIG_NOCLEAN, THIS_MODULE); + nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, + nfsd_last_thread, nfsd, THIS_MODULE); if (nfsd_serv == NULL) err = -ENOMEM; - unlock_kernel(); + do_gettimeofday(&nfssvc_boot); /* record boot time */ return err; } @@ -282,6 +291,8 @@ int nfsd_set_nrthreads(int n, int *nthreads) int tot = 0; int err = 0; + WARN_ON(!mutex_is_locked(&nfsd_mutex)); + if (nfsd_serv == NULL || n <= 0) return 0; @@ -316,7 +327,6 @@ int nfsd_set_nrthreads(int n, int *nthreads) nthreads[0] = 1; /* apply the new numbers */ - lock_kernel(); svc_get(nfsd_serv); for (i = 0; i < n; i++) { err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i], @@ -325,7 +335,6 @@ int nfsd_set_nrthreads(int n, int *nthreads) break; } svc_destroy(nfsd_serv); - unlock_kernel(); return err; } @@ -334,8 +343,8 @@ int nfsd_svc(unsigned short port, int nrservs) { int error; - - lock_kernel(); + + mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); error = -EINVAL; if (nrservs <= 0) @@ -363,7 +372,7 @@ nfsd_svc(unsigned short port, int nrservs) failure: svc_destroy(nfsd_serv); /* Release server */ out: - unlock_kernel(); + mutex_unlock(&nfsd_mutex); return error; } @@ -391,18 +400,17 @@ update_thread_usage(int busy_threads) /* * This is the NFS server kernel thread */ -static void -nfsd(struct svc_rqst *rqstp) +static int +nfsd(void *vrqstp) { + struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; struct fs_struct *fsp; - int err; - sigset_t shutdown_mask, allowed_mask; + int err, preverr = 0; /* Lock module and set up kernel thread */ - lock_kernel(); - daemonize("nfsd"); + mutex_lock(&nfsd_mutex); - /* After daemonize() this kernel thread shares current->fs + /* At this point, the thread shares current->fs * with the init process. We need to create files with a * umask of 0 instead of init's umask. */ fsp = copy_fs_struct(current->fs); @@ -414,14 +422,17 @@ nfsd(struct svc_rqst *rqstp) current->fs = fsp; current->fs->umask = 0; - siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS); - siginitsetinv(&allowed_mask, ALLOWED_SIGS); + /* + * thread is spawned with all signals set to SIG_IGN, re-enable + * the ones that will bring down the thread + */ + allow_signal(SIGKILL); + allow_signal(SIGHUP); + allow_signal(SIGINT); + allow_signal(SIGQUIT); nfsdstats.th_cnt++; - - rqstp->rq_task = current; - - unlock_kernel(); + mutex_unlock(&nfsd_mutex); /* * We want less throttling in balance_dirty_pages() so that nfs to @@ -435,26 +446,30 @@ nfsd(struct svc_rqst *rqstp) * The main request loop */ for (;;) { - /* Block all but the shutdown signals */ - sigprocmask(SIG_SETMASK, &shutdown_mask, NULL); - /* * Find a socket with data available and call its * recvfrom routine. */ while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN) ; - if (err < 0) + if (err == -EINTR) break; + else if (err < 0) { + if (err != preverr) { + printk(KERN_WARNING "%s: unexpected error " + "from svc_recv (%d)\n", __func__, -err); + preverr = err; + } + schedule_timeout_uninterruptible(HZ); + continue; + } + update_thread_usage(atomic_read(&nfsd_busy)); atomic_inc(&nfsd_busy); /* Lock the export hash tables for reading. */ exp_readlock(); - /* Process request with signals blocked. */ - sigprocmask(SIG_SETMASK, &allowed_mask, NULL); - svc_process(rqstp); /* Unlock export hash tables */ @@ -463,22 +478,10 @@ nfsd(struct svc_rqst *rqstp) atomic_dec(&nfsd_busy); } - if (err != -EINTR) { - printk(KERN_WARNING "nfsd: terminating on error %d\n", -err); - } else { - unsigned int signo; - - for (signo = 1; signo <= _NSIG; signo++) - if (sigismember(¤t->pending.signal, signo) && - !sigismember(¤t->blocked, signo)) - break; - killsig = signo; - } /* Clear signals before calling svc_exit_thread() */ flush_signals(current); - lock_kernel(); - + mutex_lock(&nfsd_mutex); nfsdstats.th_cnt --; out: @@ -486,8 +489,9 @@ out: svc_exit_thread(rqstp); /* Release module */ - unlock_kernel(); + mutex_unlock(&nfsd_mutex); module_put_and_exit(0); + return 0; } static __be32 map_new_errors(u32 vers, __be32 nfserr) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a3a291f771f..0f4481e0502 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -144,7 +144,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); /* Obtain dentry and export. */ - err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC); + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); if (err) return err; @@ -262,14 +262,14 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, { struct dentry *dentry; struct inode *inode; - int accmode = MAY_SATTR; + int accmode = NFSD_MAY_SATTR; int ftype = 0; __be32 err; int host_err; int size_change = 0; if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) - accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE; + accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; if (iap->ia_valid & ATTR_SIZE) ftype = S_IFREG; @@ -331,7 +331,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, */ if (iap->ia_valid & ATTR_SIZE) { if (iap->ia_size < inode->i_size) { - err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); + err = nfsd_permission(rqstp, fhp->fh_export, dentry, + NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); if (err) goto out; } @@ -462,7 +463,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int flags = 0; /* Get inode */ - error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); + error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); if (error) return error; @@ -563,20 +564,20 @@ struct accessmap { int how; }; static struct accessmap nfs3_regaccess[] = { - { NFS3_ACCESS_READ, MAY_READ }, - { NFS3_ACCESS_EXECUTE, MAY_EXEC }, - { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_TRUNC }, - { NFS3_ACCESS_EXTEND, MAY_WRITE }, + { NFS3_ACCESS_READ, NFSD_MAY_READ }, + { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC }, + { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC }, + { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE }, { 0, 0 } }; static struct accessmap nfs3_diraccess[] = { - { NFS3_ACCESS_READ, MAY_READ }, - { NFS3_ACCESS_LOOKUP, MAY_EXEC }, - { NFS3_ACCESS_MODIFY, MAY_EXEC|MAY_WRITE|MAY_TRUNC }, - { NFS3_ACCESS_EXTEND, MAY_EXEC|MAY_WRITE }, - { NFS3_ACCESS_DELETE, MAY_REMOVE }, + { NFS3_ACCESS_READ, NFSD_MAY_READ }, + { NFS3_ACCESS_LOOKUP, NFSD_MAY_EXEC }, + { NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC}, + { NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE }, + { NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE }, { 0, 0 } }; @@ -589,10 +590,10 @@ static struct accessmap nfs3_anyaccess[] = { * mainly at mode bits, and we make sure to ignore read-only * filesystem checks */ - { NFS3_ACCESS_READ, MAY_READ }, - { NFS3_ACCESS_EXECUTE, MAY_EXEC }, - { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_LOCAL_ACCESS }, - { NFS3_ACCESS_EXTEND, MAY_WRITE|MAY_LOCAL_ACCESS }, + { NFS3_ACCESS_READ, NFSD_MAY_READ }, + { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC }, + { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS }, + { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS }, { 0, 0 } }; @@ -606,7 +607,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor u32 query, result = 0, sresult = 0; __be32 error; - error = fh_verify(rqstp, fhp, 0, MAY_NOP); + error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); if (error) goto out; @@ -678,7 +679,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, * and (hopefully) checked permission - so allow OWNER_OVERRIDE * in case a chmod has now revoked permission. */ - err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); + err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE); if (err) goto out; @@ -689,7 +690,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, * or any access when mandatory locking enabled */ err = nfserr_perm; - if (IS_APPEND(inode) && (access & MAY_WRITE)) + if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE)) goto out; /* * We must ignore files (but only files) which might have mandatory @@ -706,14 +707,14 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, * Check to see if there are any leases on this file. * This may block while leases are broken. */ - host_err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0)); + host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? FMODE_WRITE : 0)); if (host_err == -EWOULDBLOCK) host_err = -ETIMEDOUT; if (host_err) /* NOMEM or WOULDBLOCK */ goto out_nfserr; - if (access & MAY_WRITE) { - if (access & MAY_READ) + if (access & NFSD_MAY_WRITE) { + if (access & NFSD_MAY_READ) flags = O_RDWR|O_LARGEFILE; else flags = O_WRONLY|O_LARGEFILE; @@ -1069,12 +1070,12 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (file) { err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - MAY_READ|MAY_OWNER_OVERRIDE); + NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); if (err) goto out; err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); } else { - err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file); + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); if (err) goto out; err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); @@ -1098,13 +1099,13 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (file) { err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - MAY_WRITE|MAY_OWNER_OVERRIDE); + NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE); if (err) goto out; err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stablep); } else { - err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file); + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); if (err) goto out; @@ -1136,7 +1137,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, if ((u64)count > ~(u64)offset) return nfserr_inval; - if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0) + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); + if (err) return err; if (EX_ISSYNC(fhp->fh_export)) { if (file->f_op && file->f_op->fsync) { @@ -1197,7 +1199,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (isdotent(fname, flen)) goto out; - err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; @@ -1248,36 +1250,34 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode = 0; iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; + err = nfserr_inval; + if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) { + printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n", + type); + goto out; + } + + host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); + if (host_err) + goto out_nfserr; + /* * Get the dir op function pointer. */ err = 0; switch (type) { case S_IFREG: - host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); - if (host_err) - goto out_nfserr; host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); break; case S_IFDIR: - host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); - if (host_err) - goto out_nfserr; host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); break; case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: - host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); - if (host_err) - goto out_nfserr; host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); break; - default: - printk("nfsd: bad file type %o in nfsd_create\n", type); - host_err = -EINVAL; - goto out_nfserr; } if (host_err < 0) { mnt_drop_write(fhp->fh_export->ex_path.mnt); @@ -1289,7 +1289,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, write_inode_now(dchild->d_inode, 1); } - err2 = nfsd_create_setattr(rqstp, resfhp, iap); if (err2) err = err2; @@ -1334,7 +1333,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; if (!(iap->ia_valid & ATTR_MODE)) iap->ia_mode = 0; - err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; @@ -1471,7 +1470,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) __be32 err; int host_err; - err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP); + err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP); if (err) goto out; @@ -1526,7 +1525,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (isdotent(fname, flen)) goto out; - err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; fh_lock(fhp); @@ -1591,10 +1590,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, __be32 err; int host_err; - err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE); + err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; - err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP); + err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP); if (err) goto out; @@ -1661,10 +1660,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, __be32 err; int host_err; - err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE); + err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); if (err) goto out; - err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE); + err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; @@ -1768,7 +1767,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, err = nfserr_acces; if (!flen || isdotent(fname, flen)) goto out; - err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE); + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE); if (err) goto out; @@ -1834,7 +1833,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, struct file *file; loff_t offset = *offsetp; - err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file); + err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file); if (err) goto out; @@ -1875,7 +1874,7 @@ out: __be32 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) { - __be32 err = fh_verify(rqstp, fhp, 0, MAY_NOP); + __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); if (!err && vfs_statfs(fhp->fh_dentry,stat)) err = nfserr_io; return err; @@ -1896,18 +1895,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, struct inode *inode = dentry->d_inode; int err; - if (acc == MAY_NOP) + if (acc == NFSD_MAY_NOP) return 0; #if 0 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", acc, - (acc & MAY_READ)? " read" : "", - (acc & MAY_WRITE)? " write" : "", - (acc & MAY_EXEC)? " exec" : "", - (acc & MAY_SATTR)? " sattr" : "", - (acc & MAY_TRUNC)? " trunc" : "", - (acc & MAY_LOCK)? " lock" : "", - (acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "", + (acc & NFSD_MAY_READ)? " read" : "", + (acc & NFSD_MAY_WRITE)? " write" : "", + (acc & NFSD_MAY_EXEC)? " exec" : "", + (acc & NFSD_MAY_SATTR)? " sattr" : "", + (acc & NFSD_MAY_TRUNC)? " trunc" : "", + (acc & NFSD_MAY_LOCK)? " lock" : "", + (acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "", inode->i_mode, IS_IMMUTABLE(inode)? " immut" : "", IS_APPEND(inode)? " append" : "", @@ -1920,18 +1919,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, * system. But if it is IRIX doing check on write-access for a * device special file, we ignore rofs. */ - if (!(acc & MAY_LOCAL_ACCESS)) - if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { + if (!(acc & NFSD_MAY_LOCAL_ACCESS)) + if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) { if (exp_rdonly(rqstp, exp) || __mnt_is_readonly(exp->ex_path.mnt)) return nfserr_rofs; - if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) + if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode)) return nfserr_perm; } - if ((acc & MAY_TRUNC) && IS_APPEND(inode)) + if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode)) return nfserr_perm; - if (acc & MAY_LOCK) { + if (acc & NFSD_MAY_LOCK) { /* If we cannot rely on authentication in NLM requests, * just allow locks, otherwise require read permission, or * ownership @@ -1939,7 +1938,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, if (exp->ex_flags & NFSEXP_NOAUTHNLM) return 0; else - acc = MAY_READ | MAY_OWNER_OVERRIDE; + acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE; } /* * The file owner always gets access permission for accesses that @@ -1955,15 +1954,16 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, * We must trust the client to do permission checking - using "ACCESS" * with NFSv3. */ - if ((acc & MAY_OWNER_OVERRIDE) && + if ((acc & NFSD_MAY_OWNER_OVERRIDE) && inode->i_uid == current->fsuid) return 0; + /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); /* Allow read access to binaries even when mode 111 */ if (err == -EACCES && S_ISREG(inode->i_mode) && - acc == (MAY_READ | MAY_OWNER_OVERRIDE)) + acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) err = permission(inode, MAY_EXEC, NULL); return err? nfserrno(err) : 0; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 443d108211a..7dce1612553 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1489,31 +1489,22 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group : NULL; } -static int o2hb_heartbeat_group_make_item(struct config_group *group, - const char *name, - struct config_item **new_item) +static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, + const char *name) { struct o2hb_region *reg = NULL; - int ret = 0; reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); - if (reg == NULL) { - ret = -ENOMEM; - goto out; - } + if (reg == NULL) + return ERR_PTR(-ENOMEM); config_item_init_type_name(®->hr_item, name, &o2hb_region_type); - *new_item = ®->hr_item; - spin_lock(&o2hb_live_lock); list_add_tail(®->hr_all_item, &o2hb_all_regions); spin_unlock(&o2hb_live_lock); -out: - if (ret) - kfree(reg); - return ret; + return ®->hr_item; } static void o2hb_heartbeat_group_drop_item(struct config_group *group, diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index b364b7052e4..816a3f61330 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -644,35 +644,23 @@ out: return ret; } -static int o2nm_node_group_make_item(struct config_group *group, - const char *name, - struct config_item **new_item) +static struct config_item *o2nm_node_group_make_item(struct config_group *group, + const char *name) { struct o2nm_node *node = NULL; - int ret = 0; - if (strlen(name) > O2NM_MAX_NAME_LEN) { - ret = -ENAMETOOLONG; - goto out; - } + if (strlen(name) > O2NM_MAX_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); node = kzalloc(sizeof(struct o2nm_node), GFP_KERNEL); - if (node == NULL) { - ret = -ENOMEM; - goto out; - } + if (node == NULL) + return ERR_PTR(-ENOMEM); strcpy(node->nd_name, name); /* use item.ci_namebuf instead? */ config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); spin_lock_init(&node->nd_lock); - *new_item = &node->nd_item; - -out: - if (ret) - kfree(node); - - return ret; + return &node->nd_item; } static void o2nm_node_group_drop_item(struct config_group *group, @@ -756,31 +744,25 @@ static struct o2nm_cluster_group *to_o2nm_cluster_group(struct config_group *gro } #endif -static int o2nm_cluster_group_make_group(struct config_group *group, - const char *name, - struct config_group **new_group) +static struct config_group *o2nm_cluster_group_make_group(struct config_group *group, + const char *name) { struct o2nm_cluster *cluster = NULL; struct o2nm_node_group *ns = NULL; - struct config_group *o2hb_group = NULL; + struct config_group *o2hb_group = NULL, *ret = NULL; void *defs = NULL; - int ret = 0; /* this runs under the parent dir's i_mutex; there can be only * one caller in here at a time */ - if (o2nm_single_cluster) { - ret = -ENOSPC; - goto out; - } + if (o2nm_single_cluster) + return ERR_PTR(-ENOSPC); cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL); ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL); defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); o2hb_group = o2hb_alloc_hb_set(); - if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL) { - ret = -ENOMEM; + if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL) goto out; - } config_group_init_type_name(&cluster->cl_group, name, &o2nm_cluster_type); @@ -797,15 +779,16 @@ static int o2nm_cluster_group_make_group(struct config_group *group, cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; - *new_group = &cluster->cl_group; + ret = &cluster->cl_group; o2nm_single_cluster = cluster; out: - if (ret) { + if (ret == NULL) { kfree(cluster); kfree(ns); o2hb_free_hb_set(o2hb_group); kfree(defs); + ret = ERR_PTR(-ENOMEM); } return ret; diff --git a/fs/open.c b/fs/open.c index a99ad09c319..bb98d2fe809 100644 --- a/fs/open.c +++ b/fs/open.c @@ -64,7 +64,8 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) memcpy(buf, &st, sizeof(st)); else { if (sizeof buf->f_blocks == 4) { - if ((st.f_blocks | st.f_bfree | st.f_bavail) & + if ((st.f_blocks | st.f_bfree | st.f_bavail | + st.f_bsize | st.f_frsize) & 0xffffffff00000000ULL) return -EOVERFLOW; /* diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 6149e4b58c8..efef715135d 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -401,7 +401,7 @@ void register_disk(struct gendisk *disk) disk->dev.parent = disk->driverfs_dev; disk->dev.devt = MKDEV(disk->major, disk->first_minor); - strlcpy(disk->dev.bus_id, disk->disk_name, KOBJ_NAME_LEN); + strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); /* ewww... some of these buggers have / in the name... */ s = strchr(disk->dev.bus_id, '/'); if (s) diff --git a/fs/pipe.c b/fs/pipe.c index 700f4e0d957..10c4e9aa5c4 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -950,7 +950,7 @@ fail_inode: return NULL; } -struct file *create_write_pipe(void) +struct file *create_write_pipe(int flags) { int err; struct inode *inode; @@ -983,7 +983,7 @@ struct file *create_write_pipe(void) goto err_dentry; f->f_mapping = inode->i_mapping; - f->f_flags = O_WRONLY; + f->f_flags = O_WRONLY | (flags & O_NONBLOCK); f->f_version = 0; return f; @@ -1007,7 +1007,7 @@ void free_write_pipe(struct file *f) put_filp(f); } -struct file *create_read_pipe(struct file *wrf) +struct file *create_read_pipe(struct file *wrf, int flags) { struct file *f = get_empty_filp(); if (!f) @@ -1019,7 +1019,7 @@ struct file *create_read_pipe(struct file *wrf) f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; f->f_pos = 0; - f->f_flags = O_RDONLY; + f->f_flags = O_RDONLY | (flags & O_NONBLOCK); f->f_op = &read_pipe_fops; f->f_mode = FMODE_READ; f->f_version = 0; @@ -1027,26 +1027,29 @@ struct file *create_read_pipe(struct file *wrf) return f; } -int do_pipe(int *fd) +int do_pipe_flags(int *fd, int flags) { struct file *fw, *fr; int error; int fdw, fdr; - fw = create_write_pipe(); + if (flags & ~(O_CLOEXEC | O_NONBLOCK)) + return -EINVAL; + + fw = create_write_pipe(flags); if (IS_ERR(fw)) return PTR_ERR(fw); - fr = create_read_pipe(fw); + fr = create_read_pipe(fw, flags); error = PTR_ERR(fr); if (IS_ERR(fr)) goto err_write_pipe; - error = get_unused_fd(); + error = get_unused_fd_flags(flags); if (error < 0) goto err_read_pipe; fdr = error; - error = get_unused_fd(); + error = get_unused_fd_flags(flags); if (error < 0) goto err_fdr; fdw = error; @@ -1074,16 +1077,21 @@ int do_pipe(int *fd) return error; } +int do_pipe(int *fd) +{ + return do_pipe_flags(fd, 0); +} + /* * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way Unix traditionally does this, though. */ -asmlinkage long __weak sys_pipe(int __user *fildes) +asmlinkage long __weak sys_pipe2(int __user *fildes, int flags) { int fd[2]; int error; - error = do_pipe(fd); + error = do_pipe_flags(fd, flags); if (!error) { if (copy_to_user(fildes, fd, sizeof(fd))) { sys_close(fd[0]); @@ -1094,6 +1102,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes) return error; } +asmlinkage long __weak sys_pipe(int __user *fildes) +{ + return sys_pipe2(fildes, 0); +} + /* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. So we don't need diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index c652d469dc0..ded96986296 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -232,7 +232,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off, #undef K } -extern const struct seq_operations fragmentation_op; static int fragmentation_open(struct inode *inode, struct file *file) { (void)inode; @@ -246,7 +245,6 @@ static const struct file_operations fragmentation_file_operations = { .release = seq_release, }; -extern const struct seq_operations pagetypeinfo_op; static int pagetypeinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &pagetypeinfo_op); @@ -259,7 +257,6 @@ static const struct file_operations pagetypeinfo_file_ops = { .release = seq_release, }; -extern const struct seq_operations zoneinfo_op; static int zoneinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &zoneinfo_op); @@ -356,7 +353,6 @@ static const struct file_operations proc_devinfo_operations = { .release = seq_release, }; -extern const struct seq_operations vmstat_op; static int vmstat_open(struct inode *inode, struct file *file) { return seq_open(file, &vmstat_op); @@ -468,14 +464,25 @@ static const struct file_operations proc_slabstats_operations = { #ifdef CONFIG_MMU static int vmalloc_open(struct inode *inode, struct file *file) { - return seq_open(file, &vmalloc_op); + unsigned int *ptr = NULL; + int ret; + + if (NUMA_BUILD) + ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); + ret = seq_open(file, &vmalloc_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = ptr; + } else + kfree(ptr); + return ret; } static const struct file_operations proc_vmalloc_operations = { .open = vmalloc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; #endif diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 83f357b30d7..7bc296f424a 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -27,6 +27,11 @@ #include "internal.h" +static struct net *get_proc_net(const struct inode *inode) +{ + return maybe_get_net(PDE_NET(PDE(inode))); +} + int seq_open_net(struct inode *ino, struct file *f, const struct seq_operations *ops, int size) { @@ -51,6 +56,30 @@ int seq_open_net(struct inode *ino, struct file *f, } EXPORT_SYMBOL_GPL(seq_open_net); +int single_open_net(struct inode *inode, struct file *file, + int (*show)(struct seq_file *, void *)) +{ + int err; + struct net *net; + + err = -ENXIO; + net = get_proc_net(inode); + if (net == NULL) + goto err_net; + + err = single_open(file, show, net); + if (err < 0) + goto err_open; + + return 0; + +err_open: + put_net(net); +err_net: + return err; +} +EXPORT_SYMBOL_GPL(single_open_net); + int seq_release_net(struct inode *ino, struct file *f) { struct seq_file *seq; @@ -63,6 +92,14 @@ int seq_release_net(struct inode *ino, struct file *f) } EXPORT_SYMBOL_GPL(seq_release_net); +int single_release_net(struct inode *ino, struct file *f) +{ + struct seq_file *seq = f->private_data; + put_net(seq->private); + return single_release(ino, f); +} +EXPORT_SYMBOL_GPL(single_release_net); + static struct net *get_proc_task_net(struct inode *dir) { struct task_struct *task; @@ -153,12 +190,6 @@ void proc_net_remove(struct net *net, const char *name) } EXPORT_SYMBOL_GPL(proc_net_remove); -struct net *get_proc_net(const struct inode *inode) -{ - return maybe_get_net(PDE_NET(PDE(inode))); -} -EXPORT_SYMBOL_GPL(get_proc_net); - static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 21f490f5d65..d153946d6d1 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -136,54 +136,6 @@ static const struct file_operations proc_tty_drivers_operations = { .release = seq_release, }; -static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) -{ - return (*pos < NR_LDISCS) ? pos : NULL; -} - -static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) -{ - (*pos)++; - return (*pos < NR_LDISCS) ? pos : NULL; -} - -static void tty_ldiscs_seq_stop(struct seq_file *m, void *v) -{ -} - -static int tty_ldiscs_seq_show(struct seq_file *m, void *v) -{ - int i = *(loff_t *)v; - struct tty_ldisc *ld; - - ld = tty_ldisc_get(i); - if (ld == NULL) - return 0; - seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i); - tty_ldisc_put(i); - return 0; -} - -static const struct seq_operations tty_ldiscs_seq_ops = { - .start = tty_ldiscs_seq_start, - .next = tty_ldiscs_seq_next, - .stop = tty_ldiscs_seq_stop, - .show = tty_ldiscs_seq_show, -}; - -static int proc_tty_ldiscs_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &tty_ldiscs_seq_ops); -} - -static const struct file_operations tty_ldiscs_proc_fops = { - .owner = THIS_MODULE, - .open = proc_tty_ldiscs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - /* * This function is called by tty_register_driver() to handle * registering the driver's /proc handler into /proc/tty/driver/<foo> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 164bd9f9ede..7546a918f79 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -636,7 +636,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, struct pagemapread pm; int pagecount; int ret = -ESRCH; - struct mm_walk pagemap_walk; + struct mm_walk pagemap_walk = {}; unsigned long src; unsigned long svpfn; unsigned long start_vaddr; diff --git a/fs/signalfd.c b/fs/signalfd.c index 619725644c7..9c39bc7f843 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -205,11 +205,19 @@ static const struct file_operations signalfd_fops = { .read = signalfd_read, }; -asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) +asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, + size_t sizemask, int flags) { sigset_t sigmask; struct signalfd_ctx *ctx; + /* Check the SFD_* constants for consistency. */ + BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK); + + if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK)) + return -EINVAL; + if (sizemask != sizeof(sigset_t) || copy_from_user(&sigmask, user_mask, sizeof(sigmask))) return -EINVAL; @@ -227,7 +235,8 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas * When we call this, the initialization must be complete, since * anon_inode_getfd() will install the fd. */ - ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx); + ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx, + flags & (O_CLOEXEC | O_NONBLOCK)); if (ufd < 0) kfree(ctx); } else { @@ -249,3 +258,9 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas return ufd; } + +asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, + size_t sizemask) +{ + return sys_signalfd4(ufd, user_mask, sizemask, 0); +} diff --git a/fs/super.c b/fs/super.c index 453877c5697..e931ae9511f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -70,6 +70,7 @@ static struct super_block *alloc_super(struct file_system_type *type) INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); + INIT_LIST_HEAD(&s->s_dentry_lru); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); lockdep_set_class(&s->s_umount, &type->s_umount_key); diff --git a/fs/sync.c b/fs/sync.c index 228e17b5e9e..2967562d416 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -139,7 +139,8 @@ asmlinkage long sys_fdatasync(unsigned int fd) * before performing the write. * * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the - * range which are not presently under writeback. + * range which are not presently under writeback. Note that this may block for + * significant periods due to exhaustion of disk request structures. * * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range * after performing the write. diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 8c0e4b92574..c1a7efb310b 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -398,7 +398,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, } /** - * sysfs_add_one - add sysfs_dirent to parent + * __sysfs_add_one - add sysfs_dirent to parent without warning * @acxt: addrm context to use * @sd: sysfs_dirent to be added * @@ -417,7 +417,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, * 0 on success, -EEXIST if entry with the given name already * exists. */ -int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) return -EEXIST; @@ -435,6 +435,39 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) } /** + * sysfs_add_one - add sysfs_dirent to parent + * @acxt: addrm context to use + * @sd: sysfs_dirent to be added + * + * Get @acxt->parent_sd and set sd->s_parent to it and increment + * nlink of parent inode if @sd is a directory and link into the + * children list of the parent. + * + * This function should be called between calls to + * sysfs_addrm_start() and sysfs_addrm_finish() and should be + * passed the same @acxt as passed to sysfs_addrm_start(). + * + * LOCKING: + * Determined by sysfs_addrm_start(). + * + * RETURNS: + * 0 on success, -EEXIST if entry with the given name already + * exists. + */ +int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +{ + int ret; + + ret = __sysfs_add_one(acxt, sd); + if (ret == -EEXIST) { + printk(KERN_WARNING "sysfs: duplicate filename '%s' " + "can not be created\n", sd->s_name); + WARN_ON(1); + } + return ret; +} + +/** * sysfs_remove_one - remove sysfs_dirent from parent * @acxt: addrm context to use * @sd: sysfs_dirent to be removed diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index e7735f643cd..3f07893ff89 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -14,6 +14,7 @@ #include <linux/kobject.h> #include <linux/kallsyms.h> #include <linux/slab.h> +#include <linux/fsnotify.h> #include <linux/namei.h> #include <linux/poll.h> #include <linux/list.h> @@ -585,9 +586,11 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - rc = notify_change(victim, &newattrs); + newattrs.ia_ctime = current_fs_time(inode->i_sb); + rc = sysfs_setattr(victim, &newattrs); if (rc == 0) { + fsnotify_change(victim, newattrs.ia_valid); mutex_lock(&sysfs_mutex); victim_sd->s_mode = newattrs.ia_mode; mutex_unlock(&sysfs_mutex); diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 817f5966edc..a3ba217fbe7 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -19,13 +19,8 @@ #include "sysfs.h" -/** - * sysfs_create_link - create symlink between two objects. - * @kobj: object whose directory we're creating the link in. - * @target: object we're pointing to. - * @name: name of the symlink. - */ -int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) +static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, + const char *name, int warn) { struct sysfs_dirent *parent_sd = NULL; struct sysfs_dirent *target_sd = NULL; @@ -65,7 +60,10 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char target_sd = NULL; /* reference is now owned by the symlink */ sysfs_addrm_start(&acxt, parent_sd); - error = sysfs_add_one(&acxt, sd); + if (warn) + error = sysfs_add_one(&acxt, sd); + else + error = __sysfs_add_one(&acxt, sd); sysfs_addrm_finish(&acxt); if (error) @@ -80,6 +78,33 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char } /** + * sysfs_create_link - create symlink between two objects. + * @kobj: object whose directory we're creating the link in. + * @target: object we're pointing to. + * @name: name of the symlink. + */ +int sysfs_create_link(struct kobject *kobj, struct kobject *target, + const char *name) +{ + return sysfs_do_create_link(kobj, target, name, 1); +} + +/** + * sysfs_create_link_nowarn - create symlink between two objects. + * @kobj: object whose directory we're creating the link in. + * @target: object we're pointing to. + * @name: name of the symlink. + * + * This function does the same as sysf_create_link(), but it + * doesn't warn if the link already exists. + */ +int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, + const char *name) +{ + return sysfs_do_create_link(kobj, target, name, 0); +} + +/** * sysfs_remove_link - remove symlink in object's directory. * @kobj: object we're acting for. * @name: name of the symlink to remove. diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index ce4e15f8aae..a5db496f71c 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -107,6 +107,7 @@ struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd); void sysfs_put_active_two(struct sysfs_dirent *sd); void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *parent_sd); +int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); diff --git a/fs/timerfd.c b/fs/timerfd.c index d87d354ec42..c502c60e4f5 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -184,7 +184,11 @@ asmlinkage long sys_timerfd_create(int clockid, int flags) int ufd; struct timerfd_ctx *ctx; - if (flags) + /* Check the TFD_* constants for consistency. */ + BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK); + + if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) return -EINVAL; if (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME) @@ -198,7 +202,8 @@ asmlinkage long sys_timerfd_create(int clockid, int flags) ctx->clockid = clockid; hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); - ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx); + ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, + flags & (O_CLOEXEC | O_NONBLOCK)); if (ufd < 0) kfree(ctx); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 85b22b5977f..506f724055c 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1232,7 +1232,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; - struct match_token *tp = tokens; + const struct match_token *tp = tokens; while (tp->token != Opt_onerror_panic && tp->token != mval) ++tp; |