From 2628b766363aa3791e816a7e5807373ef551c776 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 31 Jul 2008 17:16:51 +0100 Subject: [PATCH] Factor out nfsd_do_readdir() into its own function Signed-off-by: David Woodhouse Signed-off-by: Al Viro --- fs/nfsd/vfs.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) (limited to 'fs/nfsd/vfs.c') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index aa1d0d6489a..14eda20cc29 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1813,6 +1813,29 @@ out: return err; } +static int nfsd_do_readdir(struct file *file, filldir_t func, + struct readdir_cd *cdp, loff_t *offsetp) +{ + int host_err; + + /* + * Read the directory entries. This silly loop is necessary because + * readdir() is not guaranteed to fill up the entire buffer, but + * may choose to do less. + */ + do { + cdp->err = nfserr_eof; /* will be cleared on successful read */ + host_err = vfs_readdir(file, func, cdp); + } while (host_err >=0 && cdp->err == nfs_ok); + + *offsetp = vfs_llseek(file, 0, 1); + + if (host_err) + return nfserrno(host_err); + else + return cdp->err; +} + /* * Read entries from a directory. * The NFSv3/4 verifier we ignore for now. @@ -1822,7 +1845,6 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, struct readdir_cd *cdp, filldir_t func) { __be32 err; - int host_err; struct file *file; loff_t offset = *offsetp; @@ -1836,21 +1858,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, goto out_close; } - /* - * Read the directory entries. This silly loop is necessary because - * readdir() is not guaranteed to fill up the entire buffer, but - * may choose to do less. - */ - - do { - cdp->err = nfserr_eof; /* will be cleared on successful read */ - host_err = vfs_readdir(file, func, cdp); - } while (host_err >=0 && cdp->err == nfs_ok); - if (host_err) - err = nfserrno(host_err); - else - err = cdp->err; - *offsetp = vfs_llseek(file, 0, 1); + err = nfsd_do_readdir(file, func, cdp, offsetp); if (err == nfserr_eof || err == nfserr_toosmall) err = nfs_ok; /* can still be found in ->err */ -- cgit v1.2.3-70-g09d2 From 14f7dd632011bb89c035722edd6ea0d90ca6b078 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 31 Jul 2008 20:29:12 +0100 Subject: [PATCH] Copy XFS readdir hack into nfsd code. Some file systems with their own internal locking have problems with the way that nfsd calls the ->lookup() method from within a filldir function called from their ->readdir() method. The recursion back into the file system code can cause deadlock. XFS has a fairly hackish solution to this which involves doing the readdir() into a locally-allocated buffer, then going back through it calling the filldir function afterwards. It's not ideal, but it works. It's particularly suboptimal because XFS does this for local file systems too, where it's completely unnecessary. Copy this hack into the NFS code where it can be used only for NFS export. In response to feedback, use it unconditionally rather than only for the affected file systems. Signed-off-by: David Woodhouse Signed-off-by: Al Viro --- fs/nfsd/vfs.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 15 deletions(-) (limited to 'fs/nfsd/vfs.c') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 14eda20cc29..93b22f661d9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1813,27 +1813,105 @@ out: return err; } -static int nfsd_do_readdir(struct file *file, filldir_t func, - struct readdir_cd *cdp, loff_t *offsetp) +/* + * We do this buffering because we must not call back into the file + * system's ->lookup() method from the filldir callback. That may well + * deadlock a number of file systems. + * + * This is based heavily on the implementation of same in XFS. + */ +struct buffered_dirent { + u64 ino; + loff_t offset; + int namlen; + unsigned int d_type; + char name[]; +}; + +struct readdir_data { + char *dirent; + size_t used; +}; + +static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_data *buf = __buf; + struct buffered_dirent *de = (void *)(buf->dirent + buf->used); + unsigned int reclen; + + reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64)); + if (buf->used + reclen > PAGE_SIZE) + return -EINVAL; + + de->namlen = namlen; + de->offset = offset; + de->ino = ino; + de->d_type = d_type; + memcpy(de->name, name, namlen); + buf->used += reclen; + + return 0; +} + +static int nfsd_buffered_readdir(struct file *file, filldir_t func, + struct readdir_cd *cdp, loff_t *offsetp) { + struct readdir_data buf; + struct buffered_dirent *de; int host_err; + int size; + loff_t offset; - /* - * Read the directory entries. This silly loop is necessary because - * readdir() is not guaranteed to fill up the entire buffer, but - * may choose to do less. - */ - do { - cdp->err = nfserr_eof; /* will be cleared on successful read */ - host_err = vfs_readdir(file, func, cdp); - } while (host_err >=0 && cdp->err == nfs_ok); + buf.dirent = (void *)__get_free_page(GFP_KERNEL); + if (!buf.dirent) + return -ENOMEM; + + offset = *offsetp; + cdp->err = nfserr_eof; /* will be cleared on successful read */ - *offsetp = vfs_llseek(file, 0, 1); + while (1) { + unsigned int reclen; + + buf.used = 0; + + host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); + if (host_err) + break; + + size = buf.used; + + if (!size) + break; + + + de = (struct buffered_dirent *)buf.dirent; + while (size > 0) { + offset = de->offset; + + if (func(cdp, de->name, de->namlen, de->offset, + de->ino, de->d_type)) + goto done; + + if (cdp->err != nfs_ok) + goto done; + + reclen = ALIGN(sizeof(*de) + de->namlen, + sizeof(u64)); + size -= reclen; + de = (struct buffered_dirent *)((char *)de + reclen); + } + offset = vfs_llseek(file, 0, 1); + } + + done: + free_page((unsigned long)(buf.dirent)); if (host_err) return nfserrno(host_err); - else - return cdp->err; + + *offsetp = offset; + return cdp->err; } /* @@ -1858,7 +1936,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, goto out_close; } - err = nfsd_do_readdir(file, func, cdp, offsetp); + err = nfsd_buffered_readdir(file, func, cdp, offsetp); if (err == nfserr_eof || err == nfserr_toosmall) err = nfs_ok; /* can still be found in ->err */ -- cgit v1.2.3-70-g09d2 From 53c9c5c0e32c69f9df1822e47671c13e3402c82f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Aug 2008 07:29:52 -0400 Subject: [PATCH] prepare vfs_readdir() callers to returning filldir result It's not the final state, but it allows moving ->readdir() instances to passing filldir return value to caller of vfs_readdir(). Signed-off-by: Al Viro --- arch/alpha/kernel/osf_sys.c | 7 ++----- arch/parisc/hpux/fs.c | 5 ++--- fs/compat.c | 22 ++++++++-------------- fs/exportfs/expfs.c | 7 ++++--- fs/nfsd/vfs.c | 11 +++++++++-- fs/readdir.c | 22 ++++++++-------------- 6 files changed, 33 insertions(+), 41 deletions(-) (limited to 'fs/nfsd/vfs.c') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 8509dad3120..f25f6c49095 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -165,14 +165,11 @@ osf_getdirentries(unsigned int fd, struct osf_dirent __user *dirent, buf.error = 0; error = vfs_readdir(file, osf_filldir, &buf); - if (error < 0) - goto out_putf; - - error = buf.error; + if (error >= 0) + error = buf.error; if (count != buf.count) error = count - buf.count; - out_putf: fput(file); out: return error; diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 12c04c5e558..bd9a4db3bd4 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -127,9 +127,8 @@ int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned i buf.error = 0; error = vfs_readdir(file, filldir, &buf); - if (error < 0) - goto out_putf; - error = buf.error; + if (error >= 0) + error = buf.error; lastdirent = buf.previous; if (lastdirent) { if (put_user(file->f_pos, &lastdirent->d_off)) diff --git a/fs/compat.c b/fs/compat.c index 5f9ec449c79..cb36245f9fe 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -869,7 +869,7 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, buf.dirent = dirent; error = vfs_readdir(file, compat_fillonedir, &buf); - if (error >= 0) + if (buf.result) error = buf.result; fput(file); @@ -956,9 +956,8 @@ asmlinkage long compat_sys_getdents(unsigned int fd, buf.error = 0; error = vfs_readdir(file, compat_filldir, &buf); - if (error < 0) - goto out_putf; - error = buf.error; + if (error >= 0) + error = buf.error; lastdirent = buf.previous; if (lastdirent) { if (put_user(file->f_pos, &lastdirent->d_off)) @@ -966,8 +965,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd, else error = count - buf.count; } - -out_putf: fput(file); out: return error; @@ -1047,19 +1044,16 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, buf.error = 0; error = vfs_readdir(file, compat_filldir64, &buf); - if (error < 0) - goto out_putf; - error = buf.error; + if (error >= 0) + error = buf.error; lastdirent = buf.previous; if (lastdirent) { typeof(lastdirent->d_off) d_off = file->f_pos; - error = -EFAULT; if (__put_user_unaligned(d_off, &lastdirent->d_off)) - goto out_putf; - error = count - buf.count; + error = -EFAULT; + else + error = count - buf.count; } - -out_putf: fput(file); out: return error; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 51bdc5cab06..80246bad1b7 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -280,13 +280,14 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry, int old_seq = buffer.sequence; error = vfs_readdir(file, filldir_one, &buffer); + if (buffer.found) { + error = 0; + break; + } if (error < 0) break; - error = 0; - if (buffer.found) - break; error = -ENOENT; if (old_seq == buffer.sequence) break; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 93b22f661d9..e3e37f7c847 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1831,6 +1831,7 @@ struct buffered_dirent { struct readdir_data { char *dirent; size_t used; + int full; }; static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, @@ -1841,8 +1842,10 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, unsigned int reclen; reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64)); - if (buf->used + reclen > PAGE_SIZE) + if (buf->used + reclen > PAGE_SIZE) { + buf->full = 1; return -EINVAL; + } de->namlen = namlen; de->offset = offset; @@ -1874,9 +1877,13 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, unsigned int reclen; buf.used = 0; + buf.full = 0; host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); - if (host_err) + if (buf.full) + host_err = 0; + + if (host_err < 0) break; size = buf.used; diff --git a/fs/readdir.c b/fs/readdir.c index 93a7559bbfd..b318d9b5af2 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -117,7 +117,7 @@ asmlinkage long old_readdir(unsigned int fd, struct old_linux_dirent __user * di buf.dirent = dirent; error = vfs_readdir(file, fillonedir, &buf); - if (error >= 0) + if (buf.result) error = buf.result; fput(file); @@ -209,9 +209,8 @@ asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * diren buf.error = 0; error = vfs_readdir(file, filldir, &buf); - if (error < 0) - goto out_putf; - error = buf.error; + if (error >= 0) + error = buf.error; lastdirent = buf.previous; if (lastdirent) { if (put_user(file->f_pos, &lastdirent->d_off)) @@ -219,8 +218,6 @@ asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * diren else error = count - buf.count; } - -out_putf: fput(file); out: return error; @@ -293,19 +290,16 @@ asmlinkage long sys_getdents64(unsigned int fd, struct linux_dirent64 __user * d buf.error = 0; error = vfs_readdir(file, filldir64, &buf); - if (error < 0) - goto out_putf; - error = buf.error; + if (error >= 0) + error = buf.error; lastdirent = buf.previous; if (lastdirent) { typeof(lastdirent->d_off) d_off = file->f_pos; - error = -EFAULT; if (__put_user(d_off, &lastdirent->d_off)) - goto out_putf; - error = count - buf.count; + error = -EFAULT; + else + error = count - buf.count; } - -out_putf: fput(file); out: return error; -- cgit v1.2.3-70-g09d2 From c002a6c7977320f95b5edede5ce4e0eeecf291ff Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 17 Aug 2008 17:21:18 +0100 Subject: [PATCH] Optimise NFS readdir hack slightly. Avoid calling the underlying ->readdir() again when we reached the end already; keep going round the loop only if we stopped due to our own buffer being full. [AV: tidy the things up a bit, while we are there] Signed-off-by: David Woodhouse Signed-off-by: Al Viro --- fs/nfsd/vfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfsd/vfs.c') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e3e37f7c847..49d4b8725ca 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1891,7 +1891,6 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, if (!size) break; - de = (struct buffered_dirent *)buf.dirent; while (size > 0) { offset = de->offset; @@ -1908,7 +1907,9 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, size -= reclen; de = (struct buffered_dirent *)((char *)de + reclen); } - offset = vfs_llseek(file, 0, 1); + offset = vfs_llseek(file, 0, SEEK_CUR); + if (!buf.full) + break; } done: -- cgit v1.2.3-70-g09d2