From 48e70bc18ac81881dedd3aa327c55b924fc41ecf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Apr 2009 08:19:27 +0200 Subject: Document and move the various READ/WRITE types It's a somewhat twisty maze of hints and behavioural modifiers, try and clear it up a bit with some documentation. Signed-off-by: Jens Axboe --- include/linux/fs.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 562d2855cf3..b535aec4406 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,6 +87,60 @@ struct inodes_stat_t { */ #define FMODE_NOCMTIME ((__force fmode_t)2048) +/* + * The below are the various read and write types that we support. Some of + * them include behavioral modifiers that send information down to the + * block layer and IO scheduler. Terminology: + * + * The block layer uses device plugging to defer IO a little bit, in + * the hope that we will see more IO very shortly. This increases + * coalescing of adjacent IO and thus reduces the number of IOs we + * have to send to the device. It also allows for better queuing, + * if the IO isn't mergeable. If the caller is going to be waiting + * for the IO, then he must ensure that the device is unplugged so + * that the IO is dispatched to the driver. + * + * All IO is handled async in Linux. This is fine for background + * writes, but for reads or writes that someone waits for completion + * on, we want to notify the block layer and IO scheduler so that they + * know about it. That allows them to make better scheduling + * decisions. So when the below references 'sync' and 'async', it + * is referencing this priority hint. + * + * With that in mind, the available types are: + * + * READ A normal read operation. Device will be plugged. + * READ_SYNC A synchronous read. Device is not plugged, caller can + * immediately wait on this read without caring about + * unplugging. 
+ * READA Used for read-ahead operations. Lower priority, and the + * block layer could (in theory) choose to ignore this + * request if it runs into resource problems. + * WRITE A normal async write. Device will be plugged. + * SWRITE Like WRITE, but a special case for ll_rw_block() that + * tells it to lock the buffer first. Normally a buffer + * must be locked before doing IO. + * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down + * the hint that someone will be waiting on this IO + * shortly. The device must still be unplugged explicitly, + * WRITE_SYNC_PLUG does not do this as we could be + * submitting more writes before we actually wait on any + * of them. + * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device + * immediately after submission. The write equivalent + * of READ_SYNC. + * WRITE_ODIRECT Special case write for O_DIRECT only. + * SWRITE_SYNC + * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. + * See SWRITE. + * WRITE_BARRIER Like WRITE, but tells the block layer that all + * previously submitted writes must be safely on storage + * before this one is started. Also guarantees that when + * this write is complete, it itself is also safely on + * storage. Prevents reordering of writes on both sides + * of this IO. + * + */ #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 @@ -102,6 +156,11 @@ struct inodes_stat_t { (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) + +/* + * These aren't really reads or writes, they pass down information about + * parts of device that are now unused by the file system. 
+ */ #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) -- cgit v1.2.3-70-g09d2 From f8cc774ce4844811a55e2352f1443055e3994e28 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:40 +0200 Subject: splice: remove generic_file_splice_write_nolock() Remove the now unused generic_file_splice_write_nolock() function. It's conceptually broken anyway, because splice may need to wait for pipe events so holding locks across the whole operation is wrong. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/splice.c | 59 ------------------------------------------------------ include/linux/fs.h | 2 -- 2 files changed, 61 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/splice.c b/fs/splice.c index 584b2b7a1db..128ee36a719 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -810,65 +810,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, return ret; } -/** - * generic_file_splice_write_nolock - generic_file_splice_write without mutexes - * @pipe: pipe info - * @out: file to write to - * @ppos: position in @out - * @len: number of bytes to splice - * @flags: splice modifier flags - * - * Description: - * Will either move or copy pages (determined by @flags options) from - * the given pipe inode to the given file. The caller is responsible - * for acquiring i_mutex on both inodes. 
- * - */ -ssize_t -generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) -{ - struct address_space *mapping = out->f_mapping; - struct inode *inode = mapping->host; - struct splice_desc sd = { - .total_len = len, - .flags = flags, - .pos = *ppos, - .u.file = out, - }; - ssize_t ret; - int err; - - err = file_remove_suid(out); - if (unlikely(err)) - return err; - - ret = __splice_from_pipe(pipe, &sd, pipe_to_file); - if (ret > 0) { - unsigned long nr_pages; - - *ppos += ret; - nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - /* - * If file or inode is SYNC and we actually wrote some data, - * sync it. - */ - if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { - err = generic_osync_inode(inode, mapping, - OSYNC_METADATA|OSYNC_DATA); - - if (err) - ret = err; - } - balance_dirty_pages_ratelimited_nr(mapping, nr_pages); - } - - return ret; -} - -EXPORT_SYMBOL(generic_file_splice_write_nolock); - /** * generic_file_splice_write - splice data from a pipe to a file * @pipe: pipe info diff --git a/include/linux/fs.h b/include/linux/fs.h index b535aec4406..907d8f56c6f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2209,8 +2209,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); -extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, - struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, -- cgit v1.2.3-70-g09d2 From 61e0d47c33cc371f725bcda4a47ae0efe652dba8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:41 +0200 Subject: splice: add helpers 
for locking pipe inode There are lots of sequences like this, especially in splice code: if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); /* do something */ if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); so introduce helpers which do the conditional locking and unlocking. Also replace the inode_double_lock() call with a pipe_double_lock() helper to avoid spreading the use of this functionality beyond the pipe code. This patch is just a cleanup, and should cause no behavioral changes. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/inode.c | 36 ---------------------------------- fs/pipe.c | 42 +++++++++++++++++++++++++++++++++++---- fs/splice.c | 50 ++++++++++++++++++++--------------------------- include/linux/fs.h | 3 --- include/linux/pipe_fs_i.h | 5 +++++ 5 files changed, 64 insertions(+), 72 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/inode.c b/fs/inode.c index d06d6d268de..6ad14a1cd8c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1470,42 +1470,6 @@ static void __wait_on_freeing_inode(struct inode *inode) spin_lock(&inode_lock); } -/* - * We rarely want to lock two inodes that do not have a parent/child - * relationship (such as directory, child inode) simultaneously. The - * vast majority of file systems should be able to get along fine - * without this. Do not use these functions except as a last resort. 
- */ -void inode_double_lock(struct inode *inode1, struct inode *inode2) -{ - if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { - if (inode1) - mutex_lock(&inode1->i_mutex); - else if (inode2) - mutex_lock(&inode2->i_mutex); - return; - } - - if (inode1 < inode2) { - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); - } -} -EXPORT_SYMBOL(inode_double_lock); - -void inode_double_unlock(struct inode *inode1, struct inode *inode2) -{ - if (inode1) - mutex_unlock(&inode1->i_mutex); - - if (inode2 && inode2 != inode1) - mutex_unlock(&inode2->i_mutex); -} -EXPORT_SYMBOL(inode_double_unlock); - static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { diff --git a/fs/pipe.c b/fs/pipe.c index 4af7aa52181..13414ec45b8 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -37,6 +37,42 @@ * -- Manfred Spraul 2002-05-09 */ +static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) +{ + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, subclass); +} + +void pipe_lock(struct pipe_inode_info *pipe) +{ + /* + * pipe_lock() nests non-pipe inode locks (for writing to a file) + */ + pipe_lock_nested(pipe, I_MUTEX_PARENT); +} +EXPORT_SYMBOL(pipe_lock); + +void pipe_unlock(struct pipe_inode_info *pipe) +{ + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); +} +EXPORT_SYMBOL(pipe_unlock); + +void pipe_double_lock(struct pipe_inode_info *pipe1, + struct pipe_inode_info *pipe2) +{ + BUG_ON(pipe1 == pipe2); /* callers must pass two distinct pipes */ + + if (pipe1 < pipe2) { + pipe_lock_nested(pipe1, I_MUTEX_PARENT); + pipe_lock_nested(pipe2, I_MUTEX_CHILD); + } else { + pipe_lock_nested(pipe2, I_MUTEX_PARENT); /* lower address first, always as PARENT, so both branches annotate the same PARENT -> CHILD order */ + pipe_lock_nested(pipe1, I_MUTEX_CHILD); + } +} + /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe) { @@ -47,12 +83,10
@@ void pipe_wait(struct pipe_inode_info *pipe) * is considered a noninteractive wait: */ prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); schedule(); finish_wait(&pipe->wait, &wait); - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); } static int diff --git a/fs/splice.c b/fs/splice.c index 128ee36a719..5384a90665d 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -182,8 +182,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, do_wakeup = 0; page_nr = 0; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); for (;;) { if (!pipe->readers) { @@ -245,15 +244,13 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, pipe->waiting_writers--; } - if (pipe->inode) { - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - } + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } while (page_nr < spd_pages) @@ -801,11 +798,9 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, .u.file = out, }; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, actor); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -837,8 +832,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, }; ssize_t ret; - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); + pipe_lock(pipe); splice_from_pipe_begin(&sd); do { @@ -854,8 +848,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, } while (ret > 0); splice_from_pipe_end(pipe, &sd); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + 
pipe_unlock(pipe); if (sd.num_spliced) ret = sd.num_spliced; @@ -1348,8 +1341,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, if (!pipe) return -EBADF; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); error = ret = 0; while (nr_segs) { @@ -1404,8 +1396,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, iov++; } - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); if (!ret) ret = error; @@ -1533,7 +1524,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) return 0; ret = 0; - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); while (!pipe->nrbufs) { if (signal_pending(current)) { @@ -1551,7 +1542,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe_wait(pipe); } - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -1571,7 +1562,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) return 0; ret = 0; - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); while (pipe->nrbufs >= PIPE_BUFFERS) { if (!pipe->readers) { @@ -1592,7 +1583,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe->waiting_writers--; } - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -1608,10 +1599,10 @@ static int link_pipe(struct pipe_inode_info *ipipe, /* * Potential ABBA deadlock, work around it by ordering lock - * grabbing by inode address. Otherwise two different processes + * grabbing by pipe info address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). 
*/ - inode_double_lock(ipipe->inode, opipe->inode); + pipe_double_lock(ipipe, opipe); do { if (!opipe->readers) { @@ -1662,7 +1653,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) ret = -EAGAIN; - inode_double_unlock(ipipe->inode, opipe->inode); + pipe_unlock(ipipe); + pipe_unlock(opipe); /* * If we put data in the output pipe, wakeup any potential readers. diff --git a/include/linux/fs.h b/include/linux/fs.h index 907d8f56c6f..e766be0d432 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -797,9 +797,6 @@ enum inode_i_mutex_lock_class I_MUTEX_QUOTA }; -extern void inode_double_lock(struct inode *inode1, struct inode *inode2); -extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); - /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8e4120285f7..c8f038554e8 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -134,6 +134,11 @@ struct pipe_buf_operations { memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE +/* Pipe lock and unlock operations */ +void pipe_lock(struct pipe_inode_info *); +void pipe_unlock(struct pipe_inode_info *); +void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); + /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe); -- cgit v1.2.3-70-g09d2 From 0112fc2229847feb6c4eb011e6833d8f1742a375 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Wed, 8 Apr 2009 20:05:42 +0400 Subject: Separate out common fstatat code into vfs_fstatat This is a version incorporating Christoph's suggestion. Separate out common *fstatat functionality into a single function instead of duplicating it all over the code. 
Signed-off-by: Oleg Drokin Signed-off-by: Al Viro --- arch/arm/kernel/sys_oabi-compat.c | 19 ++++--------- arch/s390/kernel/compat_linux.c | 18 ++++--------- arch/sparc/kernel/sys_sparc32.c | 19 ++++--------- arch/x86/ia32/sys_ia32.c | 19 ++++--------- fs/compat.c | 19 ++++--------- fs/stat.c | 56 +++++++++++++++++++-------------------- include/linux/fs.h | 1 + 7 files changed, 54 insertions(+), 97 deletions(-) (limited to 'include/linux/fs.h') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index e04173c7e62..d59a0cd537f 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -177,21 +177,12 @@ asmlinkage long sys_oabi_fstatat64(int dfd, int flag) { struct kstat stat; - int error = -EINVAL; + int error; - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_oldabi_stat64(&stat, statbuf); - -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_oldabi_stat64(&stat, statbuf); } struct oabi_flock64 { diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 6cc87d8c868..002c70d3cb7 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -702,20 +702,12 @@ asmlinkage long sys32_fstatat64(unsigned int dfd, char __user *filename, struct stat64_emu31 __user* statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); + int error; - if (!error) - error = cp_stat64(statbuf, &stat); -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); } /* diff --git 
a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index e800503879e..f5000a460c0 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -206,21 +206,12 @@ asmlinkage long compat_sys_fstatat64(unsigned int dfd, char __user *filename, struct compat_stat64 __user * statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_compat_stat64(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_compat_stat64(&stat, statbuf); } asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index efac92fd1ef..085a8c35f14 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -129,21 +129,12 @@ asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, struct stat64 __user *statbuf, int flag) { struct kstat stat; - int error = -EINVAL; + int error; - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_stat64(statbuf, &stat); - -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); } /* diff --git a/fs/compat.c b/fs/compat.c index 3f84d5f1588..dda72e26709 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -204,21 +204,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename, struct compat_stat __user *statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = 
vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_compat_stat(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } #endif diff --git a/fs/stat.c b/fs/stat.c index 2db740a0cfb..54711662b85 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -109,6 +109,24 @@ int vfs_fstat(unsigned int fd, struct kstat *stat) EXPORT_SYMBOL(vfs_fstat); +int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) +{ + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, stat); + else + error = vfs_stat_fd(dfd, filename, stat); +out: + return error; +} + +EXPORT_SYMBOL(vfs_fstatat); + + #ifdef __ARCH_WANT_OLD_STAT /* @@ -264,21 +282,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename, struct stat __user *, statbuf, int, flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_new_stat(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_new_stat(&stat, statbuf); } #endif @@ -404,21 +413,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, struct stat64 __user *, statbuf, int, flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_new_stat64(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + 
return error; + return cp_new_stat64(&stat, statbuf); } #endif /* __ARCH_WANT_STAT64 */ diff --git a/include/linux/fs.h b/include/linux/fs.h index e766be0d432..257f4d37ad2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2302,6 +2302,7 @@ extern int vfs_lstat(char __user *, struct kstat *); extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); +extern int vfs_fstatat(int , char __user *, struct kstat *, int); extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg); -- cgit v1.2.3-70-g09d2 From 2eae7a1874ca5be3232765d89e0250a449f1bc90 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Apr 2009 16:34:03 -0400 Subject: kill vfs_stat_fd / vfs_lstat_fd There's really no reason to keep vfs_stat_fd and vfs_lstat_fd with Oleg's vfs_fstatat. Use vfs_fstatat for the few cases having the directory fd, and switch all others to vfs_stat / vfs_lstat. 
Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/compat.c | 18 +++++---- fs/stat.c | 105 +++++++++++++++++++++-------------------------------- include/linux/fs.h | 2 - 3 files changed, 52 insertions(+), 73 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/compat.c b/fs/compat.c index dda72e26709..379a399bf5c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename, struct compat_stat __user *statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_compat_stat(&stat, statbuf); - return error; + error = vfs_stat(filename, &stat); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } asmlinkage long compat_sys_newlstat(char __user * filename, struct compat_stat __user *statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_compat_stat(&stat, statbuf); - return error; + error = vfs_lstat(filename, &stat); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } #ifndef __ARCH_WANT_STAT64 diff --git a/fs/stat.c b/fs/stat.c index 54711662b85..075694e31d8 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -55,46 +55,6 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) EXPORT_SYMBOL(vfs_getattr); -int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) -{ - struct path path; - int error; - - error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); - if (!error) { - error = vfs_getattr(path.mnt, path.dentry, stat); - path_put(&path); - } - return error; -} - -int vfs_stat(char __user *name, struct kstat *stat) -{ - return vfs_stat_fd(AT_FDCWD, name, stat); -} - -EXPORT_SYMBOL(vfs_stat); - -int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) -{ - struct path path; - int error; - - error = user_path_at(dfd, name, 0, &path); - if (!error) { - error = vfs_getattr(path.mnt, 
path.dentry, stat); - path_put(&path); - } - return error; -} - -int vfs_lstat(char __user *name, struct kstat *stat) -{ - return vfs_lstat_fd(AT_FDCWD, name, stat); -} - -EXPORT_SYMBOL(vfs_lstat); - int vfs_fstat(unsigned int fd, struct kstat *stat) { struct file *f = fget(fd); @@ -106,26 +66,43 @@ int vfs_fstat(unsigned int fd, struct kstat *stat) } return error; } - EXPORT_SYMBOL(vfs_fstat); int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) { + struct path path; int error = -EINVAL; + int lookup_flags = 0; if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) goto out; - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, stat); - else - error = vfs_stat_fd(dfd, filename, stat); + if (!(flag & AT_SYMLINK_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + + error = user_path_at(dfd, filename, lookup_flags, &path); + if (error) + goto out; + + error = vfs_getattr(path.mnt, path.dentry, stat); + path_put(&path); out: return error; } - EXPORT_SYMBOL(vfs_fstatat); +int vfs_stat(char __user *name, struct kstat *stat) +{ + return vfs_fstatat(AT_FDCWD, name, stat, 0); +} +EXPORT_SYMBOL(vfs_stat); + +int vfs_lstat(char __user *name, struct kstat *stat) +{ + return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); +} +EXPORT_SYMBOL(vfs_lstat); + #ifdef __ARCH_WANT_OLD_STAT @@ -173,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_old_stat(&stat, statbuf); + error = vfs_stat(filename, &stat); + if (error) + return error; - return error; + return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = 
cp_old_stat(&stat, statbuf); + error = vfs_lstat(filename, &stat); + if (error) + return error; - return error; + return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) @@ -258,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); - - if (!error) - error = cp_new_stat(&stat, statbuf); + int error = vfs_stat(filename, &stat); - return error; + if (error) + return error; + return cp_new_stat(&stat, statbuf); } SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_new_stat(&stat, statbuf); + error = vfs_lstat(filename, &stat); + if (error) + return error; - return error; + return cp_new_stat(&stat, statbuf); } #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) diff --git a/include/linux/fs.h b/include/linux/fs.h index 257f4d37ad2..8f42b35a756 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2299,8 +2299,6 @@ extern int vfs_readdir(struct file *, filldir_t, void *); extern int vfs_stat(char __user *, struct kstat *); extern int vfs_lstat(char __user *, struct kstat *); -extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); -extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); extern int vfs_fstatat(int , char __user *, struct kstat *, int); -- cgit v1.2.3-70-g09d2 From 38e23c95f92a84fb8505a9f572b8a209c9c372c1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 9 Apr 2009 20:17:52 +0900 Subject: fs: Mark get_filesystem_list() as __init function. "int get_filesystem_list(char * buf)" is called by only "static void __init get_fs_names(char *page)". 
We can mark get_filesystem_list() as "__init". Signed-off-by: Tetsuo Handa Signed-off-by: Al Viro --- fs/filesystems.c | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/filesystems.c b/fs/filesystems.c index 1aa70260e6d..a24c58e181d 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2) return retval; } -int get_filesystem_list(char * buf) +int __init get_filesystem_list(char *buf) { int len = 0; struct file_system_type * tmp; diff --git a/include/linux/fs.h b/include/linux/fs.h index 8f42b35a756..5bed436f435 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2448,7 +2448,7 @@ struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); -int get_filesystem_list(char * buf); +int __init get_filesystem_list(char *buf); #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ -- cgit v1.2.3-70-g09d2