From 3a326a2ce88e71d00ac0d133e314a3342a7709f8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 10 Apr 2006 15:18:35 +0200 Subject: [PATCH] introduce a "kernel-internal pipe object" abstraction separate out the 'internal pipe object' abstraction, and make it usable to splice. This cleans up and fixes several aspects of the internal splice APIs and the pipe code: - pipes: the allocation and freeing of pipe_inode_info is now more symmetric and more streamlined with existing kernel practices. - splice: small micro-optimization: less pointer dereferencing in splice methods Signed-off-by: Ingo Molnar Update XFS for the ->splice_read/->splice_write changes. Signed-off-by: Jens Axboe --- include/linux/fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1e9ebaba07b..7e6454454fb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1039,8 +1039,8 @@ struct file_operations { int (*check_flags)(int); int (*dir_notify)(struct file *filp, unsigned long arg); int (*flock) (struct file *, int, struct file_lock *); - ssize_t (*splice_write)(struct inode *, struct file *, size_t, unsigned int); - ssize_t (*splice_read)(struct file *, struct inode *, size_t, unsigned int); + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int); + ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int); }; struct inode_operations { @@ -1611,8 +1611,8 @@ extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, loff_t *, read_descriptor_t *, read_actor_t); -extern ssize_t generic_file_splice_read(struct file *, struct inode *, size_t, unsigned int); -extern ssize_t generic_file_splice_write(struct inode *, struct file *, size_t, unsigned int); +extern ssize_t generic_file_splice_read(struct file *, struct pipe_inode_info *, size_t, unsigned int); +extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, size_t, unsigned int); extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, -- cgit v1.2.3-70-g09d2 From b92ce55893745e011edae70830b8bc863be881f9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 11 Apr 2006 13:52:07 +0200 Subject: [PATCH] splice: add direct fd <-> fd splicing support It's more efficient for sendfile() emulation. Basically we cache an internal private pipe and just use that as the intermediate area for pages. Direct splicing is not available from sys_splice(), it is only meant to be used for sendfile() emulation. Additional patch from Ingo Molnar to avoid the PIPE_BUFFERS loop at exit for the normal fast path. Signed-off-by: Jens Axboe --- fs/pipe.c | 10 +++- fs/splice.c | 148 ++++++++++++++++++++++++++++++++++++++++------ include/linux/fs.h | 2 + include/linux/pipe_fs_i.h | 1 + include/linux/sched.h | 6 ++ kernel/exit.c | 4 ++ 6 files changed, 150 insertions(+), 21 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/pipe.c b/fs/pipe.c index 705b4869262..036536f072c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -691,12 +691,10 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode) return info; } -void free_pipe_info(struct inode *inode) +void __free_pipe_info(struct pipe_inode_info *info) { int i; - struct pipe_inode_info *info = inode->i_pipe; - inode->i_pipe = NULL; for (i = 0; i < PIPE_BUFFERS; i++) { struct pipe_buffer *buf = info->bufs + i; if (buf->ops) @@ -707,6 +705,12 @@ void free_pipe_info(struct inode *inode) kfree(info); } +void free_pipe_info(struct inode *inode) +{ + __free_pipe_info(inode->i_pipe); + inode->i_pipe = NULL; +} + static struct vfsmount *pipe_mnt __read_mostly; static int pipefs_delete_dentry(struct dentry *dentry) { diff --git a/fs/splice.c b/fs/splice.c index a5326127aad..c47b561edac 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -680,8 +680,7 @@ EXPORT_SYMBOL(generic_splice_sendpage); * Attempt to initiate a splice from pipe to file. */ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, - loff_t __user *off_out, size_t len, - unsigned int flags) + size_t len, unsigned int flags) { loff_t pos; int ret; @@ -692,9 +691,6 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, if (!(out->f_mode & FMODE_WRITE)) return -EBADF; - if (off_out && copy_from_user(&out->f_pos, off_out, sizeof(loff_t))) - return -EFAULT; - pos = out->f_pos; ret = rw_verify_area(WRITE, out, &pos, len); @@ -707,9 +703,8 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, /* * Attempt to initiate a splice from a file to a pipe. */ -static long do_splice_to(struct file *in, loff_t __user *off_in, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) +static long do_splice_to(struct file *in, struct pipe_inode_info *pipe, + size_t len, unsigned int flags) { loff_t pos, isize, left; int ret; @@ -720,9 +715,6 @@ static long do_splice_to(struct file *in, loff_t __user *off_in, if (!(in->f_mode & FMODE_READ)) return -EBADF; - if (off_in && copy_from_user(&in->f_pos, off_in, sizeof(loff_t))) - return -EFAULT; - pos = in->f_pos; ret = rw_verify_area(READ, in, &pos, len); @@ -740,6 +732,118 @@ static long do_splice_to(struct file *in, loff_t __user *off_in, return in->f_op->splice_read(in, pipe, len, flags); } +long do_splice_direct(struct file *in, struct file *out, size_t len, + unsigned int flags) +{ + struct pipe_inode_info *pipe; + long ret, bytes; + umode_t i_mode; + int i; + + /* + * We require the input being a regular file, as we don't want to + * randomly drop data for eg socket -> socket splicing. Use the + * piped splicing for that! + */ + i_mode = in->f_dentry->d_inode->i_mode; + if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) + return -EINVAL; + + /* + * neither in nor out is a pipe, setup an internal pipe attached to + * 'out' and transfer the wanted data from 'in' to 'out' through that + */ + pipe = current->splice_pipe; + if (!pipe) { + pipe = alloc_pipe_info(NULL); + if (!pipe) + return -ENOMEM; + + /* + * We don't have an immediate reader, but we'll read the stuff + * out of the pipe right after the move_to_pipe(). So set + * PIPE_READERS appropriately. + */ + pipe->readers = 1; + + current->splice_pipe = pipe; + } + + /* + * do the splice + */ + ret = 0; + bytes = 0; + + while (len) { + size_t read_len, max_read_len; + + /* + * Do at most PIPE_BUFFERS pages worth of transfer: + */ + max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE)); + + ret = do_splice_to(in, pipe, max_read_len, flags); + if (unlikely(ret < 0)) + goto out_release; + + read_len = ret; + + /* + * NOTE: nonblocking mode only applies to the input. We + * must not do the output in nonblocking mode as then we + * could get stuck data in the internal pipe: + */ + ret = do_splice_from(pipe, out, read_len, + flags & ~SPLICE_F_NONBLOCK); + if (unlikely(ret < 0)) + goto out_release; + + bytes += ret; + len -= ret; + + /* + * In nonblocking mode, if we got back a short read then + * that was due to either an IO error or due to the + * pagecache entry not being there. In the IO error case + * the _next_ splice attempt will produce a clean IO error + * return value (not a short read), so in both cases it's + * correct to break out of the loop here: + */ + if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len)) + break; + } + + pipe->nrbufs = pipe->curbuf = 0; + + return bytes; + +out_release: + /* + * If we did an incomplete transfer we must release + * the pipe buffers in question: + */ + for (i = 0; i < PIPE_BUFFERS; i++) { + struct pipe_buffer *buf = pipe->bufs + i; + + if (buf->ops) { + buf->ops->release(pipe, buf); + buf->ops = NULL; + } + } + pipe->nrbufs = pipe->curbuf = 0; + + /* + * If we transferred some data, return the number of bytes: + */ + if (bytes > 0) + return bytes; + + return ret; +} + +EXPORT_SYMBOL(do_splice_direct); + /* * Determine where to splice to/from. */ @@ -749,25 +853,33 @@ static long do_splice(struct file *in, loff_t __user *off_in, { struct pipe_inode_info *pipe; - if (off_out && out->f_op->llseek == no_llseek) - return -EINVAL; - if (off_in && in->f_op->llseek == no_llseek) - return -EINVAL; - pipe = in->f_dentry->d_inode->i_pipe; if (pipe) { if (off_in) return -ESPIPE; + if (off_out) { + if (out->f_op->llseek == no_llseek) + return -EINVAL; + if (copy_from_user(&out->f_pos, off_out, + sizeof(loff_t))) + return -EFAULT; + } - return do_splice_from(pipe, out, off_out, len, flags); + return do_splice_from(pipe, out, len, flags); } pipe = out->f_dentry->d_inode->i_pipe; if (pipe) { if (off_out) return -ESPIPE; + if (off_in) { + if (in->f_op->llseek == no_llseek) + return -EINVAL; + if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t))) + return -EFAULT; + } - return do_splice_to(in, off_in, pipe, len, flags); + return do_splice_to(in, pipe, len, flags); } return -EINVAL; diff --git a/include/linux/fs.h b/include/linux/fs.h index 7e6454454fb..9e8e2ee353b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1613,6 +1613,8 @@ extern void do_generic_mapping_read(struct address_space *mapping, loff_t *, read_descriptor_t *, read_actor_t); extern ssize_t generic_file_splice_read(struct file *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, size_t, unsigned int); +extern long do_splice_direct(struct file *in, struct file *out, + size_t len, unsigned int flags); extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 9cf99cb34c1..660e9d866e5 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -58,6 +58,7 @@ void pipe_wait(struct pipe_inode_info *pipe); struct pipe_inode_info * alloc_pipe_info(struct inode * inode); void free_pipe_info(struct inode * inode); +void __free_pipe_info(struct pipe_inode_info *); /* * splice is tied to pipes as a transport (at least for now), so we'll just diff --git a/include/linux/sched.h b/include/linux/sched.h index 541f4828f5e..e194ec75833 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -684,6 +684,7 @@ static inline void prefetch_stack(struct task_struct *t) { } struct audit_context; /* See audit.c */ struct mempolicy; +struct pipe_inode_info; enum sleep_type { SLEEP_NORMAL, @@ -882,6 +883,11 @@ struct task_struct { atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; + + /* + * cache last used pipe for splice + */ + struct pipe_inode_info *splice_pipe; }; static inline pid_t process_group(struct task_struct *tsk) diff --git a/kernel/exit.c b/kernel/exit.c index 6c2eeb8f639..1a9787ac617 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -941,6 +942,9 @@ fastcall NORET_TYPE void do_exit(long code) if (tsk->io_context) exit_io_context(); + if (tsk->splice_pipe) + __free_pipe_info(tsk->splice_pipe); + /* PF_DEAD causes final put_task_struct after we schedule. */ preempt_disable(); BUG_ON(tsk->flags & PF_DEAD); -- cgit v1.2.3-70-g09d2 From 88e6faefaeed40a162041d85689e94595756d4c3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 11 Apr 2006 13:59:36 +0200 Subject: [PATCH] splice: warning fix From: Andrew Morton net/socket.c:148: warning: initialization from incompatible pointer type extern declarations in .c files! Bad boy. Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/fs.h | 13 ++++++++++--- net/socket.c | 4 ---- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9e8e2ee353b..2a629fd88ef 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1611,10 +1611,17 @@ extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, loff_t *, read_descriptor_t *, read_actor_t); -extern ssize_t generic_file_splice_read(struct file *, struct pipe_inode_info *, size_t, unsigned int); -extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, size_t, unsigned int); + +/* fs/splice.c */ +extern ssize_t generic_file_splice_read(struct file *, + struct pipe_inode_info *, size_t, unsigned int); +extern ssize_t generic_file_splice_write(struct pipe_inode_info *, + struct file *, size_t, unsigned int); +extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, + struct file *out, size_t len, unsigned int flags); extern long do_splice_direct(struct file *in, struct file *out, - size_t len, unsigned int flags); + size_t len, unsigned int flags); + extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, diff --git a/net/socket.c b/net/socket.c index b807f360e02..9ed9f652115 100644 --- a/net/socket.c +++ b/net/socket.c @@ -119,10 +119,6 @@ static ssize_t sock_writev(struct file *file, const struct iovec *vector, static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); -extern ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, - size_t len, unsigned int flags); - - /* * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear * in the operation structures but are done directly via the socketcall() multiplexor. -- cgit v1.2.3-70-g09d2