From badf16621c1f9d1ac753be056fce11b43d6e0be5 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Fri, 9 Sep 2005 13:04:10 -0700 Subject: [PATCH] files: break up files struct In order for the RCU to work, the file table array, sets and their sizes must be updated atomically. Instead of ensuring this through too many memory barriers, we put the arrays and their sizes in a separate structure. This patch takes the first step of putting the file table elements in a separate structure fdtable that is embedded withing files_struct. It also changes all the users to refer to the file table using files_fdtable() macro. Subsequent applciation of RCU becomes easier after this. Signed-off-by: Dipankar Sarma Signed-Off-By: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/select.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/select.c') diff --git a/fs/select.c b/fs/select.c index b80e7eb0ac0..2e56325c73c 100644 --- a/fs/select.c +++ b/fs/select.c @@ -132,11 +132,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) unsigned long *open_fds; unsigned long set; int max; + struct fdtable *fdt; /* handle last in-complete long-word first */ set = ~(~0UL << (n & (__NFDBITS-1))); n /= __NFDBITS; - open_fds = current->files->open_fds->fds_bits+n; + fdt = files_fdtable(current->files); + open_fds = fdt->open_fds->fds_bits+n; max = 0; if (set) { set &= BITS(fds, n); @@ -299,6 +301,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s char *bits; long timeout; int ret, size, max_fdset; + struct fdtable *fdt; timeout = MAX_SCHEDULE_TIMEOUT; if (tvp) { @@ -326,7 +329,8 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s goto out_nofds; /* max_fdset can increase, so grab it once to avoid race */ - max_fdset = current->files->max_fdset; + fdt = files_fdtable(current->files); + max_fdset = fdt->max_fdset; if (n > max_fdset) n = max_fdset; @@ -464,9 +468,11 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti unsigned int i; struct poll_list *head; struct poll_list *walk; + struct fdtable *fdt; /* Do a sanity check on nfds ... */ - if (nfds > current->files->max_fdset && nfds > OPEN_MAX) + fdt = files_fdtable(current->files); + if (nfds > fdt->max_fdset && nfds > OPEN_MAX) return -EINVAL; if (timeout) { -- cgit v1.2.3-70-g09d2 From b835996f628eadb55c5fb222ba46fe9395bf73c7 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Fri, 9 Sep 2005 13:04:14 -0700 Subject: [PATCH] files: lock-free fd look-up With the use of RCU in files structure, the look-up of files using fds can now be lock-free. The lookup is protected by rcu_read_lock()/rcu_read_unlock(). This patch changes the readers to use lock-free lookup. Signed-off-by: Maneesh Soni Signed-off-by: Ravikiran Thirumalai Signed-off-by: Dipankar Sarma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/irixioctl.c | 5 +++-- arch/sparc64/solaris/ioctl.c | 7 ++++--- drivers/char/tty_io.c | 4 ++-- fs/fcntl.c | 4 ++-- fs/proc/base.c | 29 +++++++++++++++-------------- fs/select.c | 13 ++++++++++--- net/ipv4/netfilter/ipt_owner.c | 1 + net/ipv6/netfilter/ip6t_owner.c | 1 + security/selinux/hooks.c | 2 +- 9 files changed, 39 insertions(+), 27 deletions(-) (limited to 'fs/select.c') diff --git a/arch/mips/kernel/irixioctl.c b/arch/mips/kernel/irixioctl.c index 4cd3d38a22c..3cdc22346f4 100644 --- a/arch/mips/kernel/irixioctl.c +++ b/arch/mips/kernel/irixioctl.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -33,7 +34,7 @@ static struct tty_struct *get_tty(int fd) struct file *filp; struct tty_struct *ttyp = NULL; - spin_lock(¤t->files->file_lock); + rcu_read_lock(); filp = fcheck(fd); if(filp && filp->private_data) { ttyp = (struct tty_struct *) filp->private_data; @@ -41,7 +42,7 @@ static struct tty_struct *get_tty(int fd) if(ttyp->magic != TTY_MAGIC) ttyp =NULL; } - spin_unlock(¤t->files->file_lock); + rcu_read_unlock(); return ttyp; } diff --git a/arch/sparc64/solaris/ioctl.c b/arch/sparc64/solaris/ioctl.c index 374766455f5..be0a054e3ed 100644 --- a/arch/sparc64/solaris/ioctl.c +++ b/arch/sparc64/solaris/ioctl.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -295,16 +296,16 @@ static inline int solaris_sockmod(unsigned int fd, unsigned int cmd, u32 arg) struct inode *ino; struct fdtable *fdt; /* I wonder which of these tests are superfluous... --patrik */ - spin_lock(¤t->files->file_lock); + rcu_read_lock(); fdt = files_fdtable(current->files); if (! fdt->fd[fd] || ! fdt->fd[fd]->f_dentry || ! (ino = fdt->fd[fd]->f_dentry->d_inode) || ! S_ISSOCK(ino->i_mode)) { - spin_unlock(¤t->files->file_lock); + rcu_read_unlock(); return TBADF; } - spin_unlock(¤t->files->file_lock); + rcu_read_unlock(); switch (cmd & 0xff) { case 109: /* SI_SOCKPARAMS */ diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 0bfc7af6891..e5953f3433f 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2480,7 +2480,7 @@ static void __do_SAK(void *arg) } task_lock(p); if (p->files) { - spin_lock(&p->files->file_lock); + rcu_read_lock(); fdt = files_fdtable(p->files); for (i=0; i < fdt->max_fds; i++) { filp = fcheck_files(p->files, i); @@ -2495,7 +2495,7 @@ static void __do_SAK(void *arg) break; } } - spin_unlock(&p->files->file_lock); + rcu_read_unlock(); } task_unlock(p); } while_each_task_pid(session, PIDTYPE_SID, p); diff --git a/fs/fcntl.c b/fs/fcntl.c index d2f3ed8acd9..863b46e0d78 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -40,10 +40,10 @@ static inline int get_close_on_exec(unsigned int fd) struct files_struct *files = current->files; struct fdtable *fdt; int res; - spin_lock(&files->file_lock); + rcu_read_lock(); fdt = files_fdtable(files); res = FD_ISSET(fd, fdt->close_on_exec); - spin_unlock(&files->file_lock); + rcu_read_unlock(); return res; } diff --git a/fs/proc/base.c b/fs/proc/base.c index d0087a0b024..23db452ab42 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -283,16 +284,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm files = get_files_struct(task); if (files) { - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); if (file) { *mnt = mntget(file->f_vfsmnt); *dentry = dget(file->f_dentry); - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); return 0; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } return -ENOENT; @@ -1062,7 +1063,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) files = get_files_struct(p); if (!files) goto out; - spin_lock(&files->file_lock); + rcu_read_lock(); fdt = files_fdtable(files); for (fd = filp->f_pos-2; fd < fdt->max_fds; @@ -1071,7 +1072,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) if (!fcheck_files(files, fd)) continue; - spin_unlock(&files->file_lock); + rcu_read_unlock(); j = NUMBUF; i = fd; @@ -1083,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) ino = fake_ino(tid, PROC_TID_FD_DIR + fd); if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { - spin_lock(&files->file_lock); + rcu_read_lock(); break; } - spin_lock(&files->file_lock); + rcu_read_lock(); } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } out: @@ -1263,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) files = get_files_struct(task); if (files) { - spin_lock(&files->file_lock); + rcu_read_lock(); if (fcheck_files(files, fd)) { - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { inode->i_uid = task->euid; @@ -1277,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) security_task_to_inode(task, inode); return 1; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } d_drop(dentry); @@ -1369,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, if (!files) goto out_unlock; inode->i_mode = S_IFLNK; - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); if (!file) goto out_unlock2; @@ -1377,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, inode->i_mode |= S_IRUSR | S_IXUSR; if (file->f_mode & 2) inode->i_mode |= S_IWUSR | S_IXUSR; - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; @@ -1387,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, return NULL; out_unlock2: - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); out_unlock: iput(inode); diff --git a/fs/select.c b/fs/select.c index 2e56325c73c..f10a10317d5 100644 --- a/fs/select.c +++ b/fs/select.c @@ -22,6 +22,7 @@ #include /* for STICKY_TIMEOUTS */ #include #include +#include #include @@ -185,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout) int retval, i; long __timeout = *timeout; - spin_lock(¤t->files->file_lock); + rcu_read_lock(); retval = max_select_fd(n, fds); - spin_unlock(¤t->files->file_lock); + rcu_read_unlock(); if (retval < 0) return retval; @@ -329,8 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s goto out_nofds; /* max_fdset can increase, so grab it once to avoid race */ + rcu_read_lock(); fdt = files_fdtable(current->files); max_fdset = fdt->max_fdset; + rcu_read_unlock(); if (n > max_fdset) n = max_fdset; @@ -469,10 +472,14 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti struct poll_list *head; struct poll_list *walk; struct fdtable *fdt; + int max_fdset; /* Do a sanity check on nfds ... */ + rcu_read_lock(); fdt = files_fdtable(current->files); - if (nfds > fdt->max_fdset && nfds > OPEN_MAX) + max_fdset = fdt->max_fdset; + rcu_read_unlock(); + if (nfds > max_fdset && nfds > OPEN_MAX) return -EINVAL; if (timeout) { diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index c1889f88262..0cee2862ed8 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 9b91decbfdd..4de4cdad4b7 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index acb5a495a90..f40c8221ec1 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1652,7 +1652,7 @@ static inline void flush_unauthorized_files(struct files_struct * files) continue; } if (devnull) { - atomic_inc(&devnull->f_count); + rcuref_inc(&devnull->f_count); } else { devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR); if (!devnull) { -- cgit v1.2.3-70-g09d2