From dbfcd91f06f0e2d5564b2fd184e9c2a43675f9ab Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Jul 2013 16:01:09 -0700 Subject: ipc: move rcu lock out of ipc_addid This patchset continues the work that began in the sysv ipc semaphore scaling series, see https://lkml.org/lkml/2013/3/20/546 Just like semaphores used to be, sysv shared memory and msg queues also abuse the ipc lock, unnecessarily holding it for operations such as permission and security checks. This patchset mostly deals with mqueues, and while shared mem can be done in a very similar way, I want to get these patches out in the open first. It also does some pending cleanups, mostly focused on the two level locking we have in ipc code, taking care of ipc_addid() and ipcctl_pre_down_nolock() - yes there are still functions that need to be updated as well. This patch: Make all callers explicitly take and release the RCU read lock. This addresses the two level locking seen in newary(), newseg() and newque(). For the last two, explicitly unlock the ipc object and the rcu lock, instead of calling the custom shm_unlock and msg_unlock functions. The next patch will deal with the open coded locking for ->perm.lock. Signed-off-by: Davidlohr Bueso Cc: Andi Kleen Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'ipc/msg.c') diff --git a/ipc/msg.c b/ipc/msg.c index d0c6d967b39..996feb81924 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -199,9 +199,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) return retval; } - /* - * ipc_addid() locks msq - */ + /* ipc_addid() locks msq upon success. 
*/ id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); if (id < 0) { security_msg_queue_free(msq); @@ -218,7 +216,8 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); - msg_unlock(msq); + spin_unlock(&msq->q_perm.lock); + rcu_read_unlock(); return msq->q_perm.id; } -- cgit v1.2.3-70-g09d2 From cf9d5d78d05bca96df7618dfc3a5ee4414dcae58 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Jul 2013 16:01:11 -0700 Subject: ipc: close open coded spin lock calls Signed-off-by: Davidlohr Bueso Cc: Andi Kleen Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 2 +- ipc/sem.c | 14 +++++++------- ipc/shm.c | 4 ++-- ipc/util.h | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'ipc/msg.c') diff --git a/ipc/msg.c b/ipc/msg.c index 996feb81924..7a3d6aab369 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -216,7 +216,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); - spin_unlock(&msq->q_perm.lock); + ipc_unlock_object(&msq->q_perm); rcu_read_unlock(); return msq->q_perm.id; diff --git a/ipc/sem.c b/ipc/sem.c index 70480a3aa69..92ec6c69bab 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -246,7 +246,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, * their critical section while the array lock is held. 
*/ lock_array: - spin_lock(&sma->sem_perm.lock); + ipc_lock_object(&sma->sem_perm); for (i = 0; i < sma->sem_nsems; i++) { struct sem *sem = sma->sem_base + i; spin_unlock_wait(&sem->lock); @@ -259,7 +259,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, static inline void sem_unlock(struct sem_array *sma, int locknum) { if (locknum == -1) { - spin_unlock(&sma->sem_perm.lock); + ipc_unlock_object(&sma->sem_perm); } else { struct sem *sem = sma->sem_base + locknum; spin_unlock(&sem->lock); @@ -872,7 +872,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) int i; /* Free the existing undo structures for this semaphore set. */ - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { list_del(&un->list_id); spin_lock(&un->ulp->lock); @@ -1070,7 +1070,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, curr = &sma->sem_base[semnum]; - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry(un, &sma->list_id, list_id) un->semadj[semnum] = 0; @@ -1199,7 +1199,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, for (i = 0; i < nsems; i++) sma->sem_base[i].semval = sem_io[i]; - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry(un, &sma->list_id, list_id) { for (i = 0; i < nsems; i++) un->semadj[i] = 0; @@ -1496,7 +1496,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) new->semid = semid; assert_spin_locked(&ulp->lock); list_add_rcu(&new->list_proc, &ulp->list_proc); - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_add(&new->list_id, &sma->list_id); un = new; @@ -1833,7 +1833,7 @@ void exit_sem(struct task_struct *tsk) } /* remove un from the linked lists */ - assert_spin_locked(&sma->sem_perm.lock); + 
ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); spin_lock(&ulp->lock); diff --git a/ipc/shm.c b/ipc/shm.c index bd2b14ef1bb..e7d51072d1c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -141,7 +141,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) { rcu_read_lock(); - spin_lock(&ipcp->shm_perm.lock); + ipc_lock_object(&ipcp->shm_perm); } static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, @@ -545,7 +545,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) ns->shm_tot += numpages; error = shp->shm_perm.id; - spin_unlock(&shp->shm_perm.lock); + ipc_unlock_object(&shp->shm_perm); rcu_read_unlock(); return error; diff --git a/ipc/util.h b/ipc/util.h index da65e8afb8f..b6a6a88f300 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -177,12 +177,12 @@ static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm) static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) { rcu_read_lock(); - spin_lock(&perm->lock); + ipc_lock_object(perm); } static inline void ipc_unlock(struct kern_ipc_perm *perm) { - spin_unlock(&perm->lock); + ipc_unlock_object(perm); rcu_read_unlock(); } -- cgit v1.2.3-70-g09d2 From 7b4cc5d8411bd4e9d61d8714f53859740cf830c2 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Jul 2013 16:01:12 -0700 Subject: ipc: move locking out of ipcctl_pre_down_nolock This function currently acquires both the rw_mutex and the rcu lock on successful lookups, leaving the callers to explicitly unlock them, creating another two level locking situation. Make the callers (including those that still use ipcctl_pre_down()) explicitly lock and unlock the rwsem and rcu lock. 
Signed-off-by: Davidlohr Bueso Cc: Andi Kleen Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 24 +++++++++++++++++------- ipc/sem.c | 27 ++++++++++++++++----------- ipc/shm.c | 23 +++++++++++++++++------ ipc/util.c | 21 ++++++--------------- 4 files changed, 56 insertions(+), 39 deletions(-) (limited to 'ipc/msg.c') diff --git a/ipc/msg.c b/ipc/msg.c index 7a3d6aab369..f62fa5eed84 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -407,31 +407,38 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, return -EFAULT; } + down_write(&msg_ids(ns).rw_mutex); + rcu_read_lock(); + ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid, cmd, &msqid64.msg_perm, msqid64.msg_qbytes); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + /* the ipc lock is not held upon failure */ + goto out_unlock1; + } msq = container_of(ipcp, struct msg_queue, q_perm); err = security_msg_queue_msgctl(msq, cmd); if (err) - goto out_unlock; + goto out_unlock0; switch (cmd) { case IPC_RMID: + /* freeque unlocks the ipc object and rcu */ freeque(ns, ipcp); goto out_up; case IPC_SET: if (msqid64.msg_qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) { err = -EPERM; - goto out_unlock; + goto out_unlock0; } err = ipc_update_perm(&msqid64.msg_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; msq->q_qbytes = msqid64.msg_qbytes; @@ -448,8 +455,11 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, default: err = -EINVAL; } -out_unlock: - msg_unlock(msq); + +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); out_up: up_write(&msg_ids(ns).rw_mutex); return err; diff --git a/ipc/sem.c b/ipc/sem.c index 92ec6c69bab..b4b892b5c5f 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1289,39 +1289,44 @@ static int semctl_down(struct ipc_namespace *ns, int semid, return -EFAULT; } + down_write(&sem_ids(ns).rw_mutex); + rcu_read_lock(); + ipcp = ipcctl_pre_down_nolock(ns, 
&sem_ids(ns), semid, cmd, &semid64.sem_perm, 0); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + /* the ipc lock is not held upon failure */ + goto out_unlock1; + } sma = container_of(ipcp, struct sem_array, sem_perm); err = security_sem_semctl(sma, cmd); - if (err) { - rcu_read_unlock(); - goto out_up; - } + if (err) + goto out_unlock1; - switch(cmd){ + switch (cmd) { case IPC_RMID: sem_lock(sma, NULL, -1); + /* freeary unlocks the ipc object and rcu */ freeary(ns, ipcp); goto out_up; case IPC_SET: sem_lock(sma, NULL, -1); err = ipc_update_perm(&semid64.sem_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; sma->sem_ctime = get_seconds(); break; default: - rcu_read_unlock(); err = -EINVAL; - goto out_up; + goto out_unlock1; } -out_unlock: +out_unlock0: sem_unlock(sma, -1); +out_unlock1: rcu_read_unlock(); out_up: up_write(&sem_ids(ns).rw_mutex); diff --git a/ipc/shm.c b/ipc/shm.c index e7d51072d1c..c6b4ad5ce3b 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -757,31 +757,42 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, return -EFAULT; } + down_write(&shm_ids(ns).rw_mutex); + rcu_read_lock(); + ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd, &shmid64.shm_perm, 0); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + /* the ipc lock is not held upon failure */ + goto out_unlock1; + } shp = container_of(ipcp, struct shmid_kernel, shm_perm); err = security_shm_shmctl(shp, cmd); if (err) - goto out_unlock; + goto out_unlock0; + switch (cmd) { case IPC_RMID: + /* do_shm_rmid unlocks the ipc object and rcu */ do_shm_rmid(ns, ipcp); goto out_up; case IPC_SET: err = ipc_update_perm(&shmid64.shm_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; shp->shm_ctim = get_seconds(); break; default: err = -EINVAL; } -out_unlock: - shm_unlock(shp); + +out_unlock0: + ipc_unlock_object(&shp->shm_perm); +out_unlock1: + rcu_read_unlock(); out_up: 
up_write(&shm_ids(ns).rw_mutex); return err; diff --git a/ipc/util.c b/ipc/util.c index 399821ac0a9..a0c139f3d1f 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -746,8 +746,10 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) * It must be called without any lock held and * - retrieves the ipc with the given id in the given table. * - performs some audit and permission check, depending on the given cmd - * - returns the ipc with both ipc and rw_mutex locks held in case of success + * - returns the ipc with the ipc lock held in case of success * or an err-code without any lock held otherwise. + * + * Call holding both the rw_mutex and the rcu read lock. */ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, struct ipc_ids *ids, int id, int cmd, @@ -772,13 +774,10 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, int err = -EPERM; struct kern_ipc_perm *ipcp; - down_write(&ids->rw_mutex); - rcu_read_lock(); - ipcp = ipc_obtain_object_check(ids, id); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); - goto out_up; + goto err; } audit_ipc_obj(ipcp); @@ -789,16 +788,8 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, euid = current_euid(); if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) || ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return ipcp; - -out_up: - /* - * Unsuccessful lookup, unlock and return - * the corresponding error. - */ - rcu_read_unlock(); - up_write(&ids->rw_mutex); - + return ipcp; /* successful lookup */ +err: return ERR_PTR(err); } -- cgit v1.2.3-70-g09d2 From 15724ecb7e9bab35fc694c666ad563adba820cc3 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Jul 2013 16:01:13 -0700 Subject: ipc,msg: shorten critical region in msgctl_down Instead of holding the ipc lock for the entire function, use ipcctl_pre_down_nolock() and only acquire the lock for specific commands: RMID and SET. 
Signed-off-by: Davidlohr Bueso Cc: Andi Kleen Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'ipc/msg.c') diff --git a/ipc/msg.c b/ipc/msg.c index f62fa5eed84..de422ff71c8 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -410,11 +410,10 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, down_write(&msg_ids(ns).rw_mutex); rcu_read_lock(); - ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid, cmd, - &msqid64.msg_perm, msqid64.msg_qbytes); + ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, + &msqid64.msg_perm, msqid64.msg_qbytes); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); - /* the ipc lock is not held upon failure */ goto out_unlock1; } @@ -422,10 +421,11 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, err = security_msg_queue_msgctl(msq, cmd); if (err) - goto out_unlock0; + goto out_unlock1; switch (cmd) { case IPC_RMID: + ipc_lock_object(&msq->q_perm); /* freeque unlocks the ipc object and rcu */ freeque(ns, ipcp); goto out_up; @@ -433,9 +433,10 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, if (msqid64.msg_qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) { err = -EPERM; - goto out_unlock0; + goto out_unlock1; } + ipc_lock_object(&msq->q_perm); err = ipc_update_perm(&msqid64.msg_perm, ipcp); if (err) goto out_unlock0; @@ -454,6 +455,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, break; default: err = -EINVAL; + goto out_unlock1; } out_unlock0: -- cgit v1.2.3-70-g09d2 From 2cafed30f150f7314f98717b372df8173516cae0 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Jul 2013 16:01:14 -0700 Subject: ipc,msg: introduce msgctl_nolock Similar to semctl, when calling msgctl, the *_INFO and *_STAT commands can be performed without acquiring the ipc object. Add a msgctl_nolock() function and move the logic of *_INFO and *_STAT out of msgctl(). 
This change still takes the lock; it will be made properly lockless in the next patch. Signed-off-by: Davidlohr Bueso Cc: Andi Kleen Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) (limited to 'ipc/msg.c') diff --git a/ipc/msg.c b/ipc/msg.c index de422ff71c8..f45be81f6de 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -467,17 +467,11 @@ out_up: return err; } -SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +static int msgctl_nolock(struct ipc_namespace *ns, int msqid, + int cmd, int version, void __user *buf) { + int err; struct msg_queue *msq; - int err, version; - struct ipc_namespace *ns; - - if (msqid < 0 || cmd < 0) - return -EINVAL; - - version = ipc_parse_version(&cmd); - ns = current->nsproxy->ipc_ns; switch (cmd) { case IPC_INFO: @@ -488,6 +482,7 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) if (!buf) return -EFAULT; + /* * We must not return kernel stack data. * due to padding, it's not enough @@ -519,7 +514,8 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) return -EFAULT; return (max_id < 0) ? 
0 : max_id; } - case MSG_STAT: /* msqid is an index rather than a msg queue id */ + + case MSG_STAT: case IPC_STAT: { struct msqid64_ds tbuf; @@ -563,19 +559,42 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) return -EFAULT; return success_return; } - case IPC_SET: - case IPC_RMID: - err = msgctl_down(ns, msqid, cmd, buf, version); - return err; + default: - return -EINVAL; + return -EINVAL; } + return err; out_unlock: msg_unlock(msq); return err; } +SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +{ + int version; + struct ipc_namespace *ns; + + if (msqid < 0 || cmd < 0) + return -EINVAL; + + version = ipc_parse_version(&cmd); + ns = current->nsproxy->ipc_ns; + + switch (cmd) { + case IPC_INFO: + case MSG_INFO: + case MSG_STAT: /* msqid is an index rather than a msg queue id */ + case IPC_STAT: + return msgctl_nolock(ns, msqid, cmd, version, buf); + case IPC_SET: + case IPC_RMID: + return msgctl_down(ns, msqid, cmd, buf, version); + default: + return -EINVAL; + } +} + static int testmsg(struct msg_msg *msg, long type, int mode) { switch(mode) -- cgit v1.2.3-70-g09d2 From a5001a0d9768568de5d613c3b3a5b9c7721299da Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Jul 2013 16:01:15 -0700 Subject: ipc,msg: introduce lockless functions to obtain the ipc object Add msq_obtain_object() and msq_obtain_object_check(), which will allow us to get the ipc object without acquiring the lock. Just as with semaphores, these functions are basically wrappers around ipc_obtain_object*(). 
Signed-off-by: Davidlohr Bueso Cc: Andi Kleen Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'ipc/msg.c') diff --git a/ipc/msg.c b/ipc/msg.c index f45be81f6de..c53c1371606 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -166,6 +166,27 @@ static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns, return container_of(ipcp, struct msg_queue, q_perm); } +static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) +{ + struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); + + if (IS_ERR(ipcp)) + return ERR_CAST(ipcp); + + return container_of(ipcp, struct msg_queue, q_perm); +} + +static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, + int id) +{ + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); + + if (IS_ERR(ipcp)) + return ERR_CAST(ipcp); + + return container_of(ipcp, struct msg_queue, q_perm); +} + static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) { ipc_rmid(&msg_ids(ns), &s->q_perm); -- cgit v1.2.3-70-g09d2 From ac0ba20ea6f2201a1589d6dc26ad1a4f0f967bb8 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Jul 2013 16:01:16 -0700 Subject: ipc,msg: make msgctl_nolock lockless While the INFO cmd doesn't take the ipc lock, the STAT commands do acquire it unnecessarily. We can do the permissions and security checks only holding the rcu lock. This function now mimics semctl_nolock(). 
Signed-off-by: Davidlohr Bueso Cc: Andi Kleen Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'ipc/msg.c') diff --git a/ipc/msg.c b/ipc/msg.c index c53c1371606..c218328b598 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -545,17 +545,25 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid, if (!buf) return -EFAULT; + memset(&tbuf, 0, sizeof(tbuf)); + + rcu_read_lock(); if (cmd == MSG_STAT) { - msq = msg_lock(ns, msqid); - if (IS_ERR(msq)) - return PTR_ERR(msq); + msq = msq_obtain_object(ns, msqid); + if (IS_ERR(msq)) { + err = PTR_ERR(msq); + goto out_unlock; + } success_return = msq->q_perm.id; } else { - msq = msg_lock_check(ns, msqid); - if (IS_ERR(msq)) - return PTR_ERR(msq); + msq = msq_obtain_object_check(ns, msqid); + if (IS_ERR(msq)) { + err = PTR_ERR(msq); + goto out_unlock; + } success_return = 0; } + err = -EACCES; if (ipcperms(ns, &msq->q_perm, S_IRUGO)) goto out_unlock; @@ -564,8 +572,6 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid, if (err) goto out_unlock; - memset(&tbuf, 0, sizeof(tbuf)); - kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); tbuf.msg_stime = msq->q_stime; tbuf.msg_rtime = msq->q_rtime; @@ -575,7 +581,8 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid, tbuf.msg_qbytes = msq->q_qbytes; tbuf.msg_lspid = msq->q_lspid; tbuf.msg_lrpid = msq->q_lrpid; - msg_unlock(msq); + rcu_read_unlock(); + if (copy_msqid_to_user(buf, &tbuf, version)) return -EFAULT; return success_return; @@ -587,7 +594,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid, return err; out_unlock: - msg_unlock(msq); + rcu_read_unlock(); return err; } -- cgit v1.2.3-70-g09d2 From 3dd1f784ed6603d7ab1043e51e6371235edf2313 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Jul 2013 16:01:17 -0700 Subject: ipc,msg: shorten critical region in msgsnd do_msgsnd() is another function 
that does too many things with the ipc object lock acquired. Take it only when needed when actually updating msq. Signed-off-by: Davidlohr Bueso Cc: Andi Kleen Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'ipc/msg.c') diff --git a/ipc/msg.c b/ipc/msg.c index c218328b598..f2a1a8f30cd 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -698,10 +698,11 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, msg->m_type = mtype; msg->m_ts = msgsz; - msq = msg_lock_check(ns, msqid); + rcu_read_lock(); + msq = msq_obtain_object_check(ns, msqid); if (IS_ERR(msq)) { err = PTR_ERR(msq); - goto out_free; + goto out_unlock1; } for (;;) { @@ -709,11 +710,11 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, err = -EACCES; if (ipcperms(ns, &msq->q_perm, S_IWUGO)) - goto out_unlock_free; + goto out_unlock1; err = security_msg_queue_msgsnd(msq, msg, msgflg); if (err) - goto out_unlock_free; + goto out_unlock1; if (msgsz + msq->q_cbytes <= msq->q_qbytes && 1 + msq->q_qnum <= msq->q_qbytes) { @@ -723,32 +724,41 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, /* queue full, wait: */ if (msgflg & IPC_NOWAIT) { err = -EAGAIN; - goto out_unlock_free; + goto out_unlock1; } + + ipc_lock_object(&msq->q_perm); ss_add(msq, &s); if (!ipc_rcu_getref(msq)) { err = -EIDRM; - goto out_unlock_free; + goto out_unlock0; } - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); schedule(); - ipc_lock_by_ptr(&msq->q_perm); + rcu_read_lock(); + ipc_lock_object(&msq->q_perm); + ipc_rcu_putref(msq); if (msq->q_perm.deleted) { err = -EIDRM; - goto out_unlock_free; + goto out_unlock0; } + ss_del(&s); if (signal_pending(current)) { err = -ERESTARTNOHAND; - goto out_unlock_free; + goto out_unlock0; } + + ipc_unlock_object(&msq->q_perm); } + ipc_lock_object(&msq->q_perm); msq->q_lspid = task_tgid_vnr(current); msq->q_stime = 
get_seconds(); @@ -764,9 +774,10 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, err = 0; msg = NULL; -out_unlock_free: - msg_unlock(msq); -out_free: +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); if (msg != NULL) free_msg(msg); return err; -- cgit v1.2.3-70-g09d2 From 41a0d523d0f626e9da0dc01de47f1b89058033cf Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Jul 2013 16:01:18 -0700 Subject: ipc,msg: shorten critical region in msgrcv do_msgrcv() is the last msg queue function that abuses the ipc lock Take it only when needed when actually updating msq. Signed-off-by: Davidlohr Bueso Cc: Andi Kleen Cc: Rik van Riel Tested-by: Sedat Dilek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 58 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 26 deletions(-) (limited to 'ipc/msg.c') diff --git a/ipc/msg.c b/ipc/msg.c index f2a1a8f30cd..a3c0dc40a0c 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -885,21 +885,19 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) return ERR_PTR(-EAGAIN); } - -long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, - int msgflg, +long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, long (*msg_handler)(void __user *, struct msg_msg *, size_t)) { - struct msg_queue *msq; - struct msg_msg *msg; int mode; + struct msg_queue *msq; struct ipc_namespace *ns; - struct msg_msg *copy = NULL; + struct msg_msg *msg, *copy = NULL; ns = current->nsproxy->ipc_ns; if (msqid < 0 || (long) bufsz < 0) return -EINVAL; + if (msgflg & MSG_COPY) { copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); if (IS_ERR(copy)) @@ -907,8 +905,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, } mode = convert_mode(&msgtyp, msgflg); - msq = msg_lock_check(ns, msqid); + rcu_read_lock(); + msq = msq_obtain_object_check(ns, msqid); if (IS_ERR(msq)) 
{ + rcu_read_unlock(); free_copy(copy); return PTR_ERR(msq); } @@ -918,10 +918,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, msg = ERR_PTR(-EACCES); if (ipcperms(ns, &msq->q_perm, S_IRUGO)) - goto out_unlock; + goto out_unlock1; + ipc_lock_object(&msq->q_perm); msg = find_msg(msq, &msgtyp, mode); - if (!IS_ERR(msg)) { /* * Found a suitable message. @@ -929,7 +929,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, */ if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { msg = ERR_PTR(-E2BIG); - goto out_unlock; + goto out_unlock0; } /* * If we are copying, then do not unlink message and do @@ -937,8 +937,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, */ if (msgflg & MSG_COPY) { msg = copy_msg(msg, copy); - goto out_unlock; + goto out_unlock0; } + list_del(&msg->m_list); msq->q_qnum--; msq->q_rtime = get_seconds(); @@ -947,14 +948,16 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, atomic_sub(msg->m_ts, &ns->msg_bytes); atomic_dec(&ns->msg_hdrs); ss_wakeup(&msq->q_senders, 0); - msg_unlock(msq); - break; + + goto out_unlock0; } + /* No message waiting. Wait for a message */ if (msgflg & IPC_NOWAIT) { msg = ERR_PTR(-ENOMSG); - goto out_unlock; + goto out_unlock0; } + list_add_tail(&msr_d.r_list, &msq->q_receivers); msr_d.r_tsk = current; msr_d.r_msgtype = msgtyp; @@ -965,8 +968,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, msr_d.r_maxsize = bufsz; msr_d.r_msg = ERR_PTR(-EAGAIN); current->state = TASK_INTERRUPTIBLE; - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); schedule(); /* Lockless receive, part 1: @@ -977,7 +981,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, * Prior to destruction, expunge_all(-EIRDM) changes r_msg. * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. 
* rcu_read_lock() prevents preemption between reading r_msg - * and the spin_lock() inside ipc_lock_by_ptr(). + * and acquiring the q_perm.lock in ipc_lock_object(). */ rcu_read_lock(); @@ -996,32 +1000,34 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, * If there is a message or an error then accept it without * locking. */ - if (msg != ERR_PTR(-EAGAIN)) { - rcu_read_unlock(); - break; - } + if (msg != ERR_PTR(-EAGAIN)) + goto out_unlock1; /* Lockless receive, part 3: * Acquire the queue spinlock. */ - ipc_lock_by_ptr(&msq->q_perm); - rcu_read_unlock(); + ipc_lock_object(&msq->q_perm); /* Lockless receive, part 4: * Repeat test after acquiring the spinlock. */ msg = (struct msg_msg*)msr_d.r_msg; if (msg != ERR_PTR(-EAGAIN)) - goto out_unlock; + goto out_unlock0; list_del(&msr_d.r_list); if (signal_pending(current)) { msg = ERR_PTR(-ERESTARTNOHAND); -out_unlock: - msg_unlock(msq); - break; + goto out_unlock0; } + + ipc_unlock_object(&msq->q_perm); } + +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); if (IS_ERR(msg)) { free_copy(copy); return PTR_ERR(msg); -- cgit v1.2.3-70-g09d2 From 9ad66ae65fc8d3e7e3344310fb0aa835910264fe Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Jul 2013 16:01:19 -0700 Subject: ipc: remove unused functions We can now drop the msg_lock and msg_lock_check functions along with a bogus comment introduced previously in semctl_down. Signed-off-by: Davidlohr Bueso Cc: Andi Kleen Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 25 ------------------------- ipc/sem.c | 1 - 2 files changed, 26 deletions(-) (limited to 'ipc/msg.c') diff --git a/ipc/msg.c b/ipc/msg.c index a3c0dc40a0c..bd60d7e159e 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -141,31 +141,6 @@ void __init msg_init(void) IPC_MSG_IDS, sysvipc_msg_proc_show); } -/* - * msg_lock_(check_) routines are called in the paths where the rw_mutex - * is not held. 
- */ -static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id) -{ - struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id); - - if (IS_ERR(ipcp)) - return (struct msg_queue *)ipcp; - - return container_of(ipcp, struct msg_queue, q_perm); -} - -static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns, - int id) -{ - struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id); - - if (IS_ERR(ipcp)) - return (struct msg_queue *)ipcp; - - return container_of(ipcp, struct msg_queue, q_perm); -} - static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) { struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); diff --git a/ipc/sem.c b/ipc/sem.c index b4b892b5c5f..d3ad3573bc6 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1296,7 +1296,6 @@ static int semctl_down(struct ipc_namespace *ns, int semid, &semid64.sem_perm, 0); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); - /* the ipc lock is not held upon failure */ goto out_unlock1; } -- cgit v1.2.3-70-g09d2