diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-09 13:33:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-09 13:33:36 -0700 |
commit | a82a729f04232ccd0b59406574ba4cf20027a49d (patch) | |
tree | da5912344b00ed60a1a653fc2442db7425db289d /ipc | |
parent | 899dd388853071f5c8848545209d4e2c5d95b1d9 (diff) | |
parent | 27daabd9b6a157c34a6e7a7f509fa26866e6420f (diff) |
Merge branch 'akpm' (updates from Andrew Morton)
Merge second patch-bomb from Andrew Morton:
- misc fixes
- audit stuff
- fanotify/inotify/dnotify things
- most of the rest of MM. The new cache shrinker code from Glauber and
Dave Chinner probably isn't quite stabilized yet.
- ptrace
- ipc
- partitions
- reboot cleanups
- add LZ4 decompressor, use it for kernel compression
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (118 commits)
lib/scatterlist: error handling in __sg_alloc_table()
scsi_debug: fix do_device_access() with wrap around range
crypto: talitos: use sg_pcopy_to_buffer()
lib/scatterlist: introduce sg_pcopy_from_buffer() and sg_pcopy_to_buffer()
lib/scatterlist: factor out sg_miter_get_next_page() from sg_miter_next()
crypto: add lz4 Cryptographic API
lib: add lz4 compressor module
arm: add support for LZ4-compressed kernel
lib: add support for LZ4-compressed kernel
decompressor: add LZ4 decompressor module
lib: add weak clz/ctz functions
reboot: move arch/x86 reboot= handling to generic kernel
reboot: arm: change reboot_mode to use enum reboot_mode
reboot: arm: prepare reboot_mode for moving to generic kernel code
reboot: arm: remove unused restart_mode fields from some arm subarchs
reboot: unicore32: prepare reboot_mode for moving to generic kernel code
reboot: x86: prepare reboot_mode for moving to generic kernel code
reboot: checkpatch.pl the new kernel/reboot.c file
reboot: move shutdown/reboot related functions to kernel/reboot.c
reboot: remove -stable friendly PF_THREAD_BOUND define
...
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/mqueue.c | 2 | ||||
-rw-r--r-- | ipc/msg.c | 226 | ||||
-rw-r--r-- | ipc/sem.c | 442 | ||||
-rw-r--r-- | ipc/shm.c | 34 | ||||
-rw-r--r-- | ipc/util.c | 36 | ||||
-rw-r--r-- | ipc/util.h | 22 |
6 files changed, 503 insertions, 259 deletions
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e4e47f64744..ae1996d3c53 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -823,6 +823,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, error = ro; goto out; } + audit_inode_parent_hidden(name, root); filp = do_create(ipc_ns, root->d_inode, &path, oflag, mode, u_attr ? &attr : NULL); @@ -868,6 +869,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) if (IS_ERR(name)) return PTR_ERR(name); + audit_inode_parent_hidden(name, mnt->mnt_root); err = mnt_want_write(mnt); if (err) goto out_name; diff --git a/ipc/msg.c b/ipc/msg.c index d0c6d967b39..bd60d7e159e 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -141,27 +141,23 @@ void __init msg_init(void) IPC_MSG_IDS, sysvipc_msg_proc_show); } -/* - * msg_lock_(check_) routines are called in the paths where the rw_mutex - * is not held. - */ -static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id) +static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) { - struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id); + struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); if (IS_ERR(ipcp)) - return (struct msg_queue *)ipcp; + return ERR_CAST(ipcp); return container_of(ipcp, struct msg_queue, q_perm); } -static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns, - int id) +static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, + int id) { - struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id); + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); if (IS_ERR(ipcp)) - return (struct msg_queue *)ipcp; + return ERR_CAST(ipcp); return container_of(ipcp, struct msg_queue, q_perm); } @@ -199,9 +195,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) return retval; } - /* - * ipc_addid() locks msq - */ + /* ipc_addid() locks msq upon success. */ id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); if (id < 0) { security_msg_queue_free(msq); @@ -218,7 +212,8 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); return msq->q_perm.id; } @@ -408,31 +403,39 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, return -EFAULT; } - ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid, cmd, - &msqid64.msg_perm, msqid64.msg_qbytes); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + down_write(&msg_ids(ns).rw_mutex); + rcu_read_lock(); + + ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, + &msqid64.msg_perm, msqid64.msg_qbytes); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + goto out_unlock1; + } msq = container_of(ipcp, struct msg_queue, q_perm); err = security_msg_queue_msgctl(msq, cmd); if (err) - goto out_unlock; + goto out_unlock1; switch (cmd) { case IPC_RMID: + ipc_lock_object(&msq->q_perm); + /* freeque unlocks the ipc object and rcu */ freeque(ns, ipcp); goto out_up; case IPC_SET: if (msqid64.msg_qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) { err = -EPERM; - goto out_unlock; + goto out_unlock1; } + ipc_lock_object(&msq->q_perm); err = ipc_update_perm(&msqid64.msg_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; msq->q_qbytes = msqid64.msg_qbytes; @@ -448,25 +451,23 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, break; default: err = -EINVAL; + goto out_unlock1; } -out_unlock: - msg_unlock(msq); + +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); out_up: up_write(&msg_ids(ns).rw_mutex); return err; } -SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +static int msgctl_nolock(struct ipc_namespace *ns, int msqid, + int cmd, int version, void __user *buf) { + int err; struct msg_queue *msq; - int err, version; - struct ipc_namespace *ns; - - if (msqid < 0 || cmd < 0) - return -EINVAL; - - version = ipc_parse_version(&cmd); - ns = current->nsproxy->ipc_ns; switch (cmd) { case IPC_INFO: @@ -477,6 +478,7 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) if (!buf) return -EFAULT; + /* * We must not return kernel stack data. * due to padding, it's not enough @@ -508,7 +510,8 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) return -EFAULT; return (max_id < 0) ? 0 : max_id; } - case MSG_STAT: /* msqid is an index rather than a msg queue id */ + + case MSG_STAT: case IPC_STAT: { struct msqid64_ds tbuf; @@ -517,17 +520,25 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) if (!buf) return -EFAULT; + memset(&tbuf, 0, sizeof(tbuf)); + + rcu_read_lock(); if (cmd == MSG_STAT) { - msq = msg_lock(ns, msqid); - if (IS_ERR(msq)) - return PTR_ERR(msq); + msq = msq_obtain_object(ns, msqid); + if (IS_ERR(msq)) { + err = PTR_ERR(msq); + goto out_unlock; + } success_return = msq->q_perm.id; } else { - msq = msg_lock_check(ns, msqid); - if (IS_ERR(msq)) - return PTR_ERR(msq); + msq = msq_obtain_object_check(ns, msqid); + if (IS_ERR(msq)) { + err = PTR_ERR(msq); + goto out_unlock; + } success_return = 0; } + err = -EACCES; if (ipcperms(ns, &msq->q_perm, S_IRUGO)) goto out_unlock; @@ -536,8 +547,6 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) if (err) goto out_unlock; - memset(&tbuf, 0, sizeof(tbuf)); - kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); tbuf.msg_stime = msq->q_stime; tbuf.msg_rtime = msq->q_rtime; @@ -547,24 +556,48 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) tbuf.msg_qbytes = msq->q_qbytes; tbuf.msg_lspid = msq->q_lspid; tbuf.msg_lrpid = msq->q_lrpid; - msg_unlock(msq); + rcu_read_unlock(); + if (copy_msqid_to_user(buf, &tbuf, version)) return -EFAULT; return success_return; } - case IPC_SET: - case IPC_RMID: - err = msgctl_down(ns, msqid, cmd, buf, version); - return err; + default: - return -EINVAL; + return -EINVAL; } + return err; out_unlock: - msg_unlock(msq); + rcu_read_unlock(); return err; } +SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +{ + int version; + struct ipc_namespace *ns; + + if (msqid < 0 || cmd < 0) + return -EINVAL; + + version = ipc_parse_version(&cmd); + ns = current->nsproxy->ipc_ns; + + switch (cmd) { + case IPC_INFO: + case MSG_INFO: + case MSG_STAT: /* msqid is an index rather than a msg queue id */ + case IPC_STAT: + return msgctl_nolock(ns, msqid, cmd, version, buf); + case IPC_SET: + case IPC_RMID: + return msgctl_down(ns, msqid, cmd, buf, version); + default: + return -EINVAL; + } +} + static int testmsg(struct msg_msg *msg, long type, int mode) { switch(mode) @@ -640,10 +673,11 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, msg->m_type = mtype; msg->m_ts = msgsz; - msq = msg_lock_check(ns, msqid); + rcu_read_lock(); + msq = msq_obtain_object_check(ns, msqid); if (IS_ERR(msq)) { err = PTR_ERR(msq); - goto out_free; + goto out_unlock1; } for (;;) { @@ -651,11 +685,11 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, err = -EACCES; if (ipcperms(ns, &msq->q_perm, S_IWUGO)) - goto out_unlock_free; + goto out_unlock1; err = security_msg_queue_msgsnd(msq, msg, msgflg); if (err) - goto out_unlock_free; + goto out_unlock1; if (msgsz + msq->q_cbytes <= msq->q_qbytes && 1 + msq->q_qnum <= msq->q_qbytes) { @@ -665,32 +699,41 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, /* queue full, wait: */ if (msgflg & IPC_NOWAIT) { err = -EAGAIN; - goto out_unlock_free; + goto out_unlock1; } + + ipc_lock_object(&msq->q_perm); ss_add(msq, &s); if (!ipc_rcu_getref(msq)) { err = -EIDRM; - goto out_unlock_free; + goto out_unlock0; } - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); schedule(); - ipc_lock_by_ptr(&msq->q_perm); + rcu_read_lock(); + ipc_lock_object(&msq->q_perm); + ipc_rcu_putref(msq); if (msq->q_perm.deleted) { err = -EIDRM; - goto out_unlock_free; + goto out_unlock0; } + ss_del(&s); if (signal_pending(current)) { err = -ERESTARTNOHAND; - goto out_unlock_free; + goto out_unlock0; } + + ipc_unlock_object(&msq->q_perm); } + ipc_lock_object(&msq->q_perm); msq->q_lspid = task_tgid_vnr(current); msq->q_stime = get_seconds(); @@ -706,9 +749,10 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, err = 0; msg = NULL; -out_unlock_free: - msg_unlock(msq); -out_free: +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); if (msg != NULL) free_msg(msg); return err; @@ -816,21 +860,19 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) return ERR_PTR(-EAGAIN); } - -long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, - int msgflg, +long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, long (*msg_handler)(void __user *, struct msg_msg *, size_t)) { - struct msg_queue *msq; - struct msg_msg *msg; int mode; + struct msg_queue *msq; struct ipc_namespace *ns; - struct msg_msg *copy = NULL; + struct msg_msg *msg, *copy = NULL; ns = current->nsproxy->ipc_ns; if (msqid < 0 || (long) bufsz < 0) return -EINVAL; + if (msgflg & MSG_COPY) { copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); if (IS_ERR(copy)) @@ -838,8 +880,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, } mode = convert_mode(&msgtyp, msgflg); - msq = msg_lock_check(ns, msqid); + rcu_read_lock(); + msq = msq_obtain_object_check(ns, msqid); if (IS_ERR(msq)) { + rcu_read_unlock(); free_copy(copy); return PTR_ERR(msq); } @@ -849,10 +893,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, msg = ERR_PTR(-EACCES); if (ipcperms(ns, &msq->q_perm, S_IRUGO)) - goto out_unlock; + goto out_unlock1; + ipc_lock_object(&msq->q_perm); msg = find_msg(msq, &msgtyp, mode); - if (!IS_ERR(msg)) { /* * Found a suitable message. @@ -860,7 +904,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, */ if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { msg = ERR_PTR(-E2BIG); - goto out_unlock; + goto out_unlock0; } /* * If we are copying, then do not unlink message and do @@ -868,8 +912,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, */ if (msgflg & MSG_COPY) { msg = copy_msg(msg, copy); - goto out_unlock; + goto out_unlock0; } + list_del(&msg->m_list); msq->q_qnum--; msq->q_rtime = get_seconds(); @@ -878,14 +923,16 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, atomic_sub(msg->m_ts, &ns->msg_bytes); atomic_dec(&ns->msg_hdrs); ss_wakeup(&msq->q_senders, 0); - msg_unlock(msq); - break; + + goto out_unlock0; } + /* No message waiting. Wait for a message */ if (msgflg & IPC_NOWAIT) { msg = ERR_PTR(-ENOMSG); - goto out_unlock; + goto out_unlock0; } + list_add_tail(&msr_d.r_list, &msq->q_receivers); msr_d.r_tsk = current; msr_d.r_msgtype = msgtyp; @@ -896,8 +943,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, msr_d.r_maxsize = bufsz; msr_d.r_msg = ERR_PTR(-EAGAIN); current->state = TASK_INTERRUPTIBLE; - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); schedule(); /* Lockless receive, part 1: @@ -908,7 +956,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, * Prior to destruction, expunge_all(-EIRDM) changes r_msg. * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. * rcu_read_lock() prevents preemption between reading r_msg - * and the spin_lock() inside ipc_lock_by_ptr(). + * and acquiring the q_perm.lock in ipc_lock_object(). */ rcu_read_lock(); @@ -927,32 +975,34 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, * If there is a message or an error then accept it without * locking. */ - if (msg != ERR_PTR(-EAGAIN)) { - rcu_read_unlock(); - break; - } + if (msg != ERR_PTR(-EAGAIN)) + goto out_unlock1; /* Lockless receive, part 3: * Acquire the queue spinlock. */ - ipc_lock_by_ptr(&msq->q_perm); - rcu_read_unlock(); + ipc_lock_object(&msq->q_perm); /* Lockless receive, part 4: * Repeat test after acquiring the spinlock. */ msg = (struct msg_msg*)msr_d.r_msg; if (msg != ERR_PTR(-EAGAIN)) - goto out_unlock; + goto out_unlock0; list_del(&msr_d.r_list); if (signal_pending(current)) { msg = ERR_PTR(-ERESTARTNOHAND); -out_unlock: - msg_unlock(msq); - break; + goto out_unlock0; } + + ipc_unlock_object(&msq->q_perm); } + +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); if (IS_ERR(msg)) { free_copy(copy); return PTR_ERR(msg); diff --git a/ipc/sem.c b/ipc/sem.c index 70480a3aa69..41088899783 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -95,8 +95,12 @@ struct sem { int semval; /* current value */ int sempid; /* pid of last operation */ spinlock_t lock; /* spinlock for fine-grained semtimedop */ - struct list_head sem_pending; /* pending single-sop operations */ -}; + struct list_head pending_alter; /* pending single-sop operations */ + /* that alter the semaphore */ + struct list_head pending_const; /* pending single-sop operations */ + /* that do not alter the semaphore*/ + time_t sem_otime; /* candidate for sem_otime */ +} ____cacheline_aligned_in_smp; /* One queue for each sleeping process in the system. */ struct sem_queue { @@ -150,12 +154,15 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ /* - * linked list protection: + * Locking: * sem_undo.id_next, - * sem_array.sem_pending{,last}, - * sem_array.sem_undo: sem_lock() for read/write + * sem_array.complex_count, + * sem_array.pending{_alter,_cont}, + * sem_array.sem_undo: global sem_lock() for read/write * sem_undo.proc_next: only "current" is allowed to read/write that field. * + * sem_array.sem_base[i].pending_{const,alter}: + * global or semaphore sem_lock() for read/write */ #define sc_semmsl sem_ctls[0] @@ -189,6 +196,53 @@ void __init sem_init (void) IPC_SEM_IDS, sysvipc_sem_proc_show); } +/** + * unmerge_queues - unmerge queues, if possible. + * @sma: semaphore array + * + * The function unmerges the wait queues if complex_count is 0. + * It must be called prior to dropping the global semaphore array lock. + */ +static void unmerge_queues(struct sem_array *sma) +{ + struct sem_queue *q, *tq; + + /* complex operations still around? */ + if (sma->complex_count) + return; + /* + * We will switch back to simple mode. + * Move all pending operation back into the per-semaphore + * queues. + */ + list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { + struct sem *curr; + curr = &sma->sem_base[q->sops[0].sem_num]; + + list_add_tail(&q->list, &curr->pending_alter); + } + INIT_LIST_HEAD(&sma->pending_alter); +} + +/** + * merge_queues - Merge single semop queues into global queue + * @sma: semaphore array + * + * This function merges all per-semaphore queues into the global queue. + * It is necessary to achieve FIFO ordering for the pending single-sop + * operations when a multi-semop operation must sleep. + * Only the alter operations must be moved, the const operations can stay. + */ +static void merge_queues(struct sem_array *sma) +{ + int i; + for (i = 0; i < sma->sem_nsems; i++) { + struct sem *sem = sma->sem_base + i; + + list_splice_init(&sem->pending_alter, &sma->pending_alter); + } +} + /* * If the request contains only one semaphore operation, and there are * no complex transactions pending, lock only the semaphore involved. @@ -246,7 +300,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, * their critical section while the array lock is held. */ lock_array: - spin_lock(&sma->sem_perm.lock); + ipc_lock_object(&sma->sem_perm); for (i = 0; i < sma->sem_nsems; i++) { struct sem *sem = sma->sem_base + i; spin_unlock_wait(&sem->lock); @@ -259,7 +313,8 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, static inline void sem_unlock(struct sem_array *sma, int locknum) { if (locknum == -1) { - spin_unlock(&sma->sem_perm.lock); + unmerge_queues(sma); + ipc_unlock_object(&sma->sem_perm); } else { struct sem *sem = sma->sem_base + locknum; spin_unlock(&sem->lock); @@ -337,7 +392,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) * Without the check/retry algorithm a lockless wakeup is possible: * - queue.status is initialized to -EINTR before blocking. * - wakeup is performed by - * * unlinking the queue entry from sma->sem_pending + * * unlinking the queue entry from the pending list * * setting queue.status to IN_WAKEUP * This is the notification for the blocked thread that a * result value is imminent. @@ -418,12 +473,14 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_base = (struct sem *) &sma[1]; for (i = 0; i < nsems; i++) { - INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); + INIT_LIST_HEAD(&sma->sem_base[i].pending_alter); + INIT_LIST_HEAD(&sma->sem_base[i].pending_const); spin_lock_init(&sma->sem_base[i].lock); } sma->complex_count = 0; - INIT_LIST_HEAD(&sma->sem_pending); + INIT_LIST_HEAD(&sma->pending_alter); + INIT_LIST_HEAD(&sma->pending_const); INIT_LIST_HEAD(&sma->list_id); sma->sem_nsems = nsems; sma->sem_ctime = get_seconds(); @@ -482,12 +539,19 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); } -/* - * Determine whether a sequence of semaphore operations would succeed - * all at once. Return 0 if yes, 1 if need to sleep, else return error code. +/** perform_atomic_semop - Perform (if possible) a semaphore operation + * @sma: semaphore array + * @sops: array with operations that should be checked + * @nsems: number of sops + * @un: undo array + * @pid: pid that did the change + * + * Returns 0 if the operation was possible. + * Returns 1 if the operation is impossible, the caller must sleep. + * Negative values are error codes. */ -static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, +static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops, int nsops, struct sem_undo *un, int pid) { int result, sem_op; @@ -609,60 +673,132 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q) * update_queue is O(N^2) when it restarts scanning the whole queue of * waiting operations. Therefore this function checks if the restart is * really necessary. It is called after a previously waiting operation - * was completed. + * modified the array. + * Note that wait-for-zero operations are handled without restart. */ static int check_restart(struct sem_array *sma, struct sem_queue *q) { - struct sem *curr; - struct sem_queue *h; - - /* if the operation didn't modify the array, then no restart */ - if (q->alter == 0) - return 0; - - /* pending complex operations are too difficult to analyse */ - if (sma->complex_count) + /* pending complex alter operations are too difficult to analyse */ + if (!list_empty(&sma->pending_alter)) return 1; /* we were a sleeping complex operation. Too difficult */ if (q->nsops > 1) return 1; - curr = sma->sem_base + q->sops[0].sem_num; + /* It is impossible that someone waits for the new value: + * - complex operations always restart. + * - wait-for-zero are handled seperately. + * - q is a previously sleeping simple operation that + * altered the array. It must be a decrement, because + * simple increments never sleep. + * - If there are older (higher priority) decrements + * in the queue, then they have observed the original + * semval value and couldn't proceed. The operation + * decremented to value - thus they won't proceed either. + */ + return 0; +} + +/** + * wake_const_ops(sma, semnum, pt) - Wake up non-alter tasks + * @sma: semaphore array. + * @semnum: semaphore that was modified. + * @pt: list head for the tasks that must be woken up. + * + * wake_const_ops must be called after a semaphore in a semaphore array + * was set to 0. If complex const operations are pending, wake_const_ops must + * be called with semnum = -1, as well as with the number of each modified + * semaphore. + * The tasks that must be woken up are added to @pt. The return code + * is stored in q->pid. + * The function returns 1 if at least one operation was completed successfully. + */ +static int wake_const_ops(struct sem_array *sma, int semnum, + struct list_head *pt) +{ + struct sem_queue *q; + struct list_head *walk; + struct list_head *pending_list; + int semop_completed = 0; - /* No-one waits on this queue */ - if (list_empty(&curr->sem_pending)) - return 0; + if (semnum == -1) + pending_list = &sma->pending_const; + else + pending_list = &sma->sem_base[semnum].pending_const; + + walk = pending_list->next; + while (walk != pending_list) { + int error; + + q = container_of(walk, struct sem_queue, list); + walk = walk->next; + + error = perform_atomic_semop(sma, q->sops, q->nsops, + q->undo, q->pid); - /* the new semaphore value */ - if (curr->semval) { - /* It is impossible that someone waits for the new value: - * - q is a previously sleeping simple operation that - * altered the array. It must be a decrement, because - * simple increments never sleep. - * - The value is not 0, thus wait-for-zero won't proceed. - * - If there are older (higher priority) decrements - * in the queue, then they have observed the original - * semval value and couldn't proceed. The operation - * decremented to value - thus they won't proceed either. + if (error <= 0) { + /* operation completed, remove from queue & wakeup */ + + unlink_queue(sma, q); + + wake_up_sem_queue_prepare(pt, q, error); + if (error == 0) + semop_completed = 1; + } + } + return semop_completed; +} + +/** + * do_smart_wakeup_zero(sma, sops, nsops, pt) - wakeup all wait for zero tasks + * @sma: semaphore array + * @sops: operations that were performed + * @nsops: number of operations + * @pt: list head of the tasks that must be woken up. + * + * do_smart_wakeup_zero() checks all required queue for wait-for-zero + * operations, based on the actual changes that were performed on the + * semaphore array. + * The function returns 1 if at least one operation was completed successfully. + */ +static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, + int nsops, struct list_head *pt) +{ + int i; + int semop_completed = 0; + int got_zero = 0; + + /* first: the per-semaphore queues, if known */ + if (sops) { + for (i = 0; i < nsops; i++) { + int num = sops[i].sem_num; + + if (sma->sem_base[num].semval == 0) { + got_zero = 1; + semop_completed |= wake_const_ops(sma, num, pt); + } + } + } else { + /* + * No sops means modified semaphores not known. + * Assume all were changed. */ - BUG_ON(q->sops[0].sem_op >= 0); - return 0; + for (i = 0; i < sma->sem_nsems; i++) { + if (sma->sem_base[i].semval == 0) { + got_zero = 1; + semop_completed |= wake_const_ops(sma, i, pt); + } + } } /* - * semval is 0. Check if there are wait-for-zero semops. - * They must be the first entries in the per-semaphore queue + * If one of the modified semaphores got 0, + * then check the global queue, too. */ - h = list_first_entry(&curr->sem_pending, struct sem_queue, list); - BUG_ON(h->nsops != 1); - BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num); - - /* Yes, there is a wait-for-zero semop. Restart */ - if (h->sops[0].sem_op == 0) - return 1; + if (got_zero) + semop_completed |= wake_const_ops(sma, -1, pt); - /* Again - no-one is waiting for the new value. */ - return 0; + return semop_completed; } @@ -678,6 +814,8 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q) * semaphore. * The tasks that must be woken up are added to @pt. The return code * is stored in q->pid. + * The function internally checks if const operations can now succeed. + * * The function return 1 if at least one semop was completed successfully. */ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) @@ -688,9 +826,9 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) int semop_completed = 0; if (semnum == -1) - pending_list = &sma->sem_pending; + pending_list = &sma->pending_alter; else - pending_list = &sma->sem_base[semnum].sem_pending; + pending_list = &sma->sem_base[semnum].pending_alter; again: walk = pending_list->next; @@ -702,16 +840,15 @@ again: /* If we are scanning the single sop, per-semaphore list of * one semaphore and that semaphore is 0, then it is not - * necessary to scan the "alter" entries: simple increments + * necessary to scan further: simple increments * that affect only one entry succeed immediately and cannot * be in the per semaphore pending queue, and decrements * cannot be successful if the value is already 0. */ - if (semnum != -1 && sma->sem_base[semnum].semval == 0 && - q->alter) + if (semnum != -1 && sma->sem_base[semnum].semval == 0) break; - error = try_atomic_semop(sma, q->sops, q->nsops, + error = perform_atomic_semop(sma, q->sops, q->nsops, q->undo, q->pid); /* Does q->sleeper still need to sleep? */ @@ -724,6 +861,7 @@ again: restart = 0; } else { semop_completed = 1; + do_smart_wakeup_zero(sma, q->sops, q->nsops, pt); restart = check_restart(sma, q); } @@ -742,8 +880,8 @@ again: * @otime: force setting otime * @pt: list head of the tasks that must be woken up. * - * do_smart_update() does the required called to update_queue, based on the - * actual changes that were performed on the semaphore array. + * do_smart_update() does the required calls to update_queue and wakeup_zero, + * based on the actual changes that were performed on the semaphore array. * Note that the function does not do the actual wake-up: the caller is * responsible for calling wake_up_sem_queue_do(@pt). * It is safe to perform this call after dropping all locks. @@ -752,49 +890,46 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop int otime, struct list_head *pt) { int i; - int progress; - - progress = 1; -retry_global: - if (sma->complex_count) { - if (update_queue(sma, -1, pt)) { - progress = 1; - otime = 1; - sops = NULL; - } - } - if (!progress) - goto done; - if (!sops) { - /* No semops; something special is going on. */ - for (i = 0; i < sma->sem_nsems; i++) { - if (update_queue(sma, i, pt)) { - otime = 1; - progress = 1; - } - } - goto done_checkretry; - } + otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); - /* Check the semaphores that were modified. */ - for (i = 0; i < nsops; i++) { - if (sops[i].sem_op > 0 || - (sops[i].sem_op < 0 && - sma->sem_base[sops[i].sem_num].semval == 0)) - if (update_queue(sma, sops[i].sem_num, pt)) { - otime = 1; - progress = 1; + if (!list_empty(&sma->pending_alter)) { + /* semaphore array uses the global queue - just process it. */ + otime |= update_queue(sma, -1, pt); + } else { + if (!sops) { + /* + * No sops, thus the modified semaphores are not + * known. Check all. + */ + for (i = 0; i < sma->sem_nsems; i++) + otime |= update_queue(sma, i, pt); + } else { + /* + * Check the semaphores that were increased: + * - No complex ops, thus all sleeping ops are + * decrease. + * - if we decreased the value, then any sleeping + * semaphore ops wont be able to run: If the + * previous value was too small, then the new + * value will be too small, too. + */ + for (i = 0; i < nsops; i++) { + if (sops[i].sem_op > 0) { + otime |= update_queue(sma, + sops[i].sem_num, pt); + } } + } } -done_checkretry: - if (progress) { - progress = 0; - goto retry_global; + if (otime) { + if (sops == NULL) { + sma->sem_base[0].sem_otime = get_seconds(); + } else { + sma->sem_base[sops[0].sem_num].sem_otime = + get_seconds(); + } } -done: - if (otime) - sma->sem_otime = get_seconds(); } @@ -813,14 +948,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum) struct sem_queue * q; semncnt = 0; - list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { + list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) { struct sembuf * sops = q->sops; BUG_ON(sops->sem_num != semnum); if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT)) semncnt++; } - list_for_each_entry(q, &sma->sem_pending, list) { + list_for_each_entry(q, &sma->pending_alter, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -839,14 +974,14 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) struct sem_queue * q; semzcnt = 0; - list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { + list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) { struct sembuf * sops = q->sops; BUG_ON(sops->sem_num != semnum); if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT)) semzcnt++; } - list_for_each_entry(q, &sma->sem_pending, list) { + list_for_each_entry(q, &sma->pending_const, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -872,7 +1007,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) int i; /* Free the existing undo structures for this semaphore set. */ - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { list_del(&un->list_id); spin_lock(&un->ulp->lock); @@ -884,13 +1019,22 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) /* Wake up all pending processes and let them fail with EIDRM. */ INIT_LIST_HEAD(&tasks); - list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { + list_for_each_entry_safe(q, tq, &sma->pending_const, list) { + unlink_queue(sma, q); + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + } + + list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { unlink_queue(sma, q); wake_up_sem_queue_prepare(&tasks, q, -EIDRM); } for (i = 0; i < sma->sem_nsems; i++) { struct sem *sem = sma->sem_base + i; - list_for_each_entry_safe(q, tq, &sem->sem_pending, list) { + list_for_each_entry_safe(q, tq, &sem->pending_const, list) { + unlink_queue(sma, q); + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + } + list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { unlink_queue(sma, q); wake_up_sem_queue_prepare(&tasks, q, -EIDRM); } @@ -931,6 +1075,21 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, } } +static time_t get_semotime(struct sem_array *sma) +{ + int i; + time_t res; + + res = sma->sem_base[0].sem_otime; + for (i = 1; i < sma->sem_nsems; i++) { + time_t to = sma->sem_base[i].sem_otime; + + if (to > res) + res = to; + } + return res; +} + static int semctl_nolock(struct ipc_namespace *ns, int semid, int cmd, int version, void __user *p) { @@ -1004,9 +1163,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, goto out_unlock; kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); - tbuf.sem_otime = sma->sem_otime; - tbuf.sem_ctime = sma->sem_ctime; - tbuf.sem_nsems = sma->sem_nsems; + tbuf.sem_otime = get_semotime(sma); + tbuf.sem_ctime = sma->sem_ctime; + tbuf.sem_nsems = sma->sem_nsems; rcu_read_unlock(); if (copy_semid_to_user(p, &tbuf, version)) return -EFAULT; @@ -1070,7 +1229,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, curr = &sma->sem_base[semnum]; - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry(un, &sma->list_id, list_id) un->semadj[semnum] = 0; @@ -1199,7 +1358,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, for (i = 0; i < nsems; i++) sma->sem_base[i].semval = sem_io[i]; - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry(un, &sma->list_id, list_id) { for (i = 0; i < nsems; i++) un->semadj[i] = 0; @@ -1289,39 +1448,43 @@ static int semctl_down(struct ipc_namespace *ns, int semid, return -EFAULT; } + down_write(&sem_ids(ns).rw_mutex); + rcu_read_lock(); + ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, &semid64.sem_perm, 0); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + goto out_unlock1; + } sma = container_of(ipcp, struct sem_array, sem_perm); err = security_sem_semctl(sma, cmd); - if (err) { - rcu_read_unlock(); - goto out_up; - } + if (err) + goto out_unlock1; - switch(cmd){ + switch (cmd) { case IPC_RMID: sem_lock(sma, NULL, -1); + /* freeary unlocks the ipc object and rcu */ freeary(ns, ipcp); goto out_up; case IPC_SET: sem_lock(sma, NULL, -1); err = ipc_update_perm(&semid64.sem_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; sma->sem_ctime = get_seconds(); break; default: - rcu_read_unlock(); err = -EINVAL; - goto out_up; + goto out_unlock1; } -out_unlock: +out_unlock0: sem_unlock(sma, -1); +out_unlock1: rcu_read_unlock(); out_up: up_write(&sem_ids(ns).rw_mutex); @@ -1496,7 +1659,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) new->semid = semid; assert_spin_locked(&ulp->lock); list_add_rcu(&new->list_proc, &ulp->list_proc); - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_add(&new->list_id, &sma->list_id); un = new; @@ -1533,7 +1696,6 @@ static int get_queue_result(struct sem_queue *q) return error; } - SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, unsigned, nsops, const struct timespec __user *, timeout) { @@ -1631,7 +1793,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, if (un && un->semid == -1) goto out_unlock_free; - error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); + error = perform_atomic_semop(sma, sops, nsops, un, + task_tgid_vnr(current)); if (error <= 0) { if (alter && error == 0) do_smart_update(sma, sops, nsops, 1, &tasks); @@ -1653,15 +1816,27 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, struct sem *curr; curr = &sma->sem_base[sops->sem_num]; - if (alter) - list_add_tail(&queue.list, &curr->sem_pending); - else - list_add(&queue.list, &curr->sem_pending); + if (alter) { + if (sma->complex_count) { + list_add_tail(&queue.list, + &sma->pending_alter); + } else { + + list_add_tail(&queue.list, + &curr->pending_alter); + } + } else { + list_add_tail(&queue.list, &curr->pending_const); + } } else { + if (!sma->complex_count) + merge_queues(sma); + if (alter) - list_add_tail(&queue.list, &sma->sem_pending); + list_add_tail(&queue.list, &sma->pending_alter); else - list_add(&queue.list, &sma->sem_pending); + list_add_tail(&queue.list, &sma->pending_const); + sma->complex_count++; } @@ -1833,7 +2008,7 @@ void exit_sem(struct task_struct *tsk) } /* remove un from the linked lists */ - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); spin_lock(&ulp->lock); @@ -1882,6 +2057,9 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) { struct user_namespace *user_ns = seq_user_ns(s); struct sem_array *sma = it; + time_t sem_otime; + + sem_otime = get_semotime(sma); return seq_printf(s, "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", @@ -1893,7 +2071,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) from_kgid_munged(user_ns, sma->sem_perm.gid), from_kuid_munged(user_ns, sma->sem_perm.cuid), from_kgid_munged(user_ns, sma->sem_perm.cgid), - sma->sem_otime, + sem_otime, sma->sem_ctime); } #endif diff --git a/ipc/shm.c b/ipc/shm.c index 7e199fa1960..c6b4ad5ce3b 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -141,7 +141,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) { rcu_read_lock(); - spin_lock(&ipcp->shm_perm.lock); + ipc_lock_object(&ipcp->shm_perm); } static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, @@ -491,10 +491,10 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) sprintf (name, "SYSV%08x", key); if (shmflg & SHM_HUGETLB) { - struct hstate *hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) - & SHM_HUGE_MASK); + struct hstate *hs; size_t hugesize; + hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); if (!hs) { error = -EINVAL; goto no_file; @@ -535,6 +535,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_nattch = 0; shp->shm_file = file; shp->shm_creator = current; + /* * shmid gets reported as "inode#" in /proc/pid/maps. * proc-ps tools use this. Changing this will break them. @@ -543,7 +544,9 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) ns->shm_tot += numpages; error = shp->shm_perm.id; - shm_unlock(shp); + + ipc_unlock_object(&shp->shm_perm); + rcu_read_unlock(); return error; no_id: @@ -754,31 +757,42 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, return -EFAULT; } + down_write(&shm_ids(ns).rw_mutex); + rcu_read_lock(); + ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd, &shmid64.shm_perm, 0); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + /* the ipc lock is not held upon failure */ + goto out_unlock1; + } shp = container_of(ipcp, struct shmid_kernel, shm_perm); err = security_shm_shmctl(shp, cmd); if (err) - goto out_unlock; + goto out_unlock0; + switch (cmd) { case IPC_RMID: + /* do_shm_rmid unlocks the ipc object and rcu */ do_shm_rmid(ns, ipcp); goto out_up; case IPC_SET: err = ipc_update_perm(&shmid64.shm_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; shp->shm_ctim = get_seconds(); break; default: err = -EINVAL; } -out_unlock: - shm_unlock(shp); + +out_unlock0: + ipc_unlock_object(&shp->shm_perm); +out_unlock1: + rcu_read_unlock(); out_up: up_write(&shm_ids(ns).rw_mutex); return err; diff --git a/ipc/util.c b/ipc/util.c index 809ec5ec812..4704223bfad 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -246,9 +246,8 @@ int ipc_get_maxid(struct ipc_ids *ids) * is returned. The 'new' entry is returned in a locked state on success. * On failure the entry is not locked and a negative err-code is returned. * - * Called with ipc_ids.rw_mutex held as a writer. + * Called with writer ipc_ids.rw_mutex held. */ - int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) { kuid_t euid; @@ -469,9 +468,7 @@ void ipc_free(void* ptr, int size) struct ipc_rcu { struct rcu_head rcu; atomic_t refcount; - /* "void *" makes sure alignment of following data is sane. */ - void *data[0]; -}; +} ____cacheline_aligned_in_smp; /** * ipc_rcu_alloc - allocate ipc and rcu space @@ -489,12 +486,14 @@ void *ipc_rcu_alloc(int size) if (unlikely(!out)) return NULL; atomic_set(&out->refcount, 1); - return out->data; + return out + 1; } int ipc_rcu_getref(void *ptr) { - return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu, data)->refcount); + struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; + + return atomic_inc_not_zero(&p->refcount); } /** @@ -508,7 +507,7 @@ static void ipc_schedule_free(struct rcu_head *head) void ipc_rcu_putref(void *ptr) { - struct ipc_rcu *p = container_of(ptr, struct ipc_rcu, data); + struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; if (!atomic_dec_and_test(&p->refcount)) return; @@ -747,8 +746,10 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) * It must be called without any lock held and * - retrieves the ipc with the given id in the given table. * - performs some audit and permission check, depending on the given cmd - * - returns the ipc with both ipc and rw_mutex locks held in case of success + * - returns the ipc with the ipc lock held in case of success * or an err-code without any lock held otherwise. + * + * Call holding the both the rw_mutex and the rcu read lock. */ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, struct ipc_ids *ids, int id, int cmd, @@ -773,13 +774,10 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, int err = -EPERM; struct kern_ipc_perm *ipcp; - down_write(&ids->rw_mutex); - rcu_read_lock(); - ipcp = ipc_obtain_object_check(ids, id); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); - goto out_up; + goto err; } audit_ipc_obj(ipcp); @@ -790,16 +788,8 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, euid = current_euid(); if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) || ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return ipcp; - -out_up: - /* - * Unsuccessful lookup, unlock and return - * the corresponding error. - */ - rcu_read_unlock(); - up_write(&ids->rw_mutex); - + return ipcp; /* successful lookup */ +err: return ERR_PTR(err); } diff --git a/ipc/util.h b/ipc/util.h index 2b0bdd5d92c..b6a6a88f300 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -159,21 +159,31 @@ static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid) return uid / SEQ_MULTIPLIER != ipcp->seq; } -static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) +static inline void ipc_lock_object(struct kern_ipc_perm *perm) { - rcu_read_lock(); spin_lock(&perm->lock); } -static inline void ipc_unlock(struct kern_ipc_perm *perm) +static inline void ipc_unlock_object(struct kern_ipc_perm *perm) { spin_unlock(&perm->lock); - rcu_read_unlock(); } -static inline void ipc_lock_object(struct kern_ipc_perm *perm) +static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm) { - spin_lock(&perm->lock); + assert_spin_locked(&perm->lock); +} + +static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) +{ + rcu_read_lock(); + ipc_lock_object(perm); +} + +static inline void ipc_unlock(struct kern_ipc_perm *perm) +{ + ipc_unlock_object(perm); + rcu_read_unlock(); } struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); |