From 98f842e675f96ffac96e6c50315790912b2812be Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 15 Jun 2011 10:21:48 -0700 Subject: proc: Usable inode numbers for the namespace file descriptors. Assign a unique proc inode to each namespace, and use that inode number to ensure we only allocate at most one proc inode for every namespace in proc. A single proc inode per namespace allows userspace to test to see if two processes are in the same namespace. This has been a long requested feature and only blocked because a naive implementation would put the id in a global space and would ultimately require having a namespace for the names of namespaces, making migration and certain virtualization tricks impossible. We still don't have per superblock inode numbers for proc, which appears necessary for application unaware checkpoint/restart and migrations (if the application is using namespace file descriptors) but that is now allowd by the design if it becomes important. I have preallocated the ipc and uts initial proc inode numbers so their structures can be statically initialized. Signed-off-by: Eric W. Biederman --- ipc/msgutil.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'ipc/msgutil.c') diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 26143d377c9..6471f1bdae9 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "util.h" @@ -30,6 +31,7 @@ DEFINE_SPINLOCK(mq_lock); struct ipc_namespace init_ipc_ns = { .count = ATOMIC_INIT(1), .user_ns = &init_user_ns, + .proc_inum = PROC_IPC_INIT_INO, }; atomic_t nr_ipc_ns = ATOMIC_INIT(1); -- cgit v1.2.3-70-g09d2 From 4a674f34ba04a002244edaf891b5da7fc1473ae8 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:55 -0800 Subject: ipc: introduce message queue copy feature This patch is required for checkpoint/restore in userspace. c/r requires some way to get all pending IPC messages without deleting them from the queue (checkpoint can fail and in this case tasks will be resumed, so queue have to be valid). To achive this, new operation flag MSG_COPY for sys_msgrcv() system call was introduced. If this flag was specified, then mtype is interpreted as number of the message to copy. If MSG_COPY is set, then kernel will allocate dummy message with passed size, and then use new copy_msg() helper function to copy desired message (instead of unlinking it from the queue). Notes: 1) Return -ENOSYS if MSG_COPY is specified, but CONFIG_CHECKPOINT_RESTORE is not set. Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/msg.h | 1 + ipc/msg.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++-- ipc/msgutil.c | 38 ++++++++++++++++++++++++++++ ipc/util.h | 1 + 4 files changed, 102 insertions(+), 2 deletions(-) (limited to 'ipc/msgutil.c') diff --git a/include/uapi/linux/msg.h b/include/uapi/linux/msg.h index 78dbd2f996a..22d95c6854e 100644 --- a/include/uapi/linux/msg.h +++ b/include/uapi/linux/msg.h @@ -10,6 +10,7 @@ /* msgrcv options */ #define MSG_NOERROR 010000 /* no error if message is too big */ #define MSG_EXCEPT 020000 /* recv any msg except of specified type.*/ +#define MSG_COPY 040000 /* copy (not remove) all queue messages */ /* Obsolete, used only for backwards compatibility and libc5 compiles */ struct msqid_ds { diff --git a/ipc/msg.c b/ipc/msg.c index cefc24f46e3..d20ffc7d3f2 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -769,6 +769,45 @@ static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) return msgsz; } +#ifdef CONFIG_CHECKPOINT_RESTORE +static inline struct msg_msg *fill_copy(unsigned long copy_nr, + unsigned long msg_nr, + struct msg_msg *msg, + struct msg_msg *copy) +{ + if (copy_nr == msg_nr) + return copy_msg(msg, copy); + return NULL; +} + +static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, + int msgflg, long *msgtyp, + unsigned long *copy_number) +{ + struct msg_msg *copy; + + *copy_number = *msgtyp; + *msgtyp = 0; + /* + * Create dummy message to copy real message to. + */ + copy = load_msg(buf, bufsz); + if (!IS_ERR(copy)) + copy->m_ts = bufsz; + return copy; +} + +static inline void free_copy(int msgflg, struct msg_msg *copy) +{ + if (msgflg & MSG_COPY) + free_msg(copy); +} +#else +#define free_copy(msgflg, copy) do {} while (0) +#define prepare_copy(buf, sz, msgflg, msgtyp, copy_nr) ERR_PTR(-ENOSYS) +#define fill_copy(copy_nr, msg_nr, msg, copy) NULL +#endif + long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, long (*msg_handler)(void __user *, struct msg_msg *, size_t)) @@ -777,19 +816,29 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, struct msg_msg *msg; int mode; struct ipc_namespace *ns; + struct msg_msg *copy; + unsigned long __maybe_unused copy_number; if (msqid < 0 || (long) bufsz < 0) return -EINVAL; + if (msgflg & MSG_COPY) { + copy = prepare_copy(buf, bufsz, msgflg, &msgtyp, ©_number); + if (IS_ERR(copy)) + return PTR_ERR(copy); + } mode = convert_mode(&msgtyp, msgflg); ns = current->nsproxy->ipc_ns; msq = msg_lock_check(ns, msqid); - if (IS_ERR(msq)) + if (IS_ERR(msq)) { + free_copy(msgflg, copy); return PTR_ERR(msq); + } for (;;) { struct msg_receiver msr_d; struct list_head *tmp; + long msg_counter = 0; msg = ERR_PTR(-EACCES); if (ipcperms(ns, &msq->q_perm, S_IRUGO)) @@ -809,8 +858,15 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, if (mode == SEARCH_LESSEQUAL && walk_msg->m_type != 1) { msgtyp = walk_msg->m_type - 1; + } else if (msgflg & MSG_COPY) { + msg = fill_copy(copy_number, + msg_counter, + walk_msg, copy); + if (msg) + break; } else break; + msg_counter++; } tmp = tmp->next; } @@ -823,6 +879,8 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, msg = ERR_PTR(-E2BIG); goto out_unlock; } + if (msgflg & MSG_COPY) + goto out_unlock; list_del(&msg->m_list); msq->q_qnum--; msq->q_rtime = get_seconds(); @@ -906,8 +964,10 @@ out_unlock: break; } } - if (IS_ERR(msg)) + if (IS_ERR(msg)) { + free_copy(msgflg, copy); return PTR_ERR(msg); + } bufsz = msg_handler(buf, msg, bufsz); free_msg(msg); diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 6471f1bdae9..7eecdad40ef 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -102,7 +102,45 @@ out_err: free_msg(msg); return ERR_PTR(err); } +#ifdef CONFIG_CHECKPOINT_RESTORE +struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) +{ + struct msg_msgseg *dst_pseg, *src_pseg; + int len = src->m_ts; + int alen; + + BUG_ON(dst == NULL); + if (src->m_ts > dst->m_ts) + return ERR_PTR(-EINVAL); + + alen = len; + if (alen > DATALEN_MSG) + alen = DATALEN_MSG; + + dst->next = NULL; + dst->security = NULL; + memcpy(dst + 1, src + 1, alen); + + len -= alen; + dst_pseg = dst->next; + src_pseg = src->next; + while (len > 0) { + alen = len; + if (alen > DATALEN_SEG) + alen = DATALEN_SEG; + memcpy(dst_pseg + 1, src_pseg + 1, alen); + dst_pseg = dst_pseg->next; + len -= alen; + src_pseg = src_pseg->next; + } + + dst->m_type = src->m_type; + dst->m_ts = src->m_ts; + + return dst; +} +#endif int store_msg(void __user *dest, struct msg_msg *msg, int len) { int alen; diff --git a/ipc/util.h b/ipc/util.h index a61e0ca2bff..eeb79a1fbd8 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -140,6 +140,7 @@ int ipc_parse_version (int *cmd); extern void free_msg(struct msg_msg *msg); extern struct msg_msg *load_msg(const void __user *src, int len); +extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst); extern int store_msg(void __user *dest, struct msg_msg *msg, int len); extern void recompute_msgmni(struct ipc_namespace *); -- cgit v1.2.3-70-g09d2 From 51eeacaa07d1372a7bc9612548ffe6cd846f4f2f Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:35:01 -0800 Subject: ipc: simplify message copying Remove the redundant and confusing fill_copy(). Also add copy_msg() check for error. In this case exit from the function have to be done instead of break, because further code interprets any error as EAGAIN. Also define copy_msg() for the case when CONFIG_CHECKPOINT_RESTORE is disabled. Signed-off-by: Stanislav Kinsbursky Cc: "Eric W. Biederman" Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 25 +++++++++---------------- ipc/msgutil.c | 5 +++++ 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'ipc/msgutil.c') diff --git a/ipc/msg.c b/ipc/msg.c index 038a7d79eb0..8493e1d7e35 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -770,16 +770,6 @@ static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) } #ifdef CONFIG_CHECKPOINT_RESTORE -static inline struct msg_msg *fill_copy(unsigned long copy_nr, - unsigned long msg_nr, - struct msg_msg *msg, - struct msg_msg *copy) -{ - if (copy_nr == msg_nr) - return copy_msg(msg, copy); - return NULL; -} - static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, int msgflg, long *msgtyp, unsigned long *copy_number) @@ -803,8 +793,6 @@ static inline void free_copy(struct msg_msg *copy) free_msg(copy); } #else -#define fill_copy(copy_nr, msg_nr, msg, copy) NULL - static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, int msgflg, long *msgtyp, unsigned long *copy_number) @@ -868,11 +856,16 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, walk_msg->m_type != 1) { msgtyp = walk_msg->m_type - 1; } else if (msgflg & MSG_COPY) { - msg = fill_copy(copy_number, - msg_counter, - walk_msg, copy); - if (msg) + if (copy_number == msg_counter) { + /* + * Found requested message. + * Copy it. + */ + msg = copy_msg(msg, copy); + if (IS_ERR(msg)) + goto out_unlock; break; + } } else break; msg_counter++; diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 7eecdad40ef..ebfcbfa8b7f 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -140,6 +140,11 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) return dst; } +#else +struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) +{ + return ERR_PTR(-ENOSYS); +} #endif int store_msg(void __user *dest, struct msg_msg *msg, int len) { -- cgit v1.2.3-70-g09d2