From 961a828df64979d2a9faeeeee043391670a193b9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Tue, 17 Jan 2012 22:57:37 -0500
Subject: SUNRPC: Fix potential races in xprt_lock_write_next()

We have to ensure that the wake up from the waitqueue and the assignment
of xprt->snd_task are atomic. We can do this by assigning the snd_task
while under the waitqueue spinlock.

Signed-off-by: Trond Myklebust
---
 net/sunrpc/sched.c | 42 +++++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)

(limited to 'net/sunrpc/sched.c')

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 3341d896278..f982dfe5399 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -422,7 +422,7 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
 /*
  * Wake up the next task on a priority queue.
  */
-static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue)
+static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue)
 {
         struct list_head *q;
         struct rpc_task *task;
@@ -467,30 +467,54 @@ new_queue:
 new_owner:
         rpc_set_waitqueue_owner(queue, task->tk_owner);
 out:
-        rpc_wake_up_task_queue_locked(queue, task);
         return task;
 }
 
+static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue)
+{
+        if (RPC_IS_PRIORITY(queue))
+                return __rpc_find_next_queued_priority(queue);
+        if (!list_empty(&queue->tasks[0]))
+                return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list);
+        return NULL;
+}
+
 /*
- * Wake up the next task on the wait queue.
+ * Wake up the first task on the wait queue.
  */
-struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
+struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue,
+                bool (*func)(struct rpc_task *, void *), void *data)
 {
         struct rpc_task *task = NULL;
 
-        dprintk("RPC: wake_up_next(%p \"%s\")\n",
+        dprintk("RPC: wake_up_first(%p \"%s\")\n",
                         queue, rpc_qname(queue));
         spin_lock_bh(&queue->lock);
-        if (RPC_IS_PRIORITY(queue))
-                task = __rpc_wake_up_next_priority(queue);
-        else {
-                task_for_first(task, &queue->tasks[0])
+        task = __rpc_find_next_queued(queue);
+        if (task != NULL) {
+                if (func(task, data))
                         rpc_wake_up_task_queue_locked(queue, task);
+                else
+                        task = NULL;
         }
         spin_unlock_bh(&queue->lock);
 
         return task;
 }
+EXPORT_SYMBOL_GPL(rpc_wake_up_first);
+
+static bool rpc_wake_up_next_func(struct rpc_task *task, void *data)
+{
+        return true;
+}
+
+/*
+ * Wake up the next task on the wait queue.
+*/
+struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue)
+{
+        return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL);
+}
 EXPORT_SYMBOL_GPL(rpc_wake_up_next);
 
 /**
--
cgit v1.2.3-70-g09d2
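The new rpc_wake_up_first() lets its caller decide, while still holding queue->lock, whether the candidate task may actually be woken; that is what makes the assignment of xprt->snd_task atomic with the wakeup. Below is a minimal sketch of how a transport-side caller such as xprt_lock_write_next() might use it. The helper name __xprt_try_grant_lock and the exact lock-claiming details are illustrative assumptions, not code from this patch.

#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/xprt.h>

/* Hypothetical predicate: rpc_wake_up_first() calls it with queue->lock
 * held, so claiming the transport and waking the task cannot race with
 * other waiters being added to or removed from the queue.
 */
static bool __xprt_try_grant_lock(struct rpc_task *task, void *data)
{
        struct rpc_xprt *xprt = data;

        if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                return false;   /* lock is busy; leave the task queued */
        xprt->snd_task = task;  /* claim the transport for this task */
        return true;            /* caller will now dequeue and wake it */
}

static void xprt_lock_write_next(struct rpc_xprt *xprt)
{
        /* Wake the first queued sender only if it can be granted the lock. */
        rpc_wake_up_first(&xprt->sending, __xprt_try_grant_lock, xprt);
}

From 82b0a4c3c171b180629696e8d1d5f52516f711e6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Fri, 20 Jan 2012 14:52:23 -0500
Subject: SUNRPC: Add trace events to the sunrpc subsystem

Add declarations to allow tracing of RPC call creation, running,
sleeping, and destruction.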
Signed-off-by: Trond Myklebust
---
 include/linux/sunrpc/sched.h  |   2 +-
 include/trace/events/sunrpc.h | 124 ++++++++++++++++++++++++++++++++++++++++++
 net/sunrpc/sched.c            |  12 ++++
 3 files changed, 137 insertions(+), 1 deletion(-)
 create mode 100644 include/trace/events/sunrpc.h

(limited to 'net/sunrpc/sched.c')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index bd337f990a4..f7b2df5252b 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -271,7 +271,7 @@ static inline int rpc_task_has_priority(struct rpc_task *task, unsigned char pri
 }
 
 #ifdef RPC_DEBUG
-static inline const char * rpc_qname(struct rpc_wait_queue *q)
+static inline const char * rpc_qname(const struct rpc_wait_queue *q)
 {
         return ((q && q->name) ? q->name : "unknown");
 }
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
new file mode 100644
index 00000000000..51cc9490919
--- /dev/null
+++ b/include/trace/events/sunrpc.h
@@ -0,0 +1,124 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sunrpc
+
+#if !defined(_TRACE_SUNRPC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SUNRPC_H
+
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(rpc_task_running,
+
+        TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+
+        TP_ARGS(clnt, task, action),
+
+        TP_STRUCT__entry(
+                __field(const struct rpc_clnt *, clnt)
+                __field(const struct rpc_task *, task)
+                __field(const void *, action)
+                __field(unsigned long, runstate)
+                __field(int, status)
+                __field(unsigned short, flags)
+                ),
+
+        TP_fast_assign(
+                __entry->clnt = clnt;
+                __entry->task = task;
+                __entry->action = action;
+                __entry->runstate = task->tk_runstate;
+                __entry->status = task->tk_status;
+                __entry->flags = task->tk_flags;
+                ),
+
+        TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d action=%pf",
+                __entry->task,
+                __entry->clnt,
+                __entry->flags,
+                __entry->runstate,
+                __entry->status,
+                __entry->action
+                )
+);
+
+DEFINE_EVENT(rpc_task_running, rpc_task_begin,
+
+        TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+
+        TP_ARGS(clnt, task, action)
+
+);
+
+DEFINE_EVENT(rpc_task_running, rpc_task_run_action,
+
+        TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+
+        TP_ARGS(clnt, task, action)
+
+);
+
+DEFINE_EVENT(rpc_task_running, rpc_task_complete,
+
+        TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+
+        TP_ARGS(clnt, task, action)
+
+);
+
+DECLARE_EVENT_CLASS(rpc_task_queued,
+
+        TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
+
+        TP_ARGS(clnt, task, q),
+
+        TP_STRUCT__entry(
+                __field(const struct rpc_clnt *, clnt)
+                __field(const struct rpc_task *, task)
+                __field(const struct rpc_wait_queue *, queue)
+                __field(unsigned long, timeout)
+                __field(unsigned long, runstate)
+                __field(int, status)
+                __field(unsigned short, flags)
+                ),
+
+        TP_fast_assign(
+                __entry->clnt = clnt;
+                __entry->task = task;
+                __entry->queue = q;
+                __entry->timeout = task->tk_timeout;
+                __entry->runstate = task->tk_runstate;
+                __entry->status = task->tk_status;
+                __entry->flags = task->tk_flags;
+                ),
+
+        TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
+                __entry->task,
+                __entry->clnt,
+                __entry->flags,
+                __entry->runstate,
+                __entry->status,
+                __entry->timeout,
+                rpc_qname(__entry->queue)
+                )
+);
+
+DEFINE_EVENT(rpc_task_queued, rpc_task_sleep,
+
+        TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
+
+        TP_ARGS(clnt, task, q)
+
+);
+
+DEFINE_EVENT(rpc_task_queued, rpc_task_wakeup,
+
+        TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
+
+        TP_ARGS(clnt, task, q)
+
+);
+
+#endif /* _TRACE_SUNRPC_H */
+
+#include <trace/define_trace.h>
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index f982dfe5399..d79c63df49b 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -28,6 +28,9 @@
 #define RPCDBG_FACILITY         RPCDBG_SCHED
 #endif
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/sunrpc.h>
+
 /*
  * RPC slabs and memory pools
  */
@@ -251,6 +254,8 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
 
 static void rpc_set_active(struct rpc_task *task)
 {
+        trace_rpc_task_begin(task->tk_client, task, NULL);
+
         rpc_task_set_debuginfo(task);
         set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
 }
@@ -267,6 +272,8 @@ static int rpc_complete_task(struct rpc_task *task)
         unsigned long flags;
         int ret;
 
+        trace_rpc_task_complete(task->tk_client, task, NULL);
+
         spin_lock_irqsave(&wq->lock, flags);
         clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
         ret = atomic_dec_and_test(&task->tk_count);
@@ -324,6 +331,8 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
         dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
                         task->tk_pid, rpc_qname(q), jiffies);
 
+        trace_rpc_task_sleep(task->tk_client, task, q);
+
         __rpc_add_wait_queue(q, task, queue_priority);
 
         BUG_ON(task->tk_callback != NULL);
@@ -378,6 +387,8 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task
                 return;
         }
 
+        trace_rpc_task_wakeup(task->tk_client, task, queue);
+
         __rpc_remove_wait_queue(queue, task);
 
         rpc_make_runnable(task);
@@ -701,6 +712,7 @@ static void __rpc_execute(struct rpc_task *task)
                         if (do_action == NULL)
                                 break;
                 }
+                trace_rpc_task_run_action(task->tk_client, task, task->tk_action);
                 do_action(task);
 
                 /*
--
cgit v1.2.3-70-g09d2
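Because the tracepoints above are declared through event classes, additional events can reuse an existing record layout and format string with a single DEFINE_EVENT stanza. As a sketch only (the event name rpc_task_timeout is hypothetical and not part of this series), a new event sharing the rpc_task_queued class would look like this; callers could then invoke trace_rpc_task_timeout(clnt, task, q) at the relevant point.

/* Hypothetical event reusing the rpc_task_queued class declared above. */
DEFINE_EVENT(rpc_task_queued, rpc_task_timeout,

        TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task,
                 const struct rpc_wait_queue *q),

        TP_ARGS(clnt, task, q)

);

From 2f09c24216cd789653eb8efbf8be88409eb8d581 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Wed, 8 Feb 2012 22:01:15 -0500
Subject: SUNRPC: Ensure that we can trace waitqueues when !defined(CONFIG_SYSCTL)

The tracepoint code relies on queue->name being defined in order to
display the name of the waitqueue on which an RPC task is sleeping.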
Reported-by: Randy Dunlap
Reported-by: Steven Rostedt
Signed-off-by: Trond Myklebust
Acked-by: Steven Rostedt
Acked-by: Randy Dunlap
---
 include/linux/sunrpc/debug.h |  3 +++
 include/linux/sunrpc/sched.h | 15 +++++++++++++--
 net/sunrpc/sched.c           |  4 +---
 3 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'net/sunrpc/sched.c')

diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index c2786f20016..2a11eb278f6 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -34,6 +34,9 @@
 #ifdef CONFIG_SYSCTL
 #define RPC_DEBUG
 #endif
+#ifdef CONFIG_TRACEPOINTS
+#define RPC_TRACEPOINTS
+#endif
 /* #define RPC_PROFILE */
 
 /*
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index f7b2df5252b..22dfc24013b 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -195,7 +195,7 @@ struct rpc_wait_queue {
         unsigned char           nr;             /* # tasks remaining for cookie */
         unsigned short          qlen;           /* total # tasks waiting in queue */
         struct rpc_timer        timer_list;
-#ifdef RPC_DEBUG
+#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS)
         const char *            name;
 #endif
 };
@@ -270,11 +270,22 @@ static inline int rpc_task_has_priority(struct rpc_task *task, unsigned char pri
         return (task->tk_priority + RPC_PRIORITY_LOW == prio);
 }
 
-#ifdef RPC_DEBUG
+#if defined(RPC_DEBUG) || defined (RPC_TRACEPOINTS)
 static inline const char * rpc_qname(const struct rpc_wait_queue *q)
 {
         return ((q && q->name) ? q->name : "unknown");
 }
+
+static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q,
+                const char *name)
+{
+        q->name = name;
+}
+#else
+static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q,
+                const char *name)
+{
+}
 #endif
 
 #endif /* _LINUX_SUNRPC_SCHED_H_ */
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index d79c63df49b..1c570a81096 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -208,9 +208,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
         queue->qlen = 0;
         setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue);
         INIT_LIST_HEAD(&queue->timer_list.list);
-#ifdef RPC_DEBUG
-        queue->name = qname;
-#endif
+        rpc_assign_waitqueue_name(queue, qname);
 }
 
 void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
--
cgit v1.2.3-70-g09d2
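The patch above follows a common pattern: the field is compiled in only when at least one consumer (debugging or tracepoints) is configured, and the setter collapses to an empty inline stub otherwise, so call sites such as __rpc_init_priority_wait_queue() need no #ifdef. A generic sketch of that pattern with made-up names (FOO_* and foo_queue are illustrative, not kernel symbols):

struct foo_queue {
        /* ... other fields ... */
#if defined(FOO_DEBUG) || defined(FOO_TRACEPOINTS)
        const char *name;       /* only present when someone can read it */
#endif
};

#if defined(FOO_DEBUG) || defined(FOO_TRACEPOINTS)
static inline void foo_assign_queue_name(struct foo_queue *q, const char *name)
{
        q->name = name;
}
#else
static inline void foo_assign_queue_name(struct foo_queue *q, const char *name)
{
        /* no-op stub: keeps callers free of #ifdefs when the field is absent */
}
#endif

From 540a0f7584169651f485e8ab67461fcb06934e38 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Mon, 19 Mar 2012 13:39:35 -0400
Subject: SUNRPC: We must not use list_for_each_entry_safe() in rpc_wake_up()

The problem is that for the case of priority queues, we have to assume
that __rpc_remove_wait_queue_priority will move new elements from the
tk_wait.links lists into the queue->tasks[] list. We therefore cannot
use list_for_each_entry_safe() on queue->tasks[], since that will skip
these new tasks that __rpc_remove_wait_queue_priority is adding.

Without this fix, rpc_wake_up and rpc_wake_up_status will both fail to
wake up all tasks on priority wait queues, which can result in some
nasty hangs.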
Reported-by: Andy Adamson
Signed-off-by: Trond Myklebust
Cc: stable@vger.kernel.org
---
 net/sunrpc/sched.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'net/sunrpc/sched.c')

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 1c570a81096..994cfea2bad 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -534,14 +534,18 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next);
  */
 void rpc_wake_up(struct rpc_wait_queue *queue)
 {
-        struct rpc_task *task, *next;
         struct list_head *head;
 
         spin_lock_bh(&queue->lock);
         head = &queue->tasks[queue->maxpriority];
         for (;;) {
-                list_for_each_entry_safe(task, next, head, u.tk_wait.list)
+                while (!list_empty(head)) {
+                        struct rpc_task *task;
+                        task = list_first_entry(head,
+                                        struct rpc_task,
+                                        u.tk_wait.list);
                         rpc_wake_up_task_queue_locked(queue, task);
+                }
                 if (head == &queue->tasks[0])
                         break;
                 head--;
@@ -559,13 +563,16 @@ EXPORT_SYMBOL_GPL(rpc_wake_up);
  */
 void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
 {
-        struct rpc_task *task, *next;
         struct list_head *head;
 
         spin_lock_bh(&queue->lock);
         head = &queue->tasks[queue->maxpriority];
         for (;;) {
-                list_for_each_entry_safe(task, next, head, u.tk_wait.list) {
+                while (!list_empty(head)) {
+                        struct rpc_task *task;
+                        task = list_first_entry(head,
+                                        struct rpc_task,
+                                        u.tk_wait.list);
                         task->tk_status = status;
                         rpc_wake_up_task_queue_locked(queue, task);
                 }
--
cgit v1.2.3-70-g09d2
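For reference, a self-contained sketch (generic names, not SUNRPC code) of the drain idiom these hunks switch to, and of why list_for_each_entry_safe() is not enough here: that macro only guards against removal of the current entry, so entries the loop body splices onto the head — as __rpc_remove_wait_queue_priority() does when it promotes tasks from tk_wait.links — can land before the cached next pointer and be skipped.

#include <linux/list.h>

struct item {
        struct list_head link;          /* linked on the queue head */
        struct list_head extras;        /* sub-list that removal may splice in */
};

static void drain_all(struct list_head *head)
{
        /* Re-read the head every iteration: anything the body adds to the
         * front of the list is processed instead of being jumped over.
         */
        while (!list_empty(head)) {
                struct item *it = list_first_entry(head, struct item, link);

                list_del_init(&it->link);
                /* Like __rpc_remove_wait_queue_priority(): removing one entry
                 * can expose new entries at the head of the list.
                 */
                list_splice_init(&it->extras, head);
        }
}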