From 73c101011926c5832e6e141682180c4debe2cf45 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Mar 2011 13:19:51 +0100 Subject: block: initial patch for on-stack per-task plugging This patch adds support for creating a queuing context outside of the queue itself. This enables us to batch up pieces of IO before grabbing the block device queue lock and submitting them to the IO scheduler. The context is created on the stack of the process and assigned in the task structure, so that we can auto-unplug it if we hit a schedule event. The current queue plugging happens implicitly if IO is submitted to an empty device, yet callers have to remember to unplug that IO when they are going to wait for it. This is an ugly API and has caused bugs in the past. Additionally, it requires hacks in the vm (->sync_page() callback) to handle that logic. By switching to an explicit plugging scheme we make the API a lot nicer and can get rid of the ->sync_page() hack in the vm. Signed-off-by: Jens Axboe --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 777d8a5ed06..96ac2264374 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -99,6 +99,7 @@ struct robust_list_head; struct bio_list; struct fs_struct; struct perf_event_context; +struct blk_plug; /* * List of flags we want to share for kernel threads, @@ -1429,6 +1430,11 @@ struct task_struct { /* stacked block device info */ struct bio_list *bio_list; +#ifdef CONFIG_BLOCK +/* stack plugging */ + struct blk_plug *plug; +#endif + /* VM state */ struct reclaim_state *reclaim_state; -- cgit v1.2.3-70-g09d2 From 207205a2ba2655652fe46a60b49838af6c16a919 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Mar 2011 16:30:44 -0700 Subject: kthread: NUMA aware kthread_create_on_node() All kthreads being created from a single helper task, they all use memory from a single node for their kernel stack and task struct. This patch suite creates kthread_create_on_node(), adding a 'cpu' parameter to parameters already used by kthread_create(). This parameter serves in allocating memory for the new kthread on its memory node if possible. Signed-off-by: Eric Dumazet Acked-by: David S. Miller Reviewed-by: Andi Kleen Acked-by: Rusty Russell Cc: Tejun Heo Cc: Tony Luck Cc: Fenghua Yu Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 14 ++++++++++---- include/linux/sched.h | 1 + kernel/fork.c | 3 ++- kernel/kthread.c | 31 +++++++++++++++++++++++++------ 4 files changed, 38 insertions(+), 11 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 7ff16f7d3ed..1e923e5e88e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -4,10 +4,15 @@ #include #include -struct task_struct *kthread_create(int (*threadfn)(void *data), - void *data, - const char namefmt[], ...) - __attribute__((format(printf, 3, 4))); +struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), + void *data, + int node, + const char namefmt[], ...) + __attribute__((format(printf, 4, 5))); + +#define kthread_create(threadfn, data, namefmt, arg...) \ + kthread_create_on_node(threadfn, data, -1, namefmt, ##arg) + /** * kthread_run - create and wake a thread. @@ -34,6 +39,7 @@ void *kthread_data(struct task_struct *k); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; +extern int tsk_fork_get_node(struct task_struct *tsk); /* * Simple work processor based on kthread. diff --git a/include/linux/sched.h b/include/linux/sched.h index c15936fe998..4b601be3dac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1471,6 +1471,7 @@ struct task_struct { #ifdef CONFIG_NUMA struct mempolicy *mempolicy; /* Protected by alloc_lock */ short il_next; + short pref_node_fork; #endif atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; diff --git a/kernel/fork.c b/kernel/fork.c index cbc6adc6e89..a8f64f8ec7e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -254,7 +255,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) struct task_struct *tsk; struct thread_info *ti; unsigned long *stackend; - int node = numa_node_id(); + int node = tsk_fork_get_node(orig); int err; prepare_to_copy(orig); diff --git a/kernel/kthread.c b/kernel/kthread.c index c55afba990a..684ab3f7dd7 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -27,6 +27,7 @@ struct kthread_create_info /* Information passed to kthread() from kthreadd. */ int (*threadfn)(void *data); void *data; + int node; /* Result passed back to kthread_create() from kthreadd. */ struct task_struct *result; @@ -98,10 +99,23 @@ static int kthread(void *_create) do_exit(ret); } +/* called from do_fork() to get node information for about to be created task */ +int tsk_fork_get_node(struct task_struct *tsk) +{ +#ifdef CONFIG_NUMA + if (tsk == kthreadd_task) + return tsk->pref_node_fork; +#endif + return numa_node_id(); +} + static void create_kthread(struct kthread_create_info *create) { int pid; +#ifdef CONFIG_NUMA + current->pref_node_fork = create->node; +#endif /* We want our own signal handler (we take no signals by default). */ pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); if (pid < 0) { @@ -111,15 +125,18 @@ static void create_kthread(struct kthread_create_info *create) } /** - * kthread_create - create a kthread. + * kthread_create_on_node - create a kthread. * @threadfn: the function to run until signal_pending(current). * @data: data ptr for @threadfn. + * @node: memory node number. * @namefmt: printf-style name for the thread. * * Description: This helper function creates and names a kernel * thread. The thread will be stopped: use wake_up_process() to start * it. See also kthread_run(). * + * If thread is going to be bound on a particular cpu, give its node + * in @node, to get NUMA affinity for kthread stack, or else give -1. * When woken, the thread will run @threadfn() with @data as its * argument. @threadfn() can either call do_exit() directly if it is a * standalone thread for which noone will call kthread_stop(), or @@ -129,15 +146,17 @@ static void create_kthread(struct kthread_create_info *create) * * Returns a task_struct or ERR_PTR(-ENOMEM). */ -struct task_struct *kthread_create(int (*threadfn)(void *data), - void *data, - const char namefmt[], - ...) +struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), + void *data, + int node, + const char namefmt[], + ...) { struct kthread_create_info create; create.threadfn = threadfn; create.data = data; + create.node = node; init_completion(&create.done); spin_lock(&kthread_create_lock); @@ -164,7 +183,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), } return create.result; } -EXPORT_SYMBOL(kthread_create); +EXPORT_SYMBOL(kthread_create_on_node); /** * kthread_bind - bind a just-created kthread to a cpu. -- cgit v1.2.3-70-g09d2 From e815f0a84fc9a98e5cc3ef0b520122e5e18520e7 Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Mon, 21 Mar 2011 20:24:47 +0100 Subject: sched.h: Fix a typo ("its") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sentence uses the possessive pronoun, which is spelled without an apostrophe. Signed-off-by: Jonathan Neuschäfer Cc: Jiri Kosina Cc: Peter Zijlstra LKML-Reference: <1300735487-2406-1-git-send-email-j.neuschaefer@gmx.net> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index c15936fe998..e89f1292c30 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -516,7 +516,7 @@ struct thread_group_cputimer { struct autogroup; /* - * NOTE! "signal_struct" does not have it's own + * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always * implies a shared sighand_struct, so locking * sighand_struct is always a proper superset of -- cgit v1.2.3-70-g09d2 From 7ffd4ca7a2cdd7a18f0b499a4e9e0e7cf36ba018 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Mar 2011 16:42:35 -0700 Subject: memcg: convert uncharge batching from bytes to page granularity We never uncharge subpage quantities. Signed-off-by: Johannes Weiner Acked-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++-- mm/memcontrol.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4b601be3dac..98fc7ed4b19 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1524,8 +1524,8 @@ struct task_struct { struct memcg_batch_info { int do_batch; /* incremented when batch uncharge started */ struct mem_cgroup *memcg; /* target memcg of uncharge */ - unsigned long bytes; /* uncharged usage */ - unsigned long memsw_bytes; /* uncharged mem+swap usage */ + unsigned long nr_pages; /* uncharged usage */ + unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ } memcg_batch; #endif }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 91120a04f93..9dfbed2aacc 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2579,9 +2579,9 @@ __do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype, if (batch->memcg != mem) goto direct_uncharge; /* remember freed charge and uncharge it later */ - batch->bytes += PAGE_SIZE; + batch->nr_pages++; if (uncharge_memsw) - batch->memsw_bytes += PAGE_SIZE; + batch->memsw_nr_pages++; return; direct_uncharge: res_counter_uncharge(&mem->res, page_size); @@ -2708,8 +2708,8 @@ void mem_cgroup_uncharge_start(void) /* We can do nest. */ if (current->memcg_batch.do_batch == 1) { current->memcg_batch.memcg = NULL; - current->memcg_batch.bytes = 0; - current->memcg_batch.memsw_bytes = 0; + current->memcg_batch.nr_pages = 0; + current->memcg_batch.memsw_nr_pages = 0; } } @@ -2730,10 +2730,12 @@ void mem_cgroup_uncharge_end(void) * This "batch->memcg" is valid without any css_get/put etc... * bacause we hide charges behind us. */ - if (batch->bytes) - res_counter_uncharge(&batch->memcg->res, batch->bytes); - if (batch->memsw_bytes) - res_counter_uncharge(&batch->memcg->memsw, batch->memsw_bytes); + if (batch->nr_pages) + res_counter_uncharge(&batch->memcg->res, + batch->nr_pages * PAGE_SIZE); + if (batch->memsw_nr_pages) + res_counter_uncharge(&batch->memcg->memsw, + batch->memsw_nr_pages * PAGE_SIZE); memcg_oom_recover(batch->memcg); /* forget this pointer (for sanity check) */ batch->memcg = NULL; -- cgit v1.2.3-70-g09d2