From 0b8f1efad30bd58f89961b82dfe68b9edf8fd2ac Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Dec 2008 18:58:31 -0800 Subject: sparse irq_desc[] array: core kernel and x86 changes Impact: new feature Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with NR_CPUS set to large values. The goal is to be able to scale up to much larger NR_IRQS value without impacting the (important) common case. To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of irq_desc pointers. When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node to get irq_desc, this also makes the IRQ descriptors NUMA-local (to the site that calls request_irq()). This gets rid of the irq_cfg[] static array on x86 as well: irq_cfg now uses desc->chip_data for x86 to store irq_cfg. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- fs/proc/stat.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 81904f07679..a13431ab7c6 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -27,6 +27,7 @@ static int show_stat(struct seq_file *p, void *v) u64 sum = 0; struct timespec boottime; unsigned int per_irq_sum; + struct irq_desc *desc; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -44,10 +45,11 @@ static int show_stat(struct seq_file *p, void *v) softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); - - for_each_irq_nr(j) + for_each_irq_desc(j, desc) { + if (!desc) + continue; sum += kstat_irqs_cpu(j, i); - + } sum += arch_irq_stat_cpu(i); } sum += arch_irq_stat(); @@ -90,11 +92,14 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? 
*/ - for_each_irq_nr(j) { + for (j = 0; j < NR_IRQS; j++) { + desc = irq_to_desc(j); per_irq_sum = 0; - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); + if (desc) { + for_each_possible_cpu(i) + per_irq_sum += kstat_irqs_cpu(j, i); + } seq_printf(p, " %u", per_irq_sum); } -- cgit v1.2.3-70-g09d2 From 240d367b4e6c6e3c5075e034db14dba60a6f5fa7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 8 Dec 2008 14:06:17 -0800 Subject: sparseirq: fix Alpha build failure Impact: build fix on Alpha -tip testing found this build failure on the Alpha defconfig: /home/mingo/tip/fs/proc/stat.c: In function 'show_stat': /home/mingo/tip/fs/proc/stat.c:48: error: implicit declaration of function 'for_each_irq_desc' /home/mingo/tip/fs/proc/stat.c:48: error: expected ';' before '{' token can not use irq_desc() in stat.c on older architectures. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- fs/proc/stat.c | 20 +++++++++++++------- include/linux/irq.h | 9 --------- include/linux/irqnr.h | 19 ++++++++++++++++--- 3 files changed, 29 insertions(+), 19 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index a13431ab7c6..3cb9492801c 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -45,9 +45,12 @@ static int show_stat(struct seq_file *p, void *v) softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); - for_each_irq_desc(j, desc) { + for_each_irq_nr(j) { +#ifdef CONFIG_SPARSE_IRQ + desc = irq_to_desc(j); if (!desc) continue; +#endif sum += kstat_irqs_cpu(j, i); } sum += arch_irq_stat_cpu(i); @@ -92,14 +95,17 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? 
*/ - for (j = 0; j < NR_IRQS; j++) { - desc = irq_to_desc(j); + for_each_irq_nr(j) { per_irq_sum = 0; - - if (desc) { - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); +#ifdef CONFIG_SPARSE_IRQ + desc = irq_to_desc(j); + if (!desc) { + seq_printf(p, " %u", per_irq_sum); + continue; } +#endif + for_each_possible_cpu(i) + per_irq_sum += kstat_irqs_cpu(j, i); seq_printf(p, " %u", per_irq_sum); } diff --git a/include/linux/irq.h b/include/linux/irq.h index 63b00439d4d..b5749db3e5a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -198,7 +198,6 @@ extern void arch_init_copy_chip_data(struct irq_desc *old_desc, extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); #ifndef CONFIG_SPARSE_IRQ - extern struct irq_desc irq_desc[NR_IRQS]; static inline struct irq_desc *irq_to_desc(unsigned int irq) @@ -210,14 +209,6 @@ static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) return irq_to_desc(irq); } -#ifdef CONFIG_GENERIC_HARDIRQS -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ - irq >= 0; irq--, desc--) -#endif - #else extern struct irq_desc *irq_to_desc(unsigned int irq); diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 7a299e989f8..13754f81358 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -8,9 +8,22 @@ # define for_each_irq_desc(irq, desc) \ for (irq = 0; irq < nr_irqs; irq++) -static inline early_sparse_irq_init(void) -{ -} +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1; irq >= 0; irq--) +#else +#ifndef CONFIG_SPARSE_IRQ + +struct irq_desc; +extern int nr_irqs; +# define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 
1); \ + irq >= 0; irq--, desc--) #endif +#endif + +#define for_each_irq_nr(irq) \ + for (irq = 0; irq < nr_irqs; irq++) #endif -- cgit v1.2.3-70-g09d2 From 49c50342c728344b79c8f9e8293637fe80ef5ad5 Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Tue, 9 Dec 2008 13:14:21 -0800 Subject: pagemap: fix 32-bit pagemap regression The large pages fix from bcf8039ed45 broke 32-bit pagemap by pulling the pagemap entry code out into a function with the wrong return type. Pagemap entries are 64 bits on all systems and unsigned long is only 32 bits on 32-bit systems. Signed-off-by: Matt Mackall Reported-by: Doug Graham Cc: Alexey Dobriyan Cc: Dave Hansen Cc: [2.6.26.x, 2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b770c095e45..3a8bdd7f575 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -557,9 +557,9 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); } -static unsigned long pte_to_pagemap_entry(pte_t pte) +static u64 pte_to_pagemap_entry(pte_t pte) { - unsigned long pme = 0; + u64 pme = 0; if (is_swap_pte(pte)) pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; -- cgit v1.2.3-70-g09d2 From 9c24624727f6d6c460e45762a408ca5f5b9b8ef2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 9 Dec 2008 13:14:27 -0800 Subject: KSYM_SYMBOL_LEN fixes Miles Lane tailing /sys files hit a BUG which Pekka Enberg has tracked to my 966c8c12dc9e77f931e2281ba25d2f0244b06949 sprint_symbol(): use less stack exposing a bug in slub's list_locations() - kallsyms_lookup() writes a 0 to namebuf[KSYM_NAME_LEN-1], but that was beyond the end of page provided. 
The 100 slop which list_locations() allows at end of page looks roughly enough for all the other stuff it might print after the symbol before it checks again: break out KSYM_SYMBOL_LEN earlier than before. Latencytop and ftrace are using KSYM_NAME_LEN buffers where they need KSYM_SYMBOL_LEN buffers, and vmallocinfo a 2*KSYM_NAME_LEN buffer where it wants a KSYM_SYMBOL_LEN buffer: fix those before anyone copies them. [akpm@linux-foundation.org: ftrace.h needs module.h] Signed-off-by: Hugh Dickins Cc: Christoph Lameter Cc: Miles Lane Acked-by: Pekka Enberg Acked-by: Steven Rostedt Acked-by: Frederic Weisbecker Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- include/linux/ftrace.h | 3 ++- kernel/latencytop.c | 2 +- mm/slub.c | 2 +- mm/vmalloc.c | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index 486cf3fe713..d4677603c88 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -371,7 +371,7 @@ static int lstats_show_proc(struct seq_file *m, void *v) task->latency_record[i].time, task->latency_record[i].max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; + char sym[KSYM_SYMBOL_LEN]; char *c; if (!task->latency_record[i].backtrace[q]) break; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2..9c5bc6be2b0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef CONFIG_FUNCTION_TRACER @@ -231,7 +232,7 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } struct boot_trace { pid_t caller; - char func[KSYM_NAME_LEN]; + char func[KSYM_SYMBOL_LEN]; int result; unsigned long long duration; /* usecs */ ktime_t calltime; diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 5e7b45c5692..449db466bdb 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -191,7 +191,7 @@ static int lstats_show(struct 
seq_file *m, void *v) latency_record[i].time, latency_record[i].max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; + char sym[KSYM_SYMBOL_LEN]; char *c; if (!latency_record[i].backtrace[q]) break; diff --git a/mm/slub.c b/mm/slub.c index 749588a50a5..a2cd47d89e0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3597,7 +3597,7 @@ static int list_locations(struct kmem_cache *s, char *buf, for (i = 0; i < t.count; i++) { struct location *l = &t.loc[i]; - if (len > PAGE_SIZE - 100) + if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100) break; len += sprintf(buf + len, "%7ld ", l->count); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f3f6e075856..1ddb77ba399 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1717,7 +1717,7 @@ static int s_show(struct seq_file *m, void *p) v->addr, v->addr + v->size, v->size); if (v->caller) { - char buff[2 * KSYM_NAME_LEN]; + char buff[KSYM_SYMBOL_LEN]; seq_putc(m, ' '); sprint_symbol(buff, (unsigned long)v->caller); -- cgit v1.2.3-70-g09d2 From 13bd41bc227a48d6cf8992a3286bf6eba3c71a0c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 16 Dec 2008 00:23:34 -0800 Subject: proc: enclose desc variable of show_stat() in CONFIG_SPARSE_IRQ Impact: restructure code to fix compiler warning commit 240d367b4e6c6e3c5075e034db14dba60a6f5fa7 moved desc usage point into #ifdef CONFIG_SPARSE_IRQ. 
Eliminate the desc variable; otherwise the following warning happens: fs/proc/stat.c: In function 'show_stat': fs/proc/stat.c:31: warning: unused variable 'desc' [ akpm: cleaned up the patch to remove #ifdef ] Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- fs/proc/stat.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 3cb9492801c..3bb1cf1e742 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -27,7 +27,6 @@ static int show_stat(struct seq_file *p, void *v) u64 sum = 0; struct timespec boottime; unsigned int per_irq_sum; - struct irq_desc *desc; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -47,8 +46,7 @@ static int show_stat(struct seq_file *p, void *v) guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); for_each_irq_nr(j) { #ifdef CONFIG_SPARSE_IRQ - desc = irq_to_desc(j); - if (!desc) + if (!irq_to_desc(j)) continue; #endif sum += kstat_irqs_cpu(j, i); @@ -98,8 +96,7 @@ static int show_stat(struct seq_file *p, void *v) for_each_irq_nr(j) { per_irq_sum = 0; #ifdef CONFIG_SPARSE_IRQ - desc = irq_to_desc(j); - if (!desc) { + if (!irq_to_desc(j)) { seq_printf(p, " %u", per_irq_sum); continue; } -- cgit v1.2.3-70-g09d2