From 48a1b10aff588833b73994704c47bbd0deb73e9c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 11 Dec 2008 00:15:01 -0800 Subject: x86, sparseirq: move irq_desc according to smp_affinity, v7 Impact: improve NUMA handling by migrating irq_desc on smp_affinity changes if CONFIG_NUMA_MIGRATE_IRQ_DESC is set: - make irq_desc to go with affinity aka irq_desc moving etc - call move_irq_desc in irq_complete_move() - legacy irq_desc is not moved, because they are allocated via static array for logical apic mode, need to add move_desc_in_progress_in_same_domain, otherwise it will not be moved ==> also could need two phases to get irq_desc moved. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- kernel/irq/Makefile | 1 + kernel/irq/chip.c | 12 ++++- kernel/irq/handle.c | 15 ++++-- kernel/irq/internals.h | 5 ++ kernel/irq/numa_migrate.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 153 insertions(+), 7 deletions(-) create mode 100644 kernel/irq/numa_migrate.c (limited to 'kernel/irq') diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 681c52dbfe2..4dd5b1edac9 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -3,3 +3,4 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o +obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 8e4fce4a1b1..de210f4b7a9 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -353,6 +353,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) spin_lock(&desc->lock); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -430,6 +431,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@ -466,12 +468,14 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); /* Start handling the irq */ desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@ -532,8 +536,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) + if (desc->chip->eoi) { desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); + } } void @@ -568,8 +574,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, /* Uninstall? */ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) + if (desc->chip != &no_irq_chip) { mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); + } desc->status |= IRQ_DISABLED; desc->depth = 1; } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 8aa09547f5e..f1a23069c20 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -23,7 +23,7 @@ /* * lockdep: we want to handle all irq_desc locks as a single lock-class: */ -static struct lock_class_key irq_desc_lock_class; +struct lock_class_key irq_desc_lock_class; /** * handle_bad_irq - handle spurious and unhandled irqs @@ -73,7 +73,7 @@ static struct irq_desc irq_desc_init = { #endif }; -static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) +void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) { unsigned long bytes; char *ptr; @@ -113,7 +113,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) /* * Protect the sparse_irqs: */ -static DEFINE_SPINLOCK(sparse_irq_lock); +DEFINE_SPINLOCK(sparse_irq_lock); struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; @@ -337,8 +337,11 @@ unsigned int __do_IRQ(unsigned int irq) /* * No locking required for CPU-local interrupts: */ - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + /* get new one */ + desc = irq_remap_to_desc(irq, desc); + } if (likely(!(desc->status & IRQ_DISABLED))) { action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -349,8 +352,10 @@ unsigned int __do_IRQ(unsigned int irq) } spin_lock(&desc->lock); - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); + } /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 64c1c7253da..e6d0a43cc12 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -13,6 +13,11 @@ extern void compat_irq_chip_set_default_handler(struct irq_desc *desc); extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, unsigned long flags); +extern struct lock_class_key irq_desc_lock_class; +extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); +extern spinlock_t sparse_irq_lock; +extern struct irq_desc *irq_desc_ptrs[NR_IRQS]; + #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); extern void register_handler_proc(unsigned int irq, struct irqaction *action); diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c new file mode 100644 index 00000000000..0178e229699 --- /dev/null +++ b/kernel/irq/numa_migrate.c @@ -0,0 +1,127 @@ +/* + * linux/kernel/irq/handle.c + * + * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar + * Copyright (C) 2005-2006, Thomas Gleixner, Russell King + * + * This file contains the core interrupt handling code. + * + * Detailed information is available in Documentation/DocBook/genericirq + * + */ + +#include +#include +#include +#include +#include + +#include "internals.h" + +static void init_copy_kstat_irqs(struct irq_desc *old_desc, + struct irq_desc *desc, + int cpu, int nr) +{ + unsigned long bytes; + + init_kstat_irqs(desc, cpu, nr); + + if (desc->kstat_irqs != old_desc->kstat_irqs) { + /* Compute how many bytes we need per irq and allocate them */ + bytes = nr * sizeof(unsigned int); + + memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes); + } +} + +static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) +{ + if (old_desc->kstat_irqs == desc->kstat_irqs) + return; + + kfree(old_desc->kstat_irqs); + old_desc->kstat_irqs = NULL; +} + +static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) +{ + memcpy(desc, old_desc, sizeof(struct irq_desc)); + desc->cpu = cpu; + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + arch_init_copy_chip_data(old_desc, desc, cpu); +} + +static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) +{ + free_kstat_irqs(old_desc, desc); + arch_free_chip_data(old_desc, desc); +} + +static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, + int cpu) +{ + struct irq_desc *desc; + unsigned int irq; + unsigned long flags; + int node; + + irq = old_desc->irq; + + spin_lock_irqsave(&sparse_irq_lock, flags); + + /* We have to check it to avoid races with another CPU */ + desc = irq_desc_ptrs[irq]; + + if (desc && old_desc != desc) + goto out_unlock; + + node = cpu_to_node(cpu); + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); + printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n", + irq, cpu, node); + if (!desc) { + printk(KERN_ERR "can not get new irq_desc for moving\n"); + /* still use old one */ + desc = old_desc; + goto out_unlock; + } + init_copy_one_irq_desc(irq, old_desc, desc, cpu); + + irq_desc_ptrs[irq] = desc; + + /* free the old one */ + free_one_irq_desc(old_desc, desc); + kfree(old_desc); + +out_unlock: + spin_unlock_irqrestore(&sparse_irq_lock, flags); + + return desc; +} + +struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) +{ + int old_cpu; + int node, old_node; + + /* those all static, do move them */ + if (desc->irq < NR_IRQS_LEGACY) + return desc; + + old_cpu = desc->cpu; + printk(KERN_DEBUG + "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu); + if (old_cpu != cpu) { + node = cpu_to_node(cpu); + old_node = cpu_to_node(old_cpu); + if (old_node != node) + desc = __real_move_irq_desc(desc, cpu); + else + desc->cpu = cpu; + } + + return desc; +} + -- cgit v1.2.3-70-g09d2