author     Andi Kleen <andi@basil.nowhere.org>    2006-11-21 10:22:09 +0100
committer  Andi Kleen <andi@basil.nowhere.org>    2006-11-21 10:22:09 +0100
commit     1b7f6a626f0ff511c3840678466cbfe1d62c0b29 (patch)
tree       415e8c838c0067bff384afb8a2c91e5f7c6d11d3 /arch/x86_64/kernel
parent     b3edc9cec07ade41aaf1804f7c9e876afa90c862 (diff)
parent     3f5a6ca31c334011fd929501a078424c0d3f71be (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r--  arch/x86_64/kernel/e820.c         |   4
-rw-r--r--  arch/x86_64/kernel/early-quirks.c |   8
-rw-r--r--  arch/x86_64/kernel/io_apic.c      | 169
-rw-r--r--  arch/x86_64/kernel/process.c      |   7
-rw-r--r--  arch/x86_64/kernel/smp.c          |   3
-rw-r--r--  arch/x86_64/kernel/smpboot.c      |   7
-rw-r--r--  arch/x86_64/kernel/time.c         |  11
-rw-r--r--  arch/x86_64/kernel/traps.c        |   6
-rw-r--r--  arch/x86_64/kernel/vmlinux.lds.S  |   8
-rw-r--r--  arch/x86_64/kernel/vsyscall.c     |  47
10 files changed, 187 insertions, 83 deletions
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index a75c829c2b0..6fe191c5808 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -278,7 +278,7 @@ e820_register_active_regions(int nid, unsigned long start_pfn,
>> PAGE_SHIFT;
/* Skip map entries smaller than a page */
- if (ei_startpfn > ei_endpfn)
+ if (ei_startpfn >= ei_endpfn)
continue;
/* Check if end_pfn_map should be updated */
@@ -594,7 +594,9 @@ static int __init parse_memmap_opt(char *p)
* size before original memory map is
* reset.
*/
+ e820_register_active_regions(0, 0, -1UL);
saved_max_pfn = e820_end_of_ram();
+ remove_all_active_ranges();
#endif
end_pfn_map = 0;
e820.nr_map = 0;
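Note on the e820.c hunks (not part of the patch): the first hunk fixes an off-by-one in the page-rounding check. e820_register_active_regions() rounds an entry's start up and its end down to page boundaries (the ">> PAGE_SHIFT" context above), so an entry that begins on a page boundary but is smaller than a page yields equal start and end pfns; the old ">" test let such zero-length ranges through. A worked example:

    /* hypothetical 2 KB e820 entry starting on a 4 KB page boundary */
    /* addr = 0x100000, size = 0x800                                 */
    ei_startpfn = 0x100000 >> PAGE_SHIFT;            /* = 0x100 (already aligned) */
    ei_endpfn   = (0x100000 + 0x800) >> PAGE_SHIFT;  /* = 0x100 (rounded down)    */
    /* old test: 0x100 >  0x100 is false -> an empty range gets registered */
    /* new test: 0x100 >= 0x100 is true  -> the entry is skipped           */

The second hunk registers active regions for the whole map before computing saved_max_pfn for memmap=exactmap and removes them again afterwards, since e820_end_of_ram() now derives its answer from the registered active regions.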
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index 2b1245d8625..68273bff58c 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -45,7 +45,13 @@ static void nvidia_bugs(void)
/*
* All timer overrides on Nvidia are
* wrong unless HPET is enabled.
+ * Unfortunately that's not true on many Asus boards.
+ * We don't know yet how to detect this automatically, but
+ * at least allow a command line override.
*/
+ if (acpi_use_timer_override)
+ return;
+
nvidia_hpet_detected = 0;
acpi_table_parse(ACPI_HPET, nvidia_hpet_check);
if (nvidia_hpet_detected == 0) {
@@ -53,6 +59,8 @@ static void nvidia_bugs(void)
printk(KERN_INFO "Nvidia board "
"detected. Ignoring ACPI "
"timer override.\n");
+ printk(KERN_INFO "If you got timer trouble "
+ "try acpi_use_timer_override\n");
}
#endif
/* RED-PEN skip them on mptables too? */
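Note (not part of the patch): the quirk now honors a boot-time opt-out -- on Nvidia chipsets the ACPI timer override is ignored unless acpi_use_timer_override is passed on the command line. The flag itself is defined outside this hunk; a minimal sketch of how such an early __setup option is typically wired up (names mirror the printk above, the real definition may differ):

    int acpi_use_timer_override __initdata;        /* sketch, not from this hunk */

    static int __init setup_acpi_use_timer_override(char *s)
    {
            acpi_use_timer_override = 1;
            return 0;
    }
    __setup("acpi_use_timer_override", setup_acpi_use_timer_override);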
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index b000017e4b5..14654e68241 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -63,7 +63,7 @@ int timer_over_8254 __initdata = 1;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+DEFINE_SPINLOCK(vector_lock);
/*
* # of IRQ routing registers
@@ -88,6 +88,52 @@ static struct irq_pin_list {
short apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];
+struct io_apic {
+ unsigned int index;
+ unsigned int unused[3];
+ unsigned int data;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+}
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ return readl(&io_apic->data);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ writel(value, &io_apic->data);
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int value)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(value, &io_apic->data);
+}
+
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ readl(&io_apic->data);
+}
+
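Note (not part of the patch): these helpers wrap the IO-APIC's indirect index/data register pair; struct io_apic simply overlays that MMIO window (index at offset 0x00, data at offset 0x10 because of the three unused words), so every access is "select register, then read or write data", done under ioapic_lock by the callers. A usage sketch with the helpers added above:

    unsigned long flags;
    unsigned int ver;

    spin_lock_irqsave(&ioapic_lock, flags);
    ver = io_apic_read(apic, 0x01);    /* select index 0x01 (version register), read data */
    spin_unlock_irqrestore(&ioapic_lock, flags);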
#define __DO_ACTION(R, ACTION, FINAL) \
\
{ \
@@ -126,12 +172,34 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
return eu.entry;
}
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
unsigned long flags;
union entry_union eu;
eu.entry = e;
spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+ unsigned long flags;
+ union entry_union eu = { .entry.mask = 1 };
+
+ spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
spin_unlock_irqrestore(&ioapic_lock, flags);
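Note (not part of the patch): both helpers go through the same two-word view of the 64-bit routing entry; the ordering differs because the low dword (w1) carries the mask bit while the high dword (w2) carries the destination. Writing w2 first when installing an entry guarantees destination and vector are in place before a possibly unmasked low word lands; writing w1 first when masking sets the mask bit before anything else changes. The union is declared earlier in this file, roughly:

    union entry_union {
            struct { u32 w1, w2; };
            struct IO_APIC_route_entry entry;
    };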
@@ -256,9 +324,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
/*
* Disable it in the IO-APIC irq-routing table:
*/
- memset(&entry, 0, sizeof(entry));
- entry.mask = 1;
- ioapic_write_entry(apic, pin, entry);
+ ioapic_mask_entry(apic, pin);
}
static void clear_IO_APIC (void)
@@ -612,15 +678,15 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
* Also, we've got to be careful not to trash gate
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
- static struct {
- int vector;
- int offset;
- } pos[NR_CPUS] = { [ 0 ... NR_CPUS - 1] = {FIRST_DEVICE_VECTOR, 0} };
+ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
int old_vector = -1;
int cpu;
BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
+ /* Only try and allocate irqs on cpus that are present */
+ cpus_and(mask, mask, cpu_online_map);
+
if (irq_vector[irq] > 0)
old_vector = irq_vector[irq];
if (old_vector > 0) {
@@ -630,15 +696,15 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
}
for_each_cpu_mask(cpu, mask) {
- cpumask_t domain;
- int first, new_cpu;
+ cpumask_t domain, new_mask;
+ int new_cpu;
int vector, offset;
domain = vector_allocation_domain(cpu);
- first = first_cpu(domain);
+ cpus_and(new_mask, domain, cpu_online_map);
- vector = pos[first].vector;
- offset = pos[first].offset;
+ vector = current_vector;
+ offset = current_offset;
next:
vector += 8;
if (vector >= FIRST_SYSTEM_VECTOR) {
@@ -646,24 +712,24 @@ next:
offset = (offset + 1) % 8;
vector = FIRST_DEVICE_VECTOR + offset;
}
- if (unlikely(pos[first].vector == vector))
+ if (unlikely(current_vector == vector))
continue;
if (vector == IA32_SYSCALL_VECTOR)
goto next;
- for_each_cpu_mask(new_cpu, domain)
+ for_each_cpu_mask(new_cpu, new_mask)
if (per_cpu(vector_irq, new_cpu)[vector] != -1)
goto next;
/* Found one! */
- for_each_cpu_mask(new_cpu, domain) {
- pos[new_cpu].vector = vector;
- pos[new_cpu].offset = offset;
- }
+ current_vector = vector;
+ current_offset = offset;
if (old_vector >= 0) {
+ cpumask_t old_mask;
int old_cpu;
- for_each_cpu_mask(old_cpu, irq_domain[irq])
+ cpus_and(old_mask, irq_domain[irq], cpu_online_map);
+ for_each_cpu_mask(old_cpu, old_mask)
per_cpu(vector_irq, old_cpu)[old_vector] = -1;
}
- for_each_cpu_mask(new_cpu, domain)
+ for_each_cpu_mask(new_cpu, new_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq;
irq_vector[irq] = vector;
irq_domain[irq] = domain;
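Note (not part of the patch): the allocator rewrite replaces the per-CPU pos[] bookkeeping with a single global cursor (current_vector/current_offset) and restricts both allocation and cleanup to CPUs that are online. The search itself is unchanged: the cursor advances in steps of 8 and, on reaching FIRST_SYSTEM_VECTOR, wraps back to FIRST_DEVICE_VECTOR plus a rotating offset, rejecting int 0x80 explicitly. A standalone sketch of that walk (same arithmetic as the hunk above):

    int vector = current_vector, offset = current_offset;

    vector += 8;                                   /* next candidate          */
    if (vector >= FIRST_SYSTEM_VECTOR) {           /* ran off the top:        */
            offset = (offset + 1) % 8;             /*   rotate the start slot */
            vector = FIRST_DEVICE_VECTOR + offset; /*   and wrap around       */
    }
    /* candidates equal to IA32_SYSCALL_VECTOR (int 0x80) are skipped */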
@@ -684,6 +750,32 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
return vector;
}
+void __setup_vector_irq(int cpu)
+{
+ /* Initialize vector_irq on a new cpu */
+ /* This function must be called with vector_lock held */
+ unsigned long flags;
+ int irq, vector;
+
+
+ /* Mark the inuse vectors */
+ for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
+ if (!cpu_isset(cpu, irq_domain[irq]))
+ continue;
+ vector = irq_vector[irq];
+ per_cpu(vector_irq, cpu)[vector] = irq;
+ }
+ /* Mark the free vectors */
+ for (vector = 0; vector < NR_VECTORS; ++vector) {
+ irq = per_cpu(vector_irq, cpu)[vector];
+ if (irq < 0)
+ continue;
+ if (!cpu_isset(cpu, irq_domain[irq]))
+ per_cpu(vector_irq, cpu)[vector] = -1;
+ }
+}
+
+
extern void (*interrupt[NR_IRQS])(void);
static struct irq_chip ioapic_chip;
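Note (not part of the patch): __setup_vector_irq() seeds the per-CPU vector-to-IRQ table for a CPU coming online and clears any slots that no longer belong to it; as the comment says, the caller must hold vector_lock (start_secondary() does, see the smpboot.c hunk below). The table it fills is the per-CPU vector_irq array, declared in this tree roughly as (sketch, assuming the usual asm/hw_irq.h definition):

    typedef int vector_irq_t[NR_VECTORS];
    DECLARE_PER_CPU(vector_irq_t, vector_irq);
    /* per_cpu(vector_irq, cpu)[vector] holds the IRQ number, or -1 if free */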
@@ -698,9 +790,11 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
trigger == IOAPIC_LEVEL)
set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_fasteoi_irq, "fasteoi");
- else
+ else {
+ irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_edge_irq, "edge");
+ }
}
static void __init setup_IO_APIC_irqs(void)
@@ -1863,18 +1957,16 @@ void arch_teardown_msi_irq(unsigned int irq)
static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
{
- u32 low, high;
- low = read_ht_irq_low(irq);
- high = read_ht_irq_high(irq);
+ struct ht_irq_msg msg;
+ fetch_ht_irq_msg(irq, &msg);
- low &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
- high &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
- low |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
- high |= HT_IRQ_HIGH_DEST_ID(dest);
+ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
- write_ht_irq_low(irq, low);
- write_ht_irq_high(irq, high);
+ write_ht_irq_msg(irq, &msg);
}
static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
@@ -1895,7 +1987,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
dest = cpu_mask_to_apicid(tmp);
- target_ht_irq(irq, dest, vector & 0xff);
+ target_ht_irq(irq, dest, vector);
set_native_irq_info(irq, mask);
}
#endif
@@ -1918,14 +2010,15 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
if (vector >= 0) {
- u32 low, high;
+ struct ht_irq_msg msg;
unsigned dest;
dest = cpu_mask_to_apicid(tmp);
- high = HT_IRQ_HIGH_DEST_ID(dest);
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
- low = HT_IRQ_LOW_BASE |
+ msg.address_lo =
+ HT_IRQ_LOW_BASE |
HT_IRQ_LOW_DEST_ID(dest) |
HT_IRQ_LOW_VECTOR(vector) |
((INT_DEST_MODE == 0) ?
@@ -1934,10 +2027,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
HT_IRQ_LOW_RQEOI_EDGE |
((INT_DELIVERY_MODE != dest_LowestPrio) ?
HT_IRQ_LOW_MT_FIXED :
- HT_IRQ_LOW_MT_ARBITRATED);
+ HT_IRQ_LOW_MT_ARBITRATED) |
+ HT_IRQ_LOW_IRQ_MASKED;
- write_ht_irq_low(irq, low);
- write_ht_irq_high(irq, high);
+ write_ht_irq_msg(irq, &msg);
set_irq_chip_and_handler_name(irq, &ht_irq_chip,
handle_edge_irq, "edge");
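Note (not part of the patch): the HyperTransport IRQ code switches from separate low/high register accessors to a single struct ht_irq_msg, and newly allocated HT interrupts now start out masked (HT_IRQ_LOW_IRQ_MASKED) so they cannot fire before the handler is in place. The message type comes from include/linux/htirq.h and is essentially:

    struct ht_irq_msg {
            u32 address_lo;    /* low 32 bits of the HT interrupt address */
            u32 address_hi;    /* high 32 bits                            */
    };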
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 49f7fac6229..7451a4c43c1 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -88,9 +88,8 @@ void enter_idle(void)
static void __exit_idle(void)
{
- if (read_pda(isidle) == 0)
+ if (test_and_clear_bit_pda(0, isidle) == 0)
return;
- write_pda(isidle, 0);
atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
@@ -145,7 +144,7 @@ static void poll_idle (void)
void cpu_idle_wait(void)
{
unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map;
+ cpumask_t map, tmp = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
put_cpu();
@@ -168,6 +167,8 @@ void cpu_idle_wait(void)
}
cpus_and(map, map, cpu_online_map);
} while (!cpus_empty(map));
+
+ set_cpus_allowed(current, tmp);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
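Note (not part of the patch): two small fixes here. cpu_idle_wait() now remembers and restores the caller's CPU affinity instead of leaving it pinned, and __exit_idle() tests and clears the per-CPU isidle flag in one atomic PDA operation. With the old read-then-write pair, an exit path running from an interrupt between the two steps could also see the flag set and run the IDLE_END notifiers twice; roughly, the window being closed:

    if (read_pda(isidle) == 0)      /* step 1: read                             */
            return;
    /* <-- a second exit path slipping in here also passes step 1 */
    write_pda(isidle, 0);           /* step 2: clear                            */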
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 4f67697f503..9f74c883568 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -376,9 +376,8 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
/* prevent preemption and reschedule on another processor */
int me = get_cpu();
if (cpu == me) {
- WARN_ON(1);
put_cpu();
- return -EBUSY;
+ return 0;
}
spin_lock_bh(&call_lock);
__smp_call_function_single(cpu, func, info, nonatomic, wait);
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 7b7a6870288..62c2e747af5 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -581,12 +581,16 @@ void __cpuinit start_secondary(void)
* smp_call_function().
*/
lock_ipi_call_lock();
+ spin_lock(&vector_lock);
+ /* Setup the per cpu irq handling data structures */
+ __setup_vector_irq(smp_processor_id());
/*
* Allow the master to continue.
*/
cpu_set(smp_processor_id(), cpu_online_map);
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+ spin_unlock(&vector_lock);
unlock_ipi_call_lock();
cpu_idle();
@@ -799,7 +803,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
cpu, node);
}
-
alternatives_smp_switch(1);
c_idle.idle = get_idle_for_cpu(cpu);
@@ -1246,8 +1249,10 @@ int __cpu_disable(void)
local_irq_disable();
remove_siblinginfo(cpu);
+ spin_lock(&vector_lock);
/* It's now safe to remove this processor from the online map */
cpu_clear(cpu, cpu_online_map);
+ spin_unlock(&vector_lock);
remove_cpu_from_maps();
fixup_irqs(cpu_online_map);
return 0;
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 88722f11ca1..e3ef544d2cf 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -876,15 +876,6 @@ static struct irqaction irq0 = {
timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
};
-static int __cpuinit
-time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
-{
- unsigned cpu = (unsigned long) hcpu;
- if (action == CPU_ONLINE)
- vsyscall_set_cpu(cpu);
- return NOTIFY_DONE;
-}
-
void __init time_init(void)
{
if (nohpet)
@@ -925,8 +916,6 @@ void __init time_init(void)
vxtime.last_tsc = get_cycles_sync();
set_cyc2ns_scale(cpu_khz);
setup_irq(0, &irq0);
- hotcpu_notifier(time_cpu_notifier, 0);
- time_cpu_notifier(NULL, CPU_ONLINE, (void *)(long)smp_processor_id());
#ifndef CONFIG_SMP
time_init_gtod();
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 7819022a8db..a153d0a01b7 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -290,6 +290,12 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
if (tsk && tsk != current)
stack = (unsigned long *)tsk->thread.rsp;
}
+ /*
+ * Align the stack pointer on word boundary, later loops
+ * rely on that (and corruption / debug info bugs can cause
+ * unaligned values here):
+ */
+ stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
/*
* Print function call entries within a stack. 'cond' is the
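Note (not part of the patch): the stack pointer handed to the dumper is rounded down to a word boundary before the later loops walk it, since corrupted or debug-supplied values are not guaranteed aligned. Worked through (sizeof(long) is 8 on x86_64, so the mask is ~7UL):

    /* e.g. stack == 0xffff81000f5bdc0b                    */
    /* 0xffff81000f5bdc0b & ~7UL == 0xffff81000f5bdc08     */
    stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long) - 1));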
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 1283614c9b2..edb24aa714b 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -175,13 +175,7 @@ SECTIONS
__setup_end = .;
__initcall_start = .;
.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
- *(.initcall1.init)
- *(.initcall2.init)
- *(.initcall3.init)
- *(.initcall4.init)
- *(.initcall5.init)
- *(.initcall6.init)
- *(.initcall7.init)
+ INITCALLS
}
__initcall_end = .;
__con_initcall_start = .;
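Note (not part of the patch): the hand-written list of .initcallN.init sections is replaced by the INITCALLS macro from include/asm-generic/vmlinux.lds.h, which covers the same seven levels plus the newly added per-level "sync" sections used when probes run in threads. An abridged sketch of that macro in this era:

    #define INITCALLS                    \
            *(.initcall1.init)           \
            *(.initcall1s.init)          \
            *(.initcall2.init)           \
            *(.initcall2s.init)          \
            /* ... levels 3 through 6 ... */ \
            *(.initcall7.init)           \
            *(.initcall7s.init)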
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index a98b460af6a..92546c1526f 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -27,6 +27,9 @@
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/getcpu.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/notifier.h>
#include <asm/vsyscall.h>
#include <asm/pgtable.h>
@@ -243,32 +246,17 @@ static ctl_table kernel_root_table2[] = {
#endif
-static void __cpuinit write_rdtscp_cb(void *info)
-{
- write_rdtscp_aux((unsigned long)info);
-}
-
-void __cpuinit vsyscall_set_cpu(int cpu)
+/* Assume __initcall executes before all user space. Hopefully kmod
+ doesn't violate that. We'll find out if it does. */
+static void __cpuinit vsyscall_set_cpu(int cpu)
{
unsigned long *d;
unsigned long node = 0;
#ifdef CONFIG_NUMA
node = cpu_to_node[cpu];
#endif
- if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
- void *info = (void *)((node << 12) | cpu);
- /* Can happen on preemptive kernel */
- if (get_cpu() == cpu)
- write_rdtscp_cb(info);
-#ifdef CONFIG_SMP
- else {
- /* the notifier is unfortunately not executed on the
- target CPU */
- smp_call_function_single(cpu,write_rdtscp_cb,info,0,1);
- }
-#endif
- put_cpu();
- }
+ if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
+ write_rdtscp_aux((node << 12) | cpu);
/* Store cpu number in limit so that it can be loaded quickly
in user space in vgetcpu.
@@ -280,6 +268,23 @@ void __cpuinit vsyscall_set_cpu(int cpu)
*d |= (node >> 4) << 48;
}
+static void __cpuinit cpu_vsyscall_init(void *arg)
+{
+ /* preemption should be already off */
+ vsyscall_set_cpu(raw_smp_processor_id());
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int __cpuinit
+cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
+{
+ long cpu = (long)arg;
+ if (action == CPU_ONLINE)
+ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
+ return NOTIFY_DONE;
+}
+#endif
+
static void __init map_vsyscall(void)
{
extern char __vsyscall_0;
@@ -299,6 +304,8 @@ static int __init vsyscall_init(void)
#ifdef CONFIG_SYSCTL
register_sysctl_table(kernel_root_table2, 0);
#endif
+ on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
+ hotcpu_notifier(cpu_vsyscall_notifier, 0);
return 0;
}
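Note (not part of the patch): vsyscall_set_cpu() encodes the CPU number in the low 12 bits and the node in the bits above, both into TSC_AUX (write_rdtscp_aux((node << 12) | cpu)) and into the limit field of a per-CPU GDT descriptor, so vgetcpu() can recover them from user space without entering the kernel. A user-space sketch of the RDTSCP path (the non-RDTSCP path instead reads the GDT limit with lsl):

    unsigned int p, cpu, node;

    /* rdtscp loads TSC_AUX into ecx (the TSC itself lands in edx:eax) */
    asm volatile("rdtscp" : "=c" (p) : : "eax", "edx");
    cpu  = p & 0xfff;    /* low 12 bits: CPU number   */
    node = p >> 12;      /* bits 12 and up: NUMA node */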