diff options
Diffstat (limited to 'arch/x86')
101 files changed, 1707 insertions, 1320 deletions
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index c624193740f..7ff02063b85 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -7,7 +7,7 @@ extra-y := head_32.o init_task_32.o vmlinux.lds obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\ - quirks.o i8237.o topology.o alternative.o i8253_32.o tsc_32.o + quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ @@ -37,9 +37,9 @@ obj-$(CONFIG_EFI) += efi_32.o efi_stub_32.o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_HPET_TIMER) += hpet_32.o +obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o -obj-$(CONFIG_MGEODE_LX) += geode_32.o +obj-$(CONFIG_MGEODE_LX) += geode_32.o mfgpt_32.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o obj-$(CONFIG_PARAVIRT) += paravirt_32.o diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 3ab017a0a3b..43da66213a4 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -8,8 +8,8 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \ x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \ - pci-dma_64.o pci-nommu_64.o alternative.o hpet_64.o tsc_64.o bugs_64.o \ - perfctr-watchdog.o + pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \ + perfctr-watchdog.o i8253.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_X86_MCE) += mce_64.o therm_throt.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index bd72d94e713..11b03d3c6fd 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -10,6 +10,7 @@ #include <asm/pgtable.h> #include <asm/mce.h> #include <asm/nmi.h> +#include <asm/vsyscall.h> #define MAX_PATCH_LEN (255-1) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 925758dbca0..09b82093bc7 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -25,6 +25,7 @@ #include <linux/sysdev.h> #include <linux/module.h> #include <linux/ioport.h> +#include <linux/clockchips.h> #include <asm/atomic.h> #include <asm/smp.h> @@ -39,12 +40,9 @@ #include <asm/hpet.h> #include <asm/apic.h> -int apic_mapped; int apic_verbosity; -int apic_runs_main_timer; -int apic_calibrate_pmtmr __initdata; - -int disable_apic_timer __initdata; +int disable_apic_timer __cpuinitdata; +static int apic_calibrate_pmtmr __initdata; /* Local APIC timer works in C2? */ int local_apic_timer_c2_ok; @@ -56,14 +54,78 @@ static struct resource lapic_resource = { .flags = IORESOURCE_MEM | IORESOURCE_BUSY, }; +static unsigned int calibration_result; + +static int lapic_next_event(unsigned long delta, + struct clock_event_device *evt); +static void lapic_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt); + +static void lapic_timer_broadcast(cpumask_t mask); + +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen); + +static struct clock_event_device lapic_clockevent = { + .name = "lapic", + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT + | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, + .shift = 32, + .set_mode = lapic_timer_setup, + .set_next_event = lapic_next_event, + .broadcast = lapic_timer_broadcast, + .rating = 100, + .irq = -1, +}; +static DEFINE_PER_CPU(struct clock_event_device, lapic_events); + +static int lapic_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + apic_write(APIC_TMICT, delta); + return 0; +} + +static void lapic_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + unsigned long flags; + unsigned int v; + + /* Lapic used as dummy for broadcast ? */ + if (evt->features & CLOCK_EVT_FEAT_DUMMY) + return; + + local_irq_save(flags); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_ONESHOT: + __setup_APIC_LVTT(calibration_result, + mode != CLOCK_EVT_MODE_PERIODIC, 1); + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); + break; + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here */ + break; + } + + local_irq_restore(flags); +} + /* - * cpu_mask that denotes the CPUs that needs timer interrupt coming in as - * IPIs in place of local APIC timers + * Local APIC timer broadcast function */ -static cpumask_t timer_interrupt_broadcast_ipi_mask; - -/* Using APIC to generate smp_local_timer_interrupt? */ -int using_apic_timer __read_mostly = 0; +static void lapic_timer_broadcast(cpumask_t mask) +{ +#ifdef CONFIG_SMP + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +#endif +} static void apic_pm_activate(void); @@ -184,7 +246,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_SPIV, value); if (!virt_wire_setup) { - /* For LVT0 make it edge triggered, active high, external and enabled */ + /* + * For LVT0 make it edge triggered, active high, + * external and enabled + */ value = apic_read(APIC_LVT0); value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | @@ -420,10 +485,12 @@ void __cpuinit setup_local_APIC (void) value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; if (!smp_processor_id() && !value) { value = APIC_DM_EXTINT; - apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", smp_processor_id()); + apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", + smp_processor_id()); } else { value = APIC_DM_EXTINT | APIC_LVT_MASKED; - apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", smp_processor_id()); + apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", + smp_processor_id()); } apic_write(APIC_LVT0, value); @@ -706,8 +773,8 @@ void __init init_apic_mappings(void) apic_phys = mp_lapic_addr; set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - apic_mapped = 1; - apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, apic_phys); /* Put local APIC into the resource map. */ lapic_resource.start = apic_phys; @@ -730,12 +797,14 @@ void __init init_apic_mappings(void) if (smp_found_config) { ioapic_phys = mp_ioapics[i].mpc_apicaddr; } else { - ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long) + alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", - __fix_to_virt(idx), ioapic_phys); + apic_printk(APIC_VERBOSE, + "mapped IOAPIC to %016lx (%016lx)\n", + __fix_to_virt(idx), ioapic_phys); idx++; if (ioapic_res != NULL) { @@ -758,16 +827,14 @@ void __init init_apic_mappings(void) * P5 APIC double write bug. */ -#define APIC_DIVISOR 16 - -static void __setup_APIC_LVTT(unsigned int clocks) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { unsigned int lvtt_value, tmp_value; - int cpu = smp_processor_id(); - lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; - - if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!irqen) lvtt_value |= APIC_LVT_MASKED; apic_write(APIC_LVTT, lvtt_value); @@ -780,44 +847,18 @@ static void __setup_APIC_LVTT(unsigned int clocks) & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write(APIC_TMICT, clocks/APIC_DIVISOR); + if (!oneshot) + apic_write(APIC_TMICT, clocks); } -static void setup_APIC_timer(unsigned int clocks) +static void setup_APIC_timer(void) { - unsigned long flags; + struct clock_event_device *levt = &__get_cpu_var(lapic_events); - local_irq_save(flags); + memcpy(levt, &lapic_clockevent, sizeof(*levt)); + levt->cpumask = cpumask_of_cpu(smp_processor_id()); - /* wait for irq slice */ - if (hpet_address && hpet_use_timer) { - u32 trigger = hpet_readl(HPET_T0_CMP); - while (hpet_readl(HPET_T0_CMP) == trigger) - /* do nothing */ ; - } else { - int c1, c2; - outb_p(0x00, 0x43); - c2 = inb_p(0x40); - c2 |= inb_p(0x40) << 8; - do { - c1 = c2; - outb_p(0x00, 0x43); - c2 = inb_p(0x40); - c2 |= inb_p(0x40) << 8; - } while (c2 - c1 < 300); - } - __setup_APIC_LVTT(clocks); - /* Turn off PIT interrupt if we use APIC timer as main timer. - Only works with the PM timer right now - TBD fix it for HPET too. */ - if ((pmtmr_ioport != 0) && - smp_processor_id() == boot_cpu_id && - apic_runs_main_timer == 1 && - !cpu_isset(boot_cpu_id, timer_interrupt_broadcast_ipi_mask)) { - stop_timer_interrupt(); - apic_runs_main_timer++; - } - local_irq_restore(flags); + clockevents_register_device(levt); } /* @@ -835,17 +876,22 @@ static void setup_APIC_timer(unsigned int clocks) #define TICK_COUNT 100000000 -static int __init calibrate_APIC_clock(void) +static void __init calibrate_APIC_clock(void) { unsigned apic, apic_start; unsigned long tsc, tsc_start; int result; + + local_irq_disable(); + /* * Put whatever arbitrary (but long enough) timeout * value into the APIC clock, we just want to get the * counter running for calibration. + * + * No interrupt enable ! */ - __setup_APIC_LVTT(4000000000); + __setup_APIC_LVTT(250000000, 0, 0); apic_start = apic_read(APIC_TMCCT); #ifdef CONFIG_X86_PM_TIMER @@ -867,123 +913,88 @@ static int __init calibrate_APIC_clock(void) result = (apic_start - apic) * 1000L * tsc_khz / (tsc - tsc_start); } - printk("result %d\n", result); + local_irq_enable(); + + printk(KERN_DEBUG "APIC timer calibration result %d\n", result); printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", result / 1000 / 1000, result / 1000 % 1000); - return result * APIC_DIVISOR / HZ; -} + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); -static unsigned int calibration_result; + calibration_result = result / HZ; +} void __init setup_boot_APIC_clock (void) { + /* + * The local apic timer can be disabled via the kernel commandline. + * Register the lapic timer as a dummy clock event source on SMP + * systems, so the broadcast mechanism is used. On UP systems simply + * ignore it. + */ if (disable_apic_timer) { printk(KERN_INFO "Disabling APIC timer\n"); + /* No broadcast on UP ! */ + if (num_possible_cpus() > 1) + setup_APIC_timer(); return; } printk(KERN_INFO "Using local APIC timer interrupts.\n"); - using_apic_timer = 1; - - local_irq_disable(); + calibrate_APIC_clock(); - calibration_result = calibrate_APIC_clock(); /* - * Now set up the timer for real. + * If nmi_watchdog is set to IO_APIC, we need the + * PIT/HPET going. Otherwise register lapic as a dummy + * device. */ - setup_APIC_timer(calibration_result); - - local_irq_enable(); -} - -void __cpuinit setup_secondary_APIC_clock(void) -{ - local_irq_disable(); /* FIXME: Do we need this? --RR */ - setup_APIC_timer(calibration_result); - local_irq_enable(); -} - -void disable_APIC_timer(void) -{ - if (using_apic_timer) { - unsigned long v; + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + printk(KERN_WARNING "APIC timer registered as dummy," + " due to nmi_watchdog=1!\n"); - v = apic_read(APIC_LVTT); - /* - * When an illegal vector value (0-15) is written to an LVT - * entry and delivery mode is Fixed, the APIC may signal an - * illegal vector error, with out regard to whether the mask - * bit is set or whether an interrupt is actually seen on input. - * - * Boot sequence might call this function when the LVTT has - * '0' vector value. So make sure vector field is set to - * valid value. - */ - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, v); - } + setup_APIC_timer(); } -void enable_APIC_timer(void) +/* + * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the + * C1E flag only in the secondary CPU, so when we detect the wreckage + * we already have enabled the boot CPU local apic timer. Check, if + * disable_apic_timer is set and the DUMMY flag is cleared. If yes, + * set the DUMMY flag again and force the broadcast mode in the + * clockevents layer. + */ +void __cpuinit check_boot_apic_timer_broadcast(void) { - int cpu = smp_processor_id(); + struct clock_event_device *levt = &per_cpu(lapic_events, boot_cpu_id); - if (using_apic_timer && - !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { - unsigned long v; + if (!disable_apic_timer || + (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) + return; - v = apic_read(APIC_LVTT); - apic_write(APIC_LVTT, v & ~APIC_LVT_MASKED); - } -} + printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); + lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; + levt->features |= CLOCK_EVT_FEAT_DUMMY; -void switch_APIC_timer_to_ipi(void *cpumask) -{ - cpumask_t mask = *(cpumask_t *)cpumask; - int cpu = smp_processor_id(); - - if (cpu_isset(cpu, mask) && - !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { - disable_APIC_timer(); - cpu_set(cpu, timer_interrupt_broadcast_ipi_mask); - } + local_irq_enable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); + local_irq_disable(); } -EXPORT_SYMBOL(switch_APIC_timer_to_ipi); -void smp_send_timer_broadcast_ipi(void) +void __cpuinit setup_secondary_APIC_clock(void) { - int cpu = smp_processor_id(); - cpumask_t mask; - - cpus_and(mask, cpu_online_map, timer_interrupt_broadcast_ipi_mask); - - if (cpu_isset(cpu, mask)) { - cpu_clear(cpu, mask); - add_pda(apic_timer_irqs, 1); - smp_local_timer_interrupt(); - } - - if (!cpus_empty(mask)) { - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); - } + check_boot_apic_timer_broadcast(); + setup_APIC_timer(); } -void switch_ipi_to_APIC_timer(void *cpumask) -{ - cpumask_t mask = *(cpumask_t *)cpumask; - int cpu = smp_processor_id(); - - if (cpu_isset(cpu, mask) && - cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { - cpu_clear(cpu, timer_interrupt_broadcast_ipi_mask); - enable_APIC_timer(); - } -} -EXPORT_SYMBOL(switch_ipi_to_APIC_timer); - int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; @@ -997,8 +1008,6 @@ void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector, apic_write(reg, v); } -#undef APIC_DIVISOR - /* * Local timer interrupt handler. It does both profiling and * process statistics/rescheduling. @@ -1011,22 +1020,34 @@ void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector, void smp_local_timer_interrupt(void) { - profile_tick(CPU_PROFILING); -#ifdef CONFIG_SMP - update_process_times(user_mode(get_irq_regs())); -#endif - if (apic_runs_main_timer > 1 && smp_processor_id() == boot_cpu_id) - main_timer_handler(); + int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + /* - * We take the 'long' return path, and there every subsystem - * grabs the appropriate locks (kernel lock/ irq lock). + * Normally we should not be here till LAPIC has been initialized but + * in some cases like kdump, its possible that there is a pending LAPIC + * timer interrupt from previous kernel's context and is delivered in + * new kernel the moment interrupts are enabled. * - * We might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. + * Interrupts are enabled early and LAPIC is setup much later, hence + * its possible that when we get here evt->event_handler is NULL. + * Check for event_handler being NULL and discard the interrupt as + * spurious. + */ + if (!evt->event_handler) { + printk(KERN_WARNING + "Spurious LAPIC timer interrupt on cpu %d\n", cpu); + /* Switch it off */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); + return; + } + + /* + * the NMI deadlock-detector uses this. */ + add_pda(apic_timer_irqs, 1); + + evt->event_handler(evt); } /* @@ -1042,11 +1063,6 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); /* - * the NMI deadlock-detector uses this. - */ - add_pda(apic_timer_irqs, 1); - - /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. */ @@ -1225,29 +1241,13 @@ static __init int setup_noapictimer(char *str) disable_apic_timer = 1; return 1; } - -static __init int setup_apicmaintimer(char *str) -{ - apic_runs_main_timer = 1; - nohpet = 1; - return 1; -} -__setup("apicmaintimer", setup_apicmaintimer); - -static __init int setup_noapicmaintimer(char *str) -{ - apic_runs_main_timer = -1; - return 1; -} -__setup("noapicmaintimer", setup_noapicmaintimer); +__setup("noapictimer", setup_noapictimer); static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; notsc_setup(NULL); - return setup_apicmaintimer(NULL); + return 0; } __setup("apicpmtimer", setup_apicpmtimer); -__setup("noapictimer", setup_noapictimer); - diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/bugs_64.c index 4e5e9d364d6..9a189cef640 100644 --- a/arch/x86/kernel/bugs_64.c +++ b/arch/x86/kernel/bugs_64.c @@ -1,6 +1,4 @@ /* - * arch/x86_64/kernel/bugs.c - * * Copyright (C) 1994 Linus Torvalds * Copyright (C) 2000 SuSE */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 59266f03d1c..205fd5ba57f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1,6 +1,4 @@ /* - * arch/i386/cpu/bugs.c - * * Copyright (C) 1994 Linus Torvalds * * Cyrix stuff, June 1998 by: diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index b6434a7ef8b..2ca43ba32bc 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -595,7 +595,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) dmi_check_system(sw_any_bug_dmi_table); if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - policy->cpus = cpu_core_map[cpu]; + policy->cpus = per_cpu(cpu_core_map, cpu); } #endif @@ -646,7 +646,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = perf->states[i].transition_latency * 1000; } - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; data->max_freq = perf->states[0].core_frequency * 1000; /* table init */ diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index 66acd503991..32f0bda3fc9 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -363,7 +363,6 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) policy->cur = nforce2_get(policy->cpu); policy->min = policy->cpuinfo.min_freq; policy->max = policy->cpuinfo.max_freq; - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; return 0; } diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index f43d98e11cc..c11baaf9f2b 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -253,7 +253,6 @@ static int eps_cpu_init(struct cpufreq_policy *policy) f_table[k].frequency = CPUFREQ_TABLE_END; } - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */ policy->cur = fsb * current_multiplier; diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index f317276afa7..1e7ae7dafcf 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -219,7 +219,6 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) } /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; policy->cur = elanfreq_get_cpu_frequency(0); diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index 461dabc4e49..ed2bda127c4 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -420,7 +420,6 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) policy->min = maxfreq / POLICY_MIN_DIV; policy->max = maxfreq; policy->cur = curfreq; - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cpuinfo.min_freq = maxfreq / max_duration; policy->cpuinfo.max_freq = maxfreq; policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index f0cce3c2dc3..5045f5d583c 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -710,6 +710,10 @@ static int enable_arbiter_disable(void) reg = 0x78; dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, NULL); + /* Find PM133/VT8605 host bridge */ + if (dev == NULL) + dev = pci_get_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_8605_0, NULL); /* Find CLE266 host bridge */ if (dev == NULL) { reg = 0x76; @@ -918,7 +922,6 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) longhaul_setup_voltagescaling(); - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cpuinfo.transition_latency = 200000; /* nsec */ policy->cur = calc_speed(longhaul_get_cpu_mult()); diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 4c76b511e19..793eae854f4 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -200,7 +200,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) unsigned int i; #ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; + policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); #endif /* Errata workaround */ @@ -229,7 +229,6 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cpuinfo.transition_latency = 1000000; /* assumed */ policy->cur = stock_freq; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index f89524051e4..6d028533931 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -160,7 +160,6 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) } /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; policy->cur = busfreq * max_multiplier; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index ca3e1d34188..7decd6a50ff 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -637,8 +637,6 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", minimum_speed/1000, maximum_speed/1000); - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); policy->cur = powernow_get(0); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 34ed53a0673..c06ac680c9c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -57,7 +57,7 @@ static struct powernow_k8_data *powernow_data[NR_CPUS]; static int cpu_family = CPU_OPTERON; #ifndef CONFIG_SMP -static cpumask_t cpu_core_map[1]; +DEFINE_PER_CPU(cpumask_t, cpu_core_map); #endif /* Return a frequency in MHz, given an input fid */ @@ -76,7 +76,10 @@ static u32 find_khz_freq_from_fid(u32 fid) /* Return a frequency in MHz, given an input fid and did */ static u32 find_freq_from_fiddid(u32 fid, u32 did) { - return 100 * (fid + 0x10) >> did; + if (current_cpu_data.x86 == 0x10) + return 100 * (fid + 0x10) >> did; + else + return 100 * (fid + 0x8) >> did; } static u32 find_khz_freq_from_fiddid(u32 fid, u32 did) @@ -664,7 +667,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); data->powernow_table = powernow_table; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) print_basics(data); for (j = 0; j < data->numps; j++) @@ -818,7 +821,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* fill in data */ data->numps = data->acpi_data.state_count; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) print_basics(data); powernow_k8_acpi_pst_values(data, 0); @@ -1208,11 +1211,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) /* run on any CPU again */ set_cpus_allowed(current, oldmask); - pol->governor = CPUFREQ_DEFAULT_GOVERNOR; if (cpu_family == CPU_HW_PSTATE) pol->cpus = cpumask_of_cpu(pol->cpu); else - pol->cpus = cpu_core_map[pol->cpu]; + pol->cpus = per_cpu(cpu_core_map, pol->cpu); data->available_cores = &(pol->cpus); /* Take a crude guess here. @@ -1279,7 +1281,7 @@ static unsigned int powernowk8_get (unsigned int cpu) cpumask_t oldmask = current->cpus_allowed; unsigned int khz = 0; - data = powernow_data[first_cpu(cpu_core_map[cpu])]; + data = powernow_data[first_cpu(per_cpu(cpu_core_map, cpu))]; if (!data) return -EINVAL; @@ -1325,21 +1327,16 @@ static struct cpufreq_driver cpufreq_amd64_driver = { static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; - unsigned int booted_cores = 1; for_each_online_cpu(i) { if (check_supported_cpu(i)) supported_cpus++; } -#ifdef CONFIG_SMP - booted_cores = cpu_data[0].booted_cores; -#endif - if (supported_cpus == num_online_cpus()) { printk(KERN_INFO PFX "Found %d %s " "processors (%d cpu cores) (" VERSION ")\n", - supported_cpus/booted_cores, + num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); return cpufreq_register_driver(&cpufreq_amd64_driver); } diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c index b8fb4b521c6..d9f3e90a7ae 100644 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -111,7 +111,6 @@ static int sc520_freq_cpu_init(struct cpufreq_policy *policy) return -ENODEV; /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cpuinfo.transition_latency = 1000000; /* 1ms */ policy->cur = sc520_freq_get_cpu_frequency(0); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 6c5dc2c85ae..811d4743854 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -393,7 +393,6 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) freq = get_cur_freq(policy->cpu); - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ policy->cur = freq; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index a5b2346faf1..14d68aa301e 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -322,7 +322,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) /* only run on CPU to be set, or on its sibling */ #ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; + policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); #endif cpus_allowed = current->cpus_allowed; @@ -348,7 +348,6 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) (speed / 1000)); /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cur = speed; result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index e1c509aa305..f2b5a621d27 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -290,7 +290,6 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) (speed / 1000)); /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; policy->cur = speed; diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 1e31b6caffb..879a0f789b1 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -122,7 +122,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_X86_HT if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, n))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 5c2faa10e9f..f4548c93ccf 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -11,8 +11,6 @@ * ----------------------------------------------------------------------- */ /* - * cpuid.c - * * x86 CPUID access device * * This device is accessed by lseek() to the appropriate CPUID level diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index 3f532df488b..32e75d0731a 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -1,5 +1,5 @@ /* - * kernel/crash_dump.c - Memory preserving reboot related code. + * Memory preserving reboot related code. * * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) * Copyright (C) IBM Corporation, 2004. All rights reserved diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 942deac4d43..15e6c6bc4a4 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -1,5 +1,5 @@ /* - * kernel/crash_dump.c - Memory preserving reboot related code. + * Memory preserving reboot related code. * * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) * Copyright (C) IBM Corporation, 2004. All rights reserved diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 290b7bc82da..8099fea0a72 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -251,6 +251,7 @@ check_userspace: jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -338,6 +339,7 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx @@ -377,6 +379,7 @@ syscall_call: call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) # store the return value syscall_exit: + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -467,6 +470,7 @@ work_pending: jz work_notifysig work_resched: call schedule + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1d232e5f565..f1cacd4897f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -244,6 +244,7 @@ ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: flagmask */ sysret_check: + LOCKDEP_SYS_EXIT GET_THREAD_INFO(%rcx) cli TRACE_IRQS_OFF @@ -333,6 +334,7 @@ int_ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ int_with_check: + LOCKDEP_SYS_EXIT_IRQ GET_THREAD_INFO(%rcx) movl threadinfo_flags(%rcx),%edx andl %edi,%edx @@ -544,11 +546,13 @@ exit_intr: retint_with_reschedule: movl $_TIF_WORK_MASK,%edi retint_check: + LOCKDEP_SYS_EXIT_IRQ movl threadinfo_flags(%rcx),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz retint_careful -retint_swapgs: + +retint_swapgs: /* return to user-space */ /* * The iretq could re-enable interrupts: */ @@ -557,7 +561,7 @@ retint_swapgs: swapgs jmp restore_args -retint_restore_args: +retint_restore_args: /* return to kernel space */ cli /* * The iretq could re-enable interrupts: @@ -866,26 +870,21 @@ error_sti: movq ORIG_RAX(%rsp),%rsi /* get error code */ movq $-1,ORIG_RAX(%rsp) call *%rax - /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ -error_exit: - movl %ebx,%eax + /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ +error_exit: + movl %ebx,%eax RESTORE_REST cli TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax jne retint_kernel + LOCKDEP_SYS_EXIT_IRQ movl threadinfo_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi andl %edi,%edx jnz retint_careful - /* - * The iret might restore flags: - */ - TRACE_IRQS_IRETQ - swapgs - RESTORE_ARGS 0,8,0 - jmp iret_label + jmp retint_swapgs CFI_ENDPROC error_kernelspace: diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c index 41e8aec4c61..f12d8c5d980 100644 --- a/arch/x86/kernel/geode_32.c +++ b/arch/x86/kernel/geode_32.c @@ -145,10 +145,14 @@ EXPORT_SYMBOL_GPL(geode_gpio_setup_event); static int __init geode_southbridge_init(void) { + int timers; + if (!is_geode()) return -ENODEV; init_lbars(); + timers = geode_mfgpt_detect(); + printk(KERN_INFO "geode: %d MFGPT timers available.\n", timers); return 0; } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 6c34bdd22e2..8561f626eda 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -1,5 +1,5 @@ /* - * linux/arch/x86_64/kernel/head64.c -- prepare to run common code + * prepare to run common code * * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE */ diff --git a/arch/x86/kernel/hpet_32.c b/arch/x86/kernel/hpet.c index 533d4932bc7..f8367074da0 100644 --- a/arch/x86/kernel/hpet_32.c +++ b/arch/x86/kernel/hpet.c @@ -1,5 +1,6 @@ #include <linux/clocksource.h> #include <linux/clockchips.h> +#include <linux/delay.h> #include <linux/errno.h> #include <linux/hpet.h> #include <linux/init.h> @@ -7,11 +8,11 @@ #include <linux/pm.h> #include <linux/delay.h> +#include <asm/fixmap.h> #include <asm/hpet.h> +#include <asm/i8253.h> #include <asm/io.h> -extern struct clock_event_device *global_clock_event; - #define HPET_MASK CLOCKSOURCE_MASK(32) #define HPET_SHIFT 22 @@ -22,9 +23,9 @@ extern struct clock_event_device *global_clock_event; * HPET address is set in acpi/boot.c, when an ACPI entry exists */ unsigned long hpet_address; -static void __iomem * hpet_virt_address; +static void __iomem *hpet_virt_address; -static inline unsigned long hpet_readl(unsigned long a) +unsigned long hpet_readl(unsigned long a) { return readl(hpet_virt_address + a); } @@ -34,6 +35,36 @@ static inline void hpet_writel(unsigned long d, unsigned long a) writel(d, hpet_virt_address + a); } +#ifdef CONFIG_X86_64 + +#include <asm/pgtable.h> + +static inline void hpet_set_mapping(void) +{ + set_fixmap_nocache(FIX_HPET_BASE, hpet_address); + __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); + hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE); +} + +static inline void hpet_clear_mapping(void) +{ + hpet_virt_address = NULL; +} + +#else + +static inline void hpet_set_mapping(void) +{ + hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); +} + +static inline void hpet_clear_mapping(void) +{ + iounmap(hpet_virt_address); + hpet_virt_address = NULL; +} +#endif + /* * HPET command line enable / disable */ @@ -49,6 +80,13 @@ static int __init hpet_setup(char* str) } __setup("hpet=", hpet_setup); +static int __init disable_hpet(char *str) +{ + boot_hpet_disable = 1; + return 1; +} +__setup("nohpet", disable_hpet); + static inline int is_hpet_capable(void) { return (!boot_hpet_disable && hpet_address); @@ -83,7 +121,7 @@ static void hpet_reserve_platform_timers(unsigned long id) memset(&hd, 0, sizeof (hd)); hd.hd_phys_address = hpet_address; - hd.hd_address = hpet_virt_address; + hd.hd_address = hpet; hd.hd_nirqs = nrtimers; hd.hd_flags = HPET_DATA_PLATFORM; hpet_reserve_timer(&hd, 0); @@ -111,9 +149,9 @@ static void hpet_reserve_platform_timers(unsigned long id) { } */ static unsigned long hpet_period; -static void hpet_set_mode(enum clock_event_mode mode, +static void hpet_legacy_set_mode(enum clock_event_mode mode, struct clock_event_device *evt); -static int hpet_next_event(unsigned long delta, +static int hpet_legacy_next_event(unsigned long delta, struct clock_event_device *evt); /* @@ -122,10 +160,11 @@ static int hpet_next_event(unsigned long delta, static struct clock_event_device hpet_clockevent = { .name = "hpet", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = hpet_set_mode, - .set_next_event = hpet_next_event, + .set_mode = hpet_legacy_set_mode, + .set_next_event = hpet_legacy_next_event, .shift = 32, .irq = 0, + .rating = 50, }; static void hpet_start_counter(void) @@ -140,7 +179,18 @@ static void hpet_start_counter(void) hpet_writel(cfg, HPET_CFG); } -static void hpet_enable_int(void) +static void hpet_resume_device(void) +{ + force_hpet_resume(); +} + +static void hpet_restart_counter(void) +{ + hpet_resume_device(); + hpet_start_counter(); +} + +static void hpet_enable_legacy_int(void) { unsigned long cfg = hpet_readl(HPET_CFG); @@ -149,7 +199,39 @@ static void hpet_enable_int(void) hpet_legacy_int_enabled = 1; } -static void hpet_set_mode(enum clock_event_mode mode, +static void hpet_legacy_clockevent_register(void) +{ + uint64_t hpet_freq; + + /* Start HPET legacy interrupts */ + hpet_enable_legacy_int(); + + /* + * The period is a femto seconds value. We need to calculate the + * scaled math multiplication factor for nanosecond to hpet tick + * conversion. + */ + hpet_freq = 1000000000000000ULL; + do_div(hpet_freq, hpet_period); + hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, + NSEC_PER_SEC, 32); + /* Calculate the min / max delta */ + hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, + &hpet_clockevent); + hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, + &hpet_clockevent); + + /* + * Start hpet with the boot cpu mask and make it + * global after the IO_APIC has been initialized. + */ + hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); + clockevents_register_device(&hpet_clockevent); + global_clock_event = &hpet_clockevent; + printk(KERN_DEBUG "hpet clockevent registered\n"); +} + +static void hpet_legacy_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) { unsigned long cfg, cmp, now; @@ -190,12 +272,12 @@ static void hpet_set_mode(enum clock_event_mode mode, break; case CLOCK_EVT_MODE_RESUME: - hpet_enable_int(); + hpet_enable_legacy_int(); break; } } -static int hpet_next_event(unsigned long delta, +static int hpet_legacy_next_event(unsigned long delta, struct clock_event_device *evt) { unsigned long cnt; @@ -215,6 +297,13 @@ static cycle_t read_hpet(void) return (cycle_t)hpet_readl(HPET_COUNTER); } +#ifdef CONFIG_X86_64 +static cycle_t __vsyscall_fn vread_hpet(void) +{ + return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); +} +#endif + static struct clocksource clocksource_hpet = { .name = "hpet", .rating = 250, @@ -222,61 +311,17 @@ static struct clocksource clocksource_hpet = { .mask = HPET_MASK, .shift = HPET_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .resume = hpet_start_counter, + .resume = hpet_restart_counter, +#ifdef CONFIG_X86_64 + .vread = vread_hpet, +#endif }; -/* - * Try to setup the HPET timer - */ -int __init hpet_enable(void) +static int hpet_clocksource_register(void) { - unsigned long id; - uint64_t hpet_freq; u64 tmp, start, now; cycle_t t1; - if (!is_hpet_capable()) - return 0; - - hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); - - /* - * Read the period and check for a sane value: - */ - hpet_period = hpet_readl(HPET_PERIOD); - if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) - goto out_nohpet; - - /* - * The period is a femto seconds value. We need to calculate the - * scaled math multiplication factor for nanosecond to hpet tick - * conversion. - */ - hpet_freq = 1000000000000000ULL; - do_div(hpet_freq, hpet_period); - hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, - NSEC_PER_SEC, 32); - /* Calculate the min / max delta */ - hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, - &hpet_clockevent); - hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, - &hpet_clockevent); - - /* - * Read the HPET ID register to retrieve the IRQ routing - * information and the number of channels - */ - id = hpet_readl(HPET_ID); - -#ifdef CONFIG_HPET_EMULATE_RTC - /* - * The legacy routing mode needs at least two channels, tick timer - * and the rtc emulation channel. - */ - if (!(id & HPET_ID_NUMBER)) - goto out_nohpet; -#endif - /* Start the counter */ hpet_start_counter(); @@ -298,7 +343,7 @@ int __init hpet_enable(void) if (t1 == read_hpet()) { printk(KERN_WARNING "HPET counter not counting. HPET disabled\n"); - goto out_nohpet; + return -ENODEV; } /* Initialize and register HPET clocksource @@ -319,27 +364,84 @@ int __init hpet_enable(void) clocksource_register(&clocksource_hpet); + return 0; +} + +/* + * Try to setup the HPET timer + */ +int __init hpet_enable(void) +{ + unsigned long id; + + if (!is_hpet_capable()) + return 0; + + hpet_set_mapping(); + + /* + * Read the period and check for a sane value: + */ + hpet_period = hpet_readl(HPET_PERIOD); + if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) + goto out_nohpet; + + /* + * Read the HPET ID register to retrieve the IRQ routing + * information and the number of channels + */ + id = hpet_readl(HPET_ID); + +#ifdef CONFIG_HPET_EMULATE_RTC + /* + * The legacy routing mode needs at least two channels, tick timer + * and the rtc emulation channel. + */ + if (!(id & HPET_ID_NUMBER)) + goto out_nohpet; +#endif + + if (hpet_clocksource_register()) + goto out_nohpet; + if (id & HPET_ID_LEGSUP) { - hpet_enable_int(); - hpet_reserve_platform_timers(id); - /* - * Start hpet with the boot cpu mask and make it - * global after the IO_APIC has been initialized. - */ - hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); - clockevents_register_device(&hpet_clockevent); - global_clock_event = &hpet_clockevent; + hpet_legacy_clockevent_register(); return 1; } return 0; out_nohpet: - iounmap(hpet_virt_address); - hpet_virt_address = NULL; + hpet_clear_mapping(); boot_hpet_disable = 1; return 0; } +/* + * Needs to be late, as the reserve_timer code calls kalloc ! + * + * Not a problem on i386 as hpet_enable is called from late_time_init, + * but on x86_64 it is necessary ! + */ +static __init int hpet_late_init(void) +{ + if (boot_hpet_disable) + return -ENODEV; + + if (!hpet_address) { + if (!force_hpet_address) + return -ENODEV; + + hpet_address = force_hpet_address; + hpet_enable(); + if (!hpet_virt_address) + return -ENODEV; + } + + hpet_reserve_platform_timers(hpet_readl(HPET_ID)); + + return 0; +} +fs_initcall(hpet_late_init); #ifdef CONFIG_HPET_EMULATE_RTC diff --git a/arch/x86/kernel/hpet_64.c b/arch/x86/kernel/hpet_64.c deleted file mode 100644 index e2d1b912e15..00000000000 --- a/arch/x86/kernel/hpet_64.c +++ /dev/null @@ -1,493 +0,0 @@ -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/mc146818rtc.h> -#include <linux/time.h> -#include <linux/clocksource.h> -#include <linux/ioport.h> -#include <linux/acpi.h> -#include <linux/hpet.h> -#include <asm/pgtable.h> -#include <asm/vsyscall.h> -#include <asm/timex.h> -#include <asm/hpet.h> - -#define HPET_MASK 0xFFFFFFFF -#define HPET_SHIFT 22 - -/* FSEC = 10^-15 NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000 - -int nohpet __initdata; - -unsigned long hpet_address; -unsigned long hpet_period; /* fsecs / HPET clock */ -unsigned long hpet_tick; /* HPET clocks / interrupt */ - -int hpet_use_timer; /* Use counter of hpet for time keeping, - * otherwise PIT - */ - -#ifdef CONFIG_HPET -static __init int late_hpet_init(void) -{ - struct hpet_data hd; - unsigned int ntimer; - - if (!hpet_address) - return 0; - - memset(&hd, 0, sizeof(hd)); - - ntimer = hpet_readl(HPET_ID); - ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; - ntimer++; - - /* - * Register with driver. - * Timer0 and Timer1 is used by platform. - */ - hd.hd_phys_address = hpet_address; - hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE); - hd.hd_nirqs = ntimer; - hd.hd_flags = HPET_DATA_PLATFORM; - hpet_reserve_timer(&hd, 0); -#ifdef CONFIG_HPET_EMULATE_RTC - hpet_reserve_timer(&hd, 1); -#endif - hd.hd_irq[0] = HPET_LEGACY_8254; - hd.hd_irq[1] = HPET_LEGACY_RTC; - if (ntimer > 2) { - struct hpet *hpet; - struct hpet_timer *timer; - int i; - - hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE); - timer = &hpet->hpet_timers[2]; - for (i = 2; i < ntimer; timer++, i++) - hd.hd_irq[i] = (timer->hpet_config & - Tn_INT_ROUTE_CNF_MASK) >> - Tn_INT_ROUTE_CNF_SHIFT; - - } - - hpet_alloc(&hd); - return 0; -} -fs_initcall(late_hpet_init); -#endif - -int hpet_timer_stop_set_go(unsigned long tick) -{ - unsigned int cfg; - -/* - * Stop the timers and reset the main counter. - */ - - cfg = hpet_readl(HPET_CFG); - cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); - hpet_writel(cfg, HPET_CFG); - hpet_writel(0, HPET_COUNTER); - hpet_writel(0, HPET_COUNTER + 4); - -/* - * Set up timer 0, as periodic with first interrupt to happen at hpet_tick, - * and period also hpet_tick. - */ - if (hpet_use_timer) { - hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | - HPET_TN_32BIT, HPET_T0_CFG); - hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */ - hpet_writel(hpet_tick, HPET_T0_CMP); /* period */ - cfg |= HPET_CFG_LEGACY; - } -/* - * Go! - */ - - cfg |= HPET_CFG_ENABLE; - hpet_writel(cfg, HPET_CFG); - - return 0; -} - -static cycle_t read_hpet(void) -{ - return (cycle_t)hpet_readl(HPET_COUNTER); -} - -static cycle_t __vsyscall_fn vread_hpet(void) -{ - return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); -} - -struct clocksource clocksource_hpet = { - .name = "hpet", - .rating = 250, - .read = read_hpet, - .mask = (cycle_t)HPET_MASK, - .mult = 0, /* set below */ - .shift = HPET_SHIFT, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .vread = vread_hpet, -}; - -int __init hpet_arch_init(void) -{ - unsigned int id; - u64 tmp; - - if (!hpet_address) - return -1; - set_fixmap_nocache(FIX_HPET_BASE, hpet_address); - __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); - -/* - * Read the period, compute tick and quotient. - */ - - id = hpet_readl(HPET_ID); - - if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER)) - return -1; - - hpet_period = hpet_readl(HPET_PERIOD); - if (hpet_period < 100000 || hpet_period > 100000000) - return -1; - - hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period; - - hpet_use_timer = (id & HPET_ID_LEGSUP); - - /* - * hpet period is in femto seconds per cycle - * so we need to convert this to ns/cyc units - * aproximated by mult/2^shift - * - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult - * (fsec/cyc << shift)/1000000 = mult - * (hpet_period << shift)/FSEC_PER_NSEC = mult - */ - tmp = (u64)hpet_period << HPET_SHIFT; - do_div(tmp, FSEC_PER_NSEC); - clocksource_hpet.mult = (u32)tmp; - clocksource_register(&clocksource_hpet); - - return hpet_timer_stop_set_go(hpet_tick); -} - -int hpet_reenable(void) -{ - return hpet_timer_stop_set_go(hpet_tick); -} - -/* - * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing - * it to the HPET timer of known frequency. - */ - -#define TICK_COUNT 100000000 -#define SMI_THRESHOLD 50000 -#define MAX_TRIES 5 - -/* - * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none - * occurs between the reads of the hpet & TSC. - */ -static void __init read_hpet_tsc(int *hpet, int *tsc) -{ - int tsc1, tsc2, hpet1, i; - - for (i = 0; i < MAX_TRIES; i++) { - tsc1 = get_cycles_sync(); - hpet1 = hpet_readl(HPET_COUNTER); - tsc2 = get_cycles_sync(); - if ((tsc2 - tsc1) < SMI_THRESHOLD) - break; - } - *hpet = hpet1; - *tsc = tsc2; -} - -unsigned int __init hpet_calibrate_tsc(void) -{ - int tsc_start, hpet_start; - int tsc_now, hpet_now; - unsigned long flags; - - local_irq_save(flags); - - read_hpet_tsc(&hpet_start, &tsc_start); - - do { - local_irq_disable(); - read_hpet_tsc(&hpet_now, &tsc_now); - local_irq_restore(flags); - } while ((tsc_now - tsc_start) < TICK_COUNT && - (hpet_now - hpet_start) < TICK_COUNT); - - return (tsc_now - tsc_start) * 1000000000L - / ((hpet_now - hpet_start) * hpet_period / 1000); -} - -#ifdef CONFIG_HPET_EMULATE_RTC -/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET - * is enabled, we support RTC interrupt functionality in software. - * RTC has 3 kinds of interrupts: - * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock - * is updated - * 2) Alarm Interrupt - generate an interrupt at a specific time of day - * 3) Periodic Interrupt - generate periodic interrupt, with frequencies - * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) - * (1) and (2) above are implemented using polling at a frequency of - * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt - * overhead. (DEFAULT_RTC_INT_FREQ) - * For (3), we use interrupts at 64Hz or user specified periodic - * frequency, whichever is higher. - */ -#include <linux/rtc.h> - -#define DEFAULT_RTC_INT_FREQ 64 -#define RTC_NUM_INTS 1 - -static unsigned long UIE_on; -static unsigned long prev_update_sec; - -static unsigned long AIE_on; -static struct rtc_time alarm_time; - -static unsigned long PIE_on; -static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ; -static unsigned long PIE_count; - -static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ -static unsigned int hpet_t1_cmp; /* cached comparator register */ - -int is_hpet_enabled(void) -{ - return hpet_address != 0; -} - -/* - * Timer 1 for RTC, we do not use periodic interrupt feature, - * even if HPET supports periodic interrupts on Timer 1. - * The reason being, to set up a periodic interrupt in HPET, we need to - * stop the main counter. And if we do that everytime someone diables/enables - * RTC, we will have adverse effect on main kernel timer running on Timer 0. - * So, for the time being, simulate the periodic interrupt in software. - * - * hpet_rtc_timer_init() is called for the first time and during subsequent - * interuppts reinit happens through hpet_rtc_timer_reinit(). - */ -int hpet_rtc_timer_init(void) -{ - unsigned int cfg, cnt; - unsigned long flags; - - if (!is_hpet_enabled()) - return 0; - /* - * Set the counter 1 and enable the interrupts. - */ - if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) - hpet_rtc_int_freq = PIE_freq; - else - hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; - - local_irq_save(flags); - - cnt = hpet_readl(HPET_COUNTER); - cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; - - cfg = hpet_readl(HPET_T1_CFG); - cfg &= ~HPET_TN_PERIODIC; - cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T1_CFG); - - local_irq_restore(flags); - - return 1; -} - -static void hpet_rtc_timer_reinit(void) -{ - unsigned int cfg, cnt, ticks_per_int, lost_ints; - - if (unlikely(!(PIE_on | AIE_on | UIE_on))) { - cfg = hpet_readl(HPET_T1_CFG); - cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_T1_CFG); - return; - } - - if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) - hpet_rtc_int_freq = PIE_freq; - else - hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; - - /* It is more accurate to use the comparator value than current count.*/ - ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq; - hpet_t1_cmp += ticks_per_int; - hpet_writel(hpet_t1_cmp, HPET_T1_CMP); - - /* - * If the interrupt handler was delayed too long, the write above tries - * to schedule the next interrupt in the past and the hardware would - * not interrupt until the counter had wrapped around. - * So we have to check that the comparator wasn't set to a past time. - */ - cnt = hpet_readl(HPET_COUNTER); - if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) { - lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1; - /* Make sure that, even with the time needed to execute - * this code, the next scheduled interrupt has been moved - * back to the future: */ - lost_ints++; - - hpet_t1_cmp += lost_ints * ticks_per_int; - hpet_writel(hpet_t1_cmp, HPET_T1_CMP); - - if (PIE_on) - PIE_count += lost_ints; - - if (printk_ratelimit()) - printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", - hpet_rtc_int_freq); - } -} - -/* - * The functions below are called from rtc driver. - * Return 0 if HPET is not being used. - * Otherwise do the necessary changes and return 1. - */ -int hpet_mask_rtc_irq_bit(unsigned long bit_mask) -{ - if (!is_hpet_enabled()) - return 0; - - if (bit_mask & RTC_UIE) - UIE_on = 0; - if (bit_mask & RTC_PIE) - PIE_on = 0; - if (bit_mask & RTC_AIE) - AIE_on = 0; - - return 1; -} - -int hpet_set_rtc_irq_bit(unsigned long bit_mask) -{ - int timer_init_reqd = 0; - - if (!is_hpet_enabled()) - return 0; - - if (!(PIE_on | AIE_on | UIE_on)) - timer_init_reqd = 1; - - if (bit_mask & RTC_UIE) { - UIE_on = 1; - } - if (bit_mask & RTC_PIE) { - PIE_on = 1; - PIE_count = 0; - } - if (bit_mask & RTC_AIE) { - AIE_on = 1; - } - - if (timer_init_reqd) - hpet_rtc_timer_init(); - - return 1; -} - -int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec) -{ - if (!is_hpet_enabled()) - return 0; - - alarm_time.tm_hour = hrs; - alarm_time.tm_min = min; - alarm_time.tm_sec = sec; - - return 1; -} - -int hpet_set_periodic_freq(unsigned long freq) -{ - if (!is_hpet_enabled()) - return 0; - - PIE_freq = freq; - PIE_count = 0; - - return 1; -} - -int hpet_rtc_dropped_irq(void) -{ - if (!is_hpet_enabled()) - return 0; - - return 1; -} - -irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) -{ - struct rtc_time curr_time; - unsigned long rtc_int_flag = 0; - int call_rtc_interrupt = 0; - - hpet_rtc_timer_reinit(); - - if (UIE_on | AIE_on) { - rtc_get_rtc_time(&curr_time); - } - if (UIE_on) { - if (curr_time.tm_sec != prev_update_sec) { - /* Set update int info, call real rtc int routine */ - call_rtc_interrupt = 1; - rtc_int_flag = RTC_UF; - prev_update_sec = curr_time.tm_sec; - } - } - if (PIE_on) { - PIE_count++; - if (PIE_count >= hpet_rtc_int_freq/PIE_freq) { - /* Set periodic int info, call real rtc int routine */ - call_rtc_interrupt = 1; - rtc_int_flag |= RTC_PF; - PIE_count = 0; - } - } - if (AIE_on) { - if ((curr_time.tm_sec == alarm_time.tm_sec) && - (curr_time.tm_min == alarm_time.tm_min) && - (curr_time.tm_hour == alarm_time.tm_hour)) { - /* Set alarm int info, call real rtc int routine */ - call_rtc_interrupt = 1; - rtc_int_flag |= RTC_AF; - } - } - if (call_rtc_interrupt) { - rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); - rtc_interrupt(rtc_int_flag, dev_id); - } - return IRQ_HANDLED; -} -#endif - -static int __init nohpet_setup(char *s) -{ - nohpet = 1; - return 1; -} - -__setup("nohpet", nohpet_setup); diff --git a/arch/x86/kernel/i387_32.c b/arch/x86/kernel/i387_32.c index 665847281ed..7d2e12f6c78 100644 --- a/arch/x86/kernel/i387_32.c +++ b/arch/x86/kernel/i387_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/i387.c - * * Copyright (C) 1994 Linus Torvalds * * Pentium III FXSR, SSE support diff --git a/arch/x86/kernel/i387_64.c b/arch/x86/kernel/i387_64.c index 1d58c13bc6b..56c1f114710 100644 --- a/arch/x86/kernel/i387_64.c +++ b/arch/x86/kernel/i387_64.c @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/i387.c - * * Copyright (C) 1994 Linus Torvalds * Copyright (C) 2002 Andi Kleen, SuSE Labs * diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index 6f508e8d7c5..29313832df0 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c @@ -1,5 +1,5 @@ /* - * i8237.c: 8237A DMA controller suspend functions. + * 8237A DMA controller suspend functions. * * Written by Pierre Ossman, 2005. * diff --git a/arch/x86/kernel/i8253_32.c b/arch/x86/kernel/i8253.c index 6d839f2f1b1..5cc8841ca2c 100644 --- a/arch/x86/kernel/i8253_32.c +++ b/arch/x86/kernel/i8253.c @@ -1,5 +1,5 @@ /* - * i8253.c 8253/PIT functions + * 8253/PIT functions * */ #include <linux/clockchips.h> @@ -13,7 +13,6 @@ #include <asm/delay.h> #include <asm/i8253.h> #include <asm/io.h> -#include <asm/timer.h> DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); @@ -120,6 +119,7 @@ void __init setup_pit_timer(void) global_clock_event = &pit_clockevent; } +#ifndef CONFIG_X86_64 /* * Since the PIT overflows every tick, its not very useful * to just read by itself. So use jiffies to emulate a free @@ -204,3 +204,5 @@ static int __init init_pit_clocksource(void) return clocksource_register(&clocksource_pit); } arch_initcall(init_pit_clocksource); + +#endif diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c index 0499cbe9871..679bb33acbf 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259_32.c @@ -10,7 +10,6 @@ #include <linux/sysdev.h> #include <linux/bitops.h> -#include <asm/8253pit.h> #include <asm/atomic.h> #include <asm/system.h> #include <asm/io.h> diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index 948cae64609..eb72976cc13 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -444,46 +444,6 @@ void __init init_ISA_irqs (void) } } -static void setup_timer_hardware(void) -{ - outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ - udelay(10); - outb_p(LATCH & 0xff , 0x40); /* LSB */ - udelay(10); - outb(LATCH >> 8 , 0x40); /* MSB */ -} - -static int timer_resume(struct sys_device *dev) -{ - setup_timer_hardware(); - return 0; -} - -void i8254_timer_resume(void) -{ - setup_timer_hardware(); -} - -static struct sysdev_class timer_sysclass = { - set_kset_name("timer_pit"), - .resume = timer_resume, -}; - -static struct sys_device device_timer = { - .id = 0, - .cls = &timer_sysclass, -}; - -static int __init init_timer_sysfs(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(init_timer_sysfs); - void __init init_IRQ(void) { int i; @@ -533,12 +493,6 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - /* - * Set the clock to HZ Hz, we already have a valid - * vector now: - */ - setup_timer_hardware(); - if (!acpi_ioapic) setup_irq(2, &irq2); } diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index e2f4a1c6854..4ee1e5ee9b5 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -378,7 +378,7 @@ static struct irq_cpu_info { #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) static cpumask_t balance_irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL @@ -598,7 +598,7 @@ tryanotherirq: * (A+B)/2 vs B */ load = CPU_IRQ(min_loaded) >> 1; - for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { + for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) { if (load > CPU_IRQ(j)) { /* This won't change cpu_sibling_map[min_loaded] */ load = CPU_IRQ(j); diff --git a/arch/x86/kernel/ioport_32.c b/arch/x86/kernel/ioport_32.c index 3d310a946d7..4ed48dc8df1 100644 --- a/arch/x86/kernel/ioport_32.c +++ b/arch/x86/kernel/ioport_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/ioport.c - * * This contains the io-permission bitmap code - written by obz, with changes * by Linus. */ diff --git a/arch/x86/kernel/ioport_64.c b/arch/x86/kernel/ioport_64.c index 653efa30b0f..5f62fad64da 100644 --- a/arch/x86/kernel/ioport_64.c +++ b/arch/x86/kernel/ioport_64.c @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/ioport.c - * * This contains the io-permission bitmap code - written by obz, with changes * by Linus. */ diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 4f681bcdb1f..e173b763f14 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/irq.c - * * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar * * This file contains the lowest level x86-specific interrupt diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index bd11e42b22b..865669efc54 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/irq.c - * * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar * * This file contains the lowest level x86_64-specific interrupt diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c index 448a50b1324..90f778c04b3 100644 --- a/arch/x86/kernel/kprobes_32.c +++ b/arch/x86/kernel/kprobes_32.c @@ -1,6 +1,5 @@ /* * Kernel Probes (KProbes) - * arch/i386/kernel/kprobes.c * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -42,6 +41,13 @@ void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +struct kretprobe_blackpoint kretprobe_blacklist[] = { + {"__switch_to", }, /* This function switches only current task, but + doesn't switch kernel stack.*/ + {NULL, NULL} /* Terminator */ +}; +const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); + /* insert a jmp code */ static __always_inline void set_jmp_op(void *from, void *to) { @@ -558,6 +564,12 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) resume_execution(cur, regs, kcb); regs->eflags |= kcb->kprobe_saved_eflags; +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + if (raw_irqs_disabled_flags(regs->eflags)) + trace_hardirqs_off(); + else + trace_hardirqs_on(); +#endif /*Restore back the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -579,7 +591,7 @@ out: return 1; } -static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -661,7 +673,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_GPF: - case DIE_PAGE_FAULT: /* kprobe_running() needs smp_processor_id() */ preempt_disable(); if (kprobe_running() && @@ -695,6 +706,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); regs->eflags &= ~IF_MASK; + trace_hardirqs_off(); regs->eip = (unsigned long)(jp->entry); return 1; } diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c index a30e004682e..681b801c5e2 100644 --- a/arch/x86/kernel/kprobes_64.c +++ b/arch/x86/kernel/kprobes_64.c @@ -1,6 +1,5 @@ /* * Kernel Probes (KProbes) - * arch/x86_64/kernel/kprobes.c * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -49,6 +48,13 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +struct kretprobe_blackpoint kretprobe_blacklist[] = { + {"__switch_to", }, /* This function switches only current task, but + doesn't switch kernel stack.*/ + {NULL, NULL} /* Terminator */ +}; +const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); + /* * returns non-zero if opcode modifies the interrupt flag. */ @@ -545,6 +551,12 @@ int __kprobes post_kprobe_handler(struct pt_regs *regs) resume_execution(cur, regs, kcb); regs->eflags |= kcb->kprobe_saved_rflags; +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + if (raw_irqs_disabled_flags(regs->eflags)) + trace_hardirqs_off(); + else + trace_hardirqs_on(); +#endif /* Restore the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -652,7 +664,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_GPF: - case DIE_PAGE_FAULT: /* kprobe_running() needs smp_processor_id() */ preempt_disable(); if (kprobe_running() && @@ -685,6 +696,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); regs->eflags &= ~IF_MASK; + trace_hardirqs_off(); regs->rip = (unsigned long)(jp->entry); return 1; } diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt_32.c index e0b2d17f4f1..a8b18421863 100644 --- a/arch/x86/kernel/ldt_32.c +++ b/arch/x86/kernel/ldt_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/ldt.c - * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> */ diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c index bc9ffd5c19c..3796523d616 100644 --- a/arch/x86/kernel/ldt_64.c +++ b/arch/x86/kernel/ldt_64.c @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/ldt.c - * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> * Copyright (C) 2002 Andi Kleen diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 91966bafb3d..deda9a221cf 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -1,5 +1,5 @@ /* - * machine_kexec.c - handle transition of Linux booting another kernel + * handle transition of Linux booting another kernel * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> * * This source code is licensed under the GNU General Public License, diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index c3a55470367..cd1899a2f0c 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -1,5 +1,5 @@ /* - * machine_kexec.c - handle transition of Linux booting another kernel + * handle transition of Linux booting another kernel * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> * * This source code is licensed under the GNU General Public License, diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index b83672b8952..9482033ed0f 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c @@ -1,5 +1,4 @@ /* - * linux/arch/i386/kernel/mca.c * Written by Martin Kolinek, February 1996 * * Changes: diff --git a/arch/x86/kernel/mce_amd_64.c b/arch/x86/kernel/mce_amd_64.c index 2f8a7f18b0f..805b62b1e0d 100644 --- a/arch/x86/kernel/mce_amd_64.c +++ b/arch/x86/kernel/mce_amd_64.c @@ -472,7 +472,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifdef CONFIG_SMP if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */ - i = first_cpu(cpu_core_map[cpu]); + i = first_cpu(per_cpu(cpu_core_map, cpu)); /* first core not up yet */ if (cpu_data[i].cpu_core_id) @@ -492,7 +492,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - b->cpus = cpu_core_map[cpu]; + b->cpus = per_cpu(cpu_core_map, cpu); per_cpu(threshold_banks, cpu)[bank] = b; goto out; } @@ -509,7 +509,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifndef CONFIG_SMP b->cpus = CPU_MASK_ALL; #else - b->cpus = cpu_core_map[cpu]; + b->cpus = per_cpu(cpu_core_map, cpu); #endif err = kobject_register(&b->kobj); if (err) diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c new file mode 100644 index 00000000000..0ab680f2d9d --- /dev/null +++ b/arch/x86/kernel/mfgpt_32.c @@ -0,0 +1,362 @@ +/* + * Driver/API for AMD Geode Multi-Function General Purpose Timers (MFGPT) + * + * Copyright (C) 2006, Advanced Micro Devices, Inc. + * Copyright (C) 2007, Andres Salomon <dilinger@debian.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book. + */ + +/* + * We are using the 32Khz input clock - its the only one that has the + * ranges we find desirable. The following table lists the suitable + * divisors and the associated hz, minimum interval + * and the maximum interval: + * + * Divisor Hz Min Delta (S) Max Delta (S) + * 1 32000 .0005 2.048 + * 2 16000 .001 4.096 + * 4 8000 .002 8.192 + * 8 4000 .004 16.384 + * 16 2000 .008 32.768 + * 32 1000 .016 65.536 + * 64 500 .032 131.072 + * 128 250 .064 262.144 + * 256 125 .128 524.288 + */ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <asm/geode.h> + +#define F_AVAIL 0x01 + +static struct mfgpt_timer_t { + int flags; + struct module *owner; +} mfgpt_timers[MFGPT_MAX_TIMERS]; + +/* Selected from the table above */ + +#define MFGPT_DIVISOR 16 +#define MFGPT_SCALE 4 /* divisor = 2^(scale) */ +#define MFGPT_HZ (32000 / MFGPT_DIVISOR) +#define MFGPT_PERIODIC (MFGPT_HZ / HZ) + +#ifdef CONFIG_GEODE_MFGPT_TIMER +static int __init mfgpt_timer_setup(void); +#else +#define mfgpt_timer_setup() (0) +#endif + +/* Allow for disabling of MFGPTs */ +static int disable; +static int __init mfgpt_disable(char *s) +{ + disable = 1; + return 1; +} +__setup("nomfgpt", mfgpt_disable); + +/* + * Check whether any MFGPTs are available for the kernel to use. In most + * cases, firmware that uses AMD's VSA code will claim all timers during + * bootup; we certainly don't want to take them if they're already in use. + * In other cases (such as with VSAless OpenFirmware), the system firmware + * leaves timers available for us to use. + */ +int __init geode_mfgpt_detect(void) +{ + int count = 0, i; + u16 val; + + if (disable) { + printk(KERN_INFO "geode-mfgpt: Skipping MFGPT setup\n"); + return 0; + } + + for (i = 0; i < MFGPT_MAX_TIMERS; i++) { + val = geode_mfgpt_read(i, MFGPT_REG_SETUP); + if (!(val & MFGPT_SETUP_SETUP)) { + mfgpt_timers[i].flags = F_AVAIL; + count++; + } + } + + /* set up clock event device, if desired */ + i = mfgpt_timer_setup(); + + return count; +} + +int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) +{ + u32 msr, mask, value, dummy; + int shift = (cmp == MFGPT_CMP1) ? 0 : 8; + + if (timer < 0 || timer >= MFGPT_MAX_TIMERS) + return -EIO; + + /* + * The register maps for these are described in sections 6.17.1.x of + * the AMD Geode CS5536 Companion Device Data Book. + */ + switch (event) { + case MFGPT_EVENT_RESET: + /* + * XXX: According to the docs, we cannot reset timers above + * 6; that is, resets for 7 and 8 will be ignored. Is this + * a problem? -dilinger + */ + msr = MFGPT_NR_MSR; + mask = 1 << (timer + 24); + break; + + case MFGPT_EVENT_NMI: + msr = MFGPT_NR_MSR; + mask = 1 << (timer + shift); + break; + + case MFGPT_EVENT_IRQ: + msr = MFGPT_IRQ_MSR; + mask = 1 << (timer + shift); + break; + + default: + return -EIO; + } + + rdmsr(msr, value, dummy); + + if (enable) + value |= mask; + else + value &= ~mask; + + wrmsr(msr, value, dummy); + return 0; +} + +int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) +{ + u32 val, dummy; + int offset; + + if (timer < 0 || timer >= MFGPT_MAX_TIMERS) + return -EIO; + + if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + return -EIO; + + rdmsr(MSR_PIC_ZSEL_LOW, val, dummy); + + offset = (timer % 4) * 4; + + val &= ~((0xF << offset) | (0xF << (offset + 16))); + + if (enable) { + val |= (irq & 0x0F) << (offset); + val |= (irq & 0x0F) << (offset + 16); + } + + wrmsr(MSR_PIC_ZSEL_LOW, val, dummy); + return 0; +} + +static int mfgpt_get(int timer, struct module *owner) +{ + mfgpt_timers[timer].flags &= ~F_AVAIL; + mfgpt_timers[timer].owner = owner; + printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer); + return timer; +} + +int geode_mfgpt_alloc_timer(int timer, int domain, struct module *owner) +{ + int i; + + if (!geode_get_dev_base(GEODE_DEV_MFGPT)) + return -ENODEV; + if (timer >= MFGPT_MAX_TIMERS) + return -EIO; + + if (timer < 0) { + /* Try to find an available timer */ + for (i = 0; i < MFGPT_MAX_TIMERS; i++) { + if (mfgpt_timers[i].flags & F_AVAIL) + return mfgpt_get(i, owner); + + if (i == 5 && domain == MFGPT_DOMAIN_WORKING) + break; + } + } else { + /* If they requested a specific timer, try to honor that */ + if (mfgpt_timers[timer].flags & F_AVAIL) + return mfgpt_get(timer, owner); + } + + /* No timers available - too bad */ + return -1; +} + + +#ifdef CONFIG_GEODE_MFGPT_TIMER + +/* + * The MFPGT timers on the CS5536 provide us with suitable timers to use + * as clock event sources - not as good as a HPET or APIC, but certainly + * better then the PIT. This isn't a general purpose MFGPT driver, but + * a simplified one designed specifically to act as a clock event source. + * For full details about the MFGPT, please consult the CS5536 data sheet. + */ + +#include <linux/clocksource.h> +#include <linux/clockchips.h> + +static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; +static u16 mfgpt_event_clock; + +static int irq = 7; +static int __init mfgpt_setup(char *str) +{ + get_option(&str, &irq); + return 1; +} +__setup("mfgpt_irq=", mfgpt_setup); + +static inline void mfgpt_disable_timer(u16 clock) +{ + u16 val = geode_mfgpt_read(clock, MFGPT_REG_SETUP); + geode_mfgpt_write(clock, MFGPT_REG_SETUP, val & ~MFGPT_SETUP_CNTEN); +} + +static int mfgpt_next_event(unsigned long, struct clock_event_device *); +static void mfgpt_set_mode(enum clock_event_mode, struct clock_event_device *); + +static struct clock_event_device mfgpt_clockevent = { + .name = "mfgpt-timer", + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + .set_mode = mfgpt_set_mode, + .set_next_event = mfgpt_next_event, + .rating = 250, + .cpumask = CPU_MASK_ALL, + .shift = 32 +}; + +static inline void mfgpt_start_timer(u16 clock, u16 delta) +{ + geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta); + geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0); + + geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, + MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2); +} + +static void mfgpt_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + mfgpt_disable_timer(mfgpt_event_clock); + + if (mode == CLOCK_EVT_MODE_PERIODIC) + mfgpt_start_timer(mfgpt_event_clock, MFGPT_PERIODIC); + + mfgpt_tick_mode = mode; +} + +static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt) +{ + mfgpt_start_timer(mfgpt_event_clock, delta); + return 0; +} + +/* Assume (foolishly?), that this interrupt was due to our tick */ + +static irqreturn_t mfgpt_tick(int irq, void *dev_id) +{ + if (mfgpt_tick_mode == CLOCK_EVT_MODE_SHUTDOWN) + return IRQ_HANDLED; + + /* Turn off the clock */ + mfgpt_disable_timer(mfgpt_event_clock); + + /* Clear the counter */ + geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0); + + /* Restart the clock in periodic mode */ + + if (mfgpt_tick_mode == CLOCK_EVT_MODE_PERIODIC) { + geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, + MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2); + } + + mfgpt_clockevent.event_handler(&mfgpt_clockevent); + return IRQ_HANDLED; +} + +static struct irqaction mfgptirq = { + .handler = mfgpt_tick, + .flags = IRQF_DISABLED | IRQF_NOBALANCING, + .mask = CPU_MASK_NONE, + .name = "mfgpt-timer" +}; + +static int __init mfgpt_timer_setup(void) +{ + int timer, ret; + u16 val; + + timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING, + THIS_MODULE); + if (timer < 0) { + printk(KERN_ERR + "mfgpt-timer: Could not allocate a MFPGT timer\n"); + return -ENODEV; + } + + mfgpt_event_clock = timer; + /* Set the clock scale and enable the event mode for CMP2 */ + val = MFGPT_SCALE | (3 << 8); + + geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val); + + /* Set up the IRQ on the MFGPT side */ + if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { + printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); + return -EIO; + } + + /* And register it with the kernel */ + ret = setup_irq(irq, &mfgptirq); + + if (ret) { + printk(KERN_ERR + "mfgpt-timer: Unable to set up the interrupt.\n"); + goto err; + } + + /* Set up the clock event */ + mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, 32); + mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF, + &mfgpt_clockevent); + mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE, + &mfgpt_clockevent); + + printk(KERN_INFO + "mfgpt-timer: registering the MFGT timer as a clock event.\n"); + clockevents_register_device(&mfgpt_clockevent); + + return 0; + +err: + geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq); + printk(KERN_ERR + "mfgpt-timer: Unable to set up the MFGPT clock source\n"); + return -EIO; +} + +#endif diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 0c1069b8d63..c044de310b6 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -11,8 +11,6 @@ * ----------------------------------------------------------------------- */ /* - * msr.c - * * x86 MSR access device * * This device is accessed by lseek() to the appropriate register number diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index c7227e2180f..f803ed0ed1c 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/nmi.c - * * NMI watchdog support on APIC systems * * Started by Ingo Molnar <mingo@redhat.com> @@ -353,7 +351,8 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) * Take the local apic timer and PIT/HPET into account. We don't * know which one is active, when we have highres/dyntick on */ - sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_cpu(cpu).irqs[0]; + sum = per_cpu(irq_stat, cpu).apic_timer_irqs + + per_cpu(irq_stat, cpu).irq0_irqs; /* if the none of the timers isn't firing, this cpu isn't doing much */ if (!touched && last_irq_sums[cpu] == sum) { diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 0ec6d2ddb93..a576fd74006 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/nmi.c - * * NMI watchdog support on APIC systems * * Started by Ingo Molnar <mingo@redhat.com> @@ -329,7 +327,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) touched = 1; } - sum = read_pda(apic_timer_irqs); + sum = read_pda(apic_timer_irqs) + read_pda(irq0_irqs); if (__get_cpu_var(nmi_touch)) { __get_cpu_var(nmi_touch) = 0; touched = 1; diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 71da01e73f0..a50b787b3bf 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -35,6 +35,7 @@ #include <linux/pci_ids.h> #include <linux/pci.h> #include <linux/delay.h> +#include <linux/scatterlist.h> #include <asm/iommu.h> #include <asm/calgary.h> #include <asm/tce.h> @@ -384,31 +385,32 @@ static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, int direction) { struct iommu_table *tbl = find_iommu_table(dev); + struct scatterlist *s; + int i; if (!translate_phb(to_pci_dev(dev))) return; - while (nelems--) { + for_each_sg(sglist, s, nelems, i) { unsigned int npages; - dma_addr_t dma = sglist->dma_address; - unsigned int dmalen = sglist->dma_length; + dma_addr_t dma = s->dma_address; + unsigned int dmalen = s->dma_length; if (dmalen == 0) break; npages = num_dma_pages(dma, dmalen); iommu_free(tbl, dma, npages); - sglist++; } } static int calgary_nontranslate_map_sg(struct device* dev, struct scatterlist *sg, int nelems, int direction) { + struct scatterlist *s; int i; - for (i = 0; i < nelems; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nelems, i) { BUG_ON(!s->page); s->dma_address = virt_to_bus(page_address(s->page) +s->offset); s->dma_length = s->length; @@ -420,6 +422,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, int nelems, int direction) { struct iommu_table *tbl = find_iommu_table(dev); + struct scatterlist *s; unsigned long vaddr; unsigned int npages; unsigned long entry; @@ -428,8 +431,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, if (!translate_phb(to_pci_dev(dev))) return calgary_nontranslate_map_sg(dev, sg, nelems, direction); - for (i = 0; i < nelems; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nelems, i) { BUG_ON(!s->page); vaddr = (unsigned long)page_address(s->page) + s->offset; @@ -454,9 +456,9 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, return nelems; error: calgary_unmap_sg(dev, sg, nelems, direction); - for (i = 0; i < nelems; i++) { - sg[i].dma_address = bad_dma_address; - sg[i].dma_length = 0; + for_each_sg(sg, s, nelems, i) { + sg->dma_address = bad_dma_address; + sg->dma_length = 0; } return 0; } diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 048f09b6255..0aae2f3847a 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -63,7 +63,8 @@ void dma_free_coherent(struct device *dev, size_t size, { struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); - + + WARN_ON(irqs_disabled()); /* for portability */ if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 29711445c81..9576a2eb375 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -167,6 +167,7 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { + WARN_ON(irqs_disabled()); /* for portability */ if (dma_ops->unmap_single) dma_ops->unmap_single(dev, bus, size, 0); free_pages((unsigned long)vaddr, get_order(size)); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 4918c575d58..cfcc84e6c35 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -23,6 +23,7 @@ #include <linux/interrupt.h> #include <linux/bitops.h> #include <linux/kdebug.h> +#include <linux/scatterlist.h> #include <asm/atomic.h> #include <asm/io.h> #include <asm/mtrr.h> @@ -278,10 +279,10 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, */ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { + struct scatterlist *s; int i; - for (i = 0; i < nents; i++) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nents, i) { if (!s->dma_length || !s->length) break; gart_unmap_single(dev, s->dma_address, s->dma_length, dir); @@ -292,14 +293,14 @@ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, int nents, int dir) { + struct scatterlist *s; int i; #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); #endif - for (i = 0; i < nents; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nents, i) { unsigned long addr = page_to_phys(s->page) + s->offset; if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir); @@ -319,23 +320,23 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, } /* Map multiple scatterlist entries continuous into the first. */ -static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, +static int __dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { unsigned long iommu_start = alloc_iommu(pages); unsigned long iommu_page = iommu_start; + struct scatterlist *s; int i; if (iommu_start == -1) return -1; - - for (i = start; i < stopat; i++) { - struct scatterlist *s = &sg[i]; + + for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; - BUG_ON(i > start && s->offset); - if (i == start) { + BUG_ON(s != start && s->offset); + if (s == start) { *sout = *s; sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; @@ -357,17 +358,17 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, return 0; } -static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, +static inline int dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages, int need) { - if (!need) { - BUG_ON(stopat - start != 1); - *sout = sg[start]; - sout->dma_length = sg[start].length; + if (!need) { + BUG_ON(nelems != 1); + *sout = *start; + sout->dma_length = start->length; return 0; - } - return __dma_map_cont(sg, start, stopat, sout, pages); + } + return __dma_map_cont(start, nelems, sout, pages); } /* @@ -381,6 +382,7 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) int start; unsigned long pages = 0; int need = 0, nextneed; + struct scatterlist *s, *ps, *start_sg, *sgmap; if (nents == 0) return 0; @@ -390,8 +392,9 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) out = 0; start = 0; - for (i = 0; i < nents; i++) { - struct scatterlist *s = &sg[i]; + start_sg = sgmap = sg; + ps = NULL; /* shut up gcc */ + for_each_sg(sg, s, nents, i) { dma_addr_t addr = page_to_phys(s->page) + s->offset; s->dma_address = addr; BUG_ON(s->length == 0); @@ -400,29 +403,33 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) /* Handle the previous not yet processed entries */ if (i > start) { - struct scatterlist *ps = &sg[i-1]; /* Can only merge when the last chunk ends on a page boundary and the new one doesn't have an offset. */ if (!iommu_merge || !nextneed || !need || s->offset || - (ps->offset + ps->length) % PAGE_SIZE) { - if (dma_map_cont(sg, start, i, sg+out, pages, - need) < 0) + (ps->offset + ps->length) % PAGE_SIZE) { + if (dma_map_cont(start_sg, i - start, sgmap, + pages, need) < 0) goto error; out++; + sgmap = sg_next(sgmap); pages = 0; - start = i; + start = i; + start_sg = s; } } need = nextneed; pages += to_pages(s->offset, s->length); + ps = s; } - if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) + if (dma_map_cont(start_sg, i - start, sgmap, pages, need) < 0) goto error; out++; flush_gart(); - if (out < nents) - sg[out].dma_length = 0; + if (out < nents) { + sgmap = sg_next(sgmap); + sgmap->dma_length = 0; + } return out; error: @@ -437,8 +444,8 @@ error: if (panic_on_overflow) panic("dma_map_sg: overflow on %lu pages\n", pages); iommu_full(dev, pages << PAGE_SHIFT, dir); - for (i = 0; i < nents; i++) - sg[i].dma_address = bad_dma_address; + for_each_sg(sg, s, nents, i) + s->dma_address = bad_dma_address; return 0; } diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index 2a34c6c025a..e85d4360360 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -5,6 +5,7 @@ #include <linux/pci.h> #include <linux/string.h> #include <linux/dma-mapping.h> +#include <linux/scatterlist.h> #include <asm/iommu.h> #include <asm/processor.h> @@ -57,10 +58,10 @@ static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size, static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct scatterlist *s; int i; - for (i = 0; i < nents; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nents, i) { BUG_ON(!s->page); s->dma_address = virt_to_bus(page_address(s->page) +s->offset); if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 84664710b78..097aeafce5f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/process.c - * * Copyright (C) 1995 Linus Torvalds * * Pentium III FXSR, SSE support diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 98956555450..6309b275cb9 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -1,6 +1,4 @@ /* - * linux/arch/x86-64/kernel/process.c - * * Copyright (C) 1995 Linus Torvalds * * Pentium III FXSR, SSE support @@ -38,6 +36,7 @@ #include <linux/notifier.h> #include <linux/kprobes.h> #include <linux/kdebug.h> +#include <linux/tick.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -208,6 +207,8 @@ void cpu_idle (void) if (__get_cpu_var(cpu_idle_state)) __get_cpu_var(cpu_idle_state) = 0; + tick_nohz_stop_sched_tick(); + rmb(); idle = pm_idle; if (!idle) @@ -228,6 +229,7 @@ void cpu_idle (void) __exit_idle(); } + tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); schedule(); preempt_disable(); @@ -579,7 +581,7 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, * * Kprobes not supported here. Set the probe on schedule instead. */ -__kprobes struct task_struct * +struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c index 7c1b92522e9..8622b9cd3e3 100644 --- a/arch/x86/kernel/ptrace_32.c +++ b/arch/x86/kernel/ptrace_32.c @@ -1,4 +1,3 @@ -/* ptrace.c */ /* By Ross Biro 1/23/92 */ /* * Pentium III FXSR, SSE support @@ -525,11 +524,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = 0; break; - case PTRACE_DETACH: - /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) { ret = -EIO; diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c index eea3702427b..86321ee6da9 100644 --- a/arch/x86/kernel/ptrace_64.c +++ b/arch/x86/kernel/ptrace_64.c @@ -1,4 +1,3 @@ -/* ptrace.c */ /* By Ross Biro 1/23/92 */ /* * Pentium III FXSR, SSE support @@ -501,11 +500,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = 0; break; - case PTRACE_DETACH: - /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, sizeof(struct user_regs_struct))) { diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 6722469c263..d769e204f94 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -4,6 +4,8 @@ #include <linux/pci.h> #include <linux/irq.h> +#include <asm/hpet.h> + #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) @@ -47,3 +49,206 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quir DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); #endif + +#if defined(CONFIG_HPET_TIMER) +unsigned long force_hpet_address; + +static enum { + NONE_FORCE_HPET_RESUME, + OLD_ICH_FORCE_HPET_RESUME, + ICH_FORCE_HPET_RESUME +} force_hpet_resume_type; + +static void __iomem *rcba_base; + +static void ich_force_hpet_resume(void) +{ + u32 val; + + if (!force_hpet_address) + return; + + if (rcba_base == NULL) + BUG(); + + /* read the Function Disable register, dword mode only */ + val = readl(rcba_base + 0x3404); + if (!(val & 0x80)) { + /* HPET disabled in HPTC. Trying to enable */ + writel(val | 0x80, rcba_base + 0x3404); + } + + val = readl(rcba_base + 0x3404); + if (!(val & 0x80)) + BUG(); + else + printk(KERN_DEBUG "Force enabled HPET at resume\n"); + + return; +} + +static void ich_force_enable_hpet(struct pci_dev *dev) +{ + u32 val; + u32 uninitialized_var(rcba); + int err = 0; + + if (hpet_address || force_hpet_address) + return; + + pci_read_config_dword(dev, 0xF0, &rcba); + rcba &= 0xFFFFC000; + if (rcba == 0) { + printk(KERN_DEBUG "RCBA disabled. Cannot force enable HPET\n"); + return; + } + + /* use bits 31:14, 16 kB aligned */ + rcba_base = ioremap_nocache(rcba, 0x4000); + if (rcba_base == NULL) { + printk(KERN_DEBUG "ioremap failed. Cannot force enable HPET\n"); + return; + } + + /* read the Function Disable register, dword mode only */ + val = readl(rcba_base + 0x3404); + + if (val & 0x80) { + /* HPET is enabled in HPTC. Just not reported by BIOS */ + val = val & 0x3; + force_hpet_address = 0xFED00000 | (val << 12); + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + iounmap(rcba_base); + return; + } + + /* HPET disabled in HPTC. Trying to enable */ + writel(val | 0x80, rcba_base + 0x3404); + + val = readl(rcba_base + 0x3404); + if (!(val & 0x80)) { + err = 1; + } else { + val = val & 0x3; + force_hpet_address = 0xFED00000 | (val << 12); + } + + if (err) { + force_hpet_address = 0; + iounmap(rcba_base); + printk(KERN_DEBUG "Failed to force enable HPET\n"); + } else { + force_hpet_resume_type = ICH_FORCE_HPET_RESUME; + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + } +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, + ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, + ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, + ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1, + ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, + ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, + ich_force_enable_hpet); + + +static struct pci_dev *cached_dev; + +static void old_ich_force_hpet_resume(void) +{ + u32 val; + u32 uninitialized_var(gen_cntl); + + if (!force_hpet_address || !cached_dev) + return; + + pci_read_config_dword(cached_dev, 0xD0, &gen_cntl); + gen_cntl &= (~(0x7 << 15)); + gen_cntl |= (0x4 << 15); + + pci_write_config_dword(cached_dev, 0xD0, gen_cntl); + pci_read_config_dword(cached_dev, 0xD0, &gen_cntl); + val = gen_cntl >> 15; + val &= 0x7; + if (val == 0x4) + printk(KERN_DEBUG "Force enabled HPET at resume\n"); + else + BUG(); +} + +static void old_ich_force_enable_hpet(struct pci_dev *dev) +{ + u32 val; + u32 uninitialized_var(gen_cntl); + + if (hpet_address || force_hpet_address) + return; + + pci_read_config_dword(dev, 0xD0, &gen_cntl); + /* + * Bit 17 is HPET enable bit. + * Bit 16:15 control the HPET base address. + */ + val = gen_cntl >> 15; + val &= 0x7; + if (val & 0x4) { + val &= 0x3; + force_hpet_address = 0xFED00000 | (val << 12); + printk(KERN_DEBUG "HPET at base address 0x%lx\n", + force_hpet_address); + return; + } + + /* + * HPET is disabled. Trying enabling at FED00000 and check + * whether it sticks + */ + gen_cntl &= (~(0x7 << 15)); + gen_cntl |= (0x4 << 15); + pci_write_config_dword(dev, 0xD0, gen_cntl); + + pci_read_config_dword(dev, 0xD0, &gen_cntl); + + val = gen_cntl >> 15; + val &= 0x7; + if (val & 0x4) { + /* HPET is enabled in HPTC. Just not reported by BIOS */ + val &= 0x3; + force_hpet_address = 0xFED00000 | (val << 12); + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + cached_dev = dev; + force_hpet_resume_type = OLD_ICH_FORCE_HPET_RESUME; + return; + } + + printk(KERN_DEBUG "Failed to force enable HPET\n"); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, + old_ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12, + old_ich_force_enable_hpet); + +void force_hpet_resume(void) +{ + switch (force_hpet_resume_type) { + case ICH_FORCE_HPET_RESUME: + return ich_force_hpet_resume(); + + case OLD_ICH_FORCE_HPET_RESUME: + return old_ich_force_hpet_resume(); + + default: + break; + } +} + +#endif diff --git a/arch/x86/kernel/reboot_32.c b/arch/x86/kernel/reboot_32.c index b37ed226830..9e2269d0091 100644 --- a/arch/x86/kernel/reboot_32.c +++ b/arch/x86/kernel/reboot_32.c @@ -1,7 +1,3 @@ -/* - * linux/arch/i386/kernel/reboot.c - */ - #include <linux/mm.h> #include <linux/module.h> #include <linux/delay.h> diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index 03e1cce58f4..8b30b26ad06 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/reboot_fixups.c - * * This is a good place to put board specific reboot fixups. * * List of supported fixups: @@ -11,6 +9,7 @@ #include <asm/delay.h> #include <linux/pci.h> +#include <linux/interrupt.h> #include <asm/reboot_fixups.h> #include <asm/msr.h> @@ -56,6 +55,11 @@ void mach_reboot_fixups(void) struct pci_dev *dev; int i; + /* we can be called from sysrq-B code. In such a case it is + * prohibited to dig PCI */ + if (in_interrupt()) + return; + for (i=0; i < ARRAY_SIZE(fixups_table); i++) { cur = &(fixups_table[i]); dev = pci_get_device(cur->vendor, cur->device, NULL); diff --git a/arch/x86/kernel/scx200_32.c b/arch/x86/kernel/scx200_32.c index c7d3df23f58..87bc159d29d 100644 --- a/arch/x86/kernel/scx200_32.c +++ b/arch/x86/kernel/scx200_32.c @@ -1,8 +1,8 @@ -/* linux/arch/i386/kernel/scx200.c - - Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com> - - National Semiconductor SCx200 support. */ +/* + * Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com> + * + * National Semiconductor SCx200 support. + */ #include <linux/module.h> #include <linux/errno.h> @@ -24,7 +24,7 @@ MODULE_DESCRIPTION("NatSemi SCx200 Driver"); MODULE_LICENSE("GPL"); unsigned scx200_gpio_base = 0; -long scx200_gpio_shadow[2]; +unsigned long scx200_gpio_shadow[2]; unsigned scx200_cb_base = 0; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index d474cd639bc..c8e1bc38d42 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/setup.c - * * Copyright (C) 1995 Linus Torvalds * * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index af838f6b0b7..85b5b6310ac 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -1,10 +1,5 @@ /* - * linux/arch/x86-64/kernel/setup.c - * * Copyright (C) 1995 Linus Torvalds - * - * Nov 2001 Dave Jones <davej@suse.de> - * Forked from i386 setup code. */ /* @@ -546,6 +541,37 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) #endif } +#define ENABLE_C1E_MASK 0x18000000 +#define CPUID_PROCESSOR_SIGNATURE 1 +#define CPUID_XFAM 0x0ff00000 +#define CPUID_XFAM_K8 0x00000000 +#define CPUID_XFAM_10H 0x00100000 +#define CPUID_XFAM_11H 0x00200000 +#define CPUID_XMOD 0x000f0000 +#define CPUID_XMOD_REV_F 0x00040000 + +/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ +static __cpuinit int amd_apic_timer_broken(void) +{ + u32 lo, hi; + u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + switch (eax & CPUID_XFAM) { + case CPUID_XFAM_K8: + if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) + break; + case CPUID_XFAM_10H: + case CPUID_XFAM_11H: + rdmsr(MSR_K8_ENABLE_C1E, lo, hi); + if (lo & ENABLE_C1E_MASK) + return 1; + break; + default: + /* err on the side of caution */ + return 1; + } + return 0; +} + static void __cpuinit init_amd(struct cpuinfo_x86 *c) { unsigned level; @@ -617,6 +643,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* Family 10 doesn't support C states in MWAIT so don't use it */ if (c->x86 == 0x10 && !force_mwait) clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); + + if (amd_apic_timer_broken()) + disable_apic_timer = 1; } static void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -1041,7 +1070,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (smp_num_siblings * c->x86_max_cores > 1) { int cpu = c - cpu_data; seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index c03570f7fe8..d01d51fcce2 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/signal.c - * * Copyright (C) 1991, 1992 Linus Torvalds * * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 739175b01e0..683802bec41 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/signal.c - * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs * diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index e4f61d1c624..31fc08bd15e 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -70,12 +70,12 @@ EXPORT_SYMBOL(smp_num_siblings); int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; /* representing HT siblings of each logical CPU */ -cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_sibling_map); +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); /* representing HT and core siblings of each logical CPU */ -cpumask_t cpu_core_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_core_map); +DEFINE_PER_CPU(cpumask_t, cpu_core_map); +EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* bitmap of online cpus */ cpumask_t cpu_online_map __read_mostly; @@ -300,7 +300,7 @@ cpumask_t cpu_coregroup_map(int cpu) * And for power savings, we return cpu_core_map */ if (sched_mc_power_savings || sched_smt_power_savings) - return cpu_core_map[cpu]; + return per_cpu(cpu_core_map, cpu); else return c->llc_shared_map; } @@ -319,22 +319,22 @@ void __cpuinit set_cpu_sibling_map(int cpu) for_each_cpu_mask(i, cpu_sibling_setup_map) { if (c[cpu].phys_proc_id == c[i].phys_proc_id && c[cpu].cpu_core_id == c[i].cpu_core_id) { - cpu_set(i, cpu_sibling_map[cpu]); - cpu_set(cpu, cpu_sibling_map[i]); - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, per_cpu(cpu_sibling_map, cpu)); + cpu_set(cpu, per_cpu(cpu_sibling_map, i)); + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); cpu_set(i, c[cpu].llc_shared_map); cpu_set(cpu, c[i].llc_shared_map); } } } else { - cpu_set(cpu, cpu_sibling_map[cpu]); + cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); } cpu_set(cpu, c[cpu].llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { - cpu_core_map[cpu] = cpu_sibling_map[cpu]; + per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); c[cpu].booted_cores = 1; return; } @@ -346,17 +346,17 @@ void __cpuinit set_cpu_sibling_map(int cpu) cpu_set(cpu, c[i].llc_shared_map); } if (c[cpu].phys_proc_id == c[i].phys_proc_id) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); /* * Does this new cpu bringup a new core? */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { /* * for each core in package, increment * the booted_cores for this new cpu */ - if (first_cpu(cpu_sibling_map[i]) == i) + if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) c[cpu].booted_cores++; /* * increment the core count for all @@ -983,8 +983,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk(KERN_NOTICE "Local APIC not detected." " Using dummy APIC emulation.\n"); map_cpu_to_logical_apicid(); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); return; } @@ -1008,8 +1008,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); return; } @@ -1023,8 +1023,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); return; } @@ -1102,16 +1102,16 @@ static void __init smp_boot_cpus(unsigned int max_cpus) Dprintk("Boot done.\n"); /* - * construct cpu_sibling_map[], so that we can tell sibling CPUs + * construct cpu_sibling_map, so that we can tell sibling CPUs * efficiently. */ for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); } - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); smpboot_setup_io_apic(); @@ -1148,19 +1148,19 @@ void remove_siblinginfo(int cpu) int sibling; struct cpuinfo_x86 *c = cpu_data; - for_each_cpu_mask(sibling, cpu_core_map[cpu]) { - cpu_clear(cpu, cpu_core_map[sibling]); - /* + for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { + cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); + /*/ * last thread sibling in this cpu core going down */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) c[sibling].booted_cores--; } - for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) - cpu_clear(cpu, cpu_sibling_map[sibling]); - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) + cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); c[cpu].phys_proc_id = 0; c[cpu].cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 32f50783edc..0faa0a0af27 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -91,12 +91,12 @@ EXPORT_SYMBOL(cpu_data); int smp_threads_ready; /* representing HT siblings of each logical CPU */ -cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_sibling_map); +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); /* representing HT and core siblings of each logical CPU */ -cpumask_t cpu_core_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_core_map); +DEFINE_PER_CPU(cpumask_t, cpu_core_map); +EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* * Trampoline 80x86 program as an array. @@ -223,8 +223,6 @@ void __cpuinit smp_callin(void) local_irq_disable(); Dprintk("Stack at about %p\n",&cpuid); - disable_APIC_timer(); - /* * Save our processor parameters */ @@ -245,7 +243,7 @@ cpumask_t cpu_coregroup_map(int cpu) * And for power savings, we return cpu_core_map */ if (sched_mc_power_savings || sched_smt_power_savings) - return cpu_core_map[cpu]; + return per_cpu(cpu_core_map, cpu); else return c->llc_shared_map; } @@ -264,22 +262,22 @@ static inline void set_cpu_sibling_map(int cpu) for_each_cpu_mask(i, cpu_sibling_setup_map) { if (c[cpu].phys_proc_id == c[i].phys_proc_id && c[cpu].cpu_core_id == c[i].cpu_core_id) { - cpu_set(i, cpu_sibling_map[cpu]); - cpu_set(cpu, cpu_sibling_map[i]); - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, per_cpu(cpu_sibling_map, cpu)); + cpu_set(cpu, per_cpu(cpu_sibling_map, i)); + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); cpu_set(i, c[cpu].llc_shared_map); cpu_set(cpu, c[i].llc_shared_map); } } } else { - cpu_set(cpu, cpu_sibling_map[cpu]); + cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); } cpu_set(cpu, c[cpu].llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { - cpu_core_map[cpu] = cpu_sibling_map[cpu]; + per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); c[cpu].booted_cores = 1; return; } @@ -291,17 +289,17 @@ static inline void set_cpu_sibling_map(int cpu) cpu_set(cpu, c[i].llc_shared_map); } if (c[cpu].phys_proc_id == c[i].phys_proc_id) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); /* * Does this new cpu bringup a new core? */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { /* * for each core in package, increment * the booted_cores for this new cpu */ - if (first_cpu(cpu_sibling_map[i]) == i) + if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) c[cpu].booted_cores++; /* * increment the core count for all @@ -337,19 +335,12 @@ void __cpuinit start_secondary(void) */ check_tsc_sync_target(); - Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); - setup_secondary_APIC_clock(); - - Dprintk("cpu %d: enabling apic timer\n", smp_processor_id()); - if (nmi_watchdog == NMI_IO_APIC) { disable_8259A_irq(0); enable_NMI_through_LVT0(NULL); enable_8259A_irq(0); } - enable_APIC_timer(); - /* * The sibling maps must be set before turing the online map on for * this cpu @@ -378,6 +369,8 @@ void __cpuinit start_secondary(void) unlock_ipi_call_lock(); + setup_secondary_APIC_clock(); + cpu_idle(); } @@ -742,8 +735,8 @@ static __init void disable_smp(void) phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); else phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); } #ifdef CONFIG_HOTPLUG_CPU @@ -978,19 +971,19 @@ static void remove_siblinginfo(int cpu) int sibling; struct cpuinfo_x86 *c = cpu_data; - for_each_cpu_mask(sibling, cpu_core_map[cpu]) { - cpu_clear(cpu, cpu_core_map[sibling]); + for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { + cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); /* * last thread sibling in this cpu core going down */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) c[sibling].booted_cores--; } - for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) - cpu_clear(cpu, cpu_sibling_map[sibling]); - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) + cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); c[cpu].phys_proc_id = 0; c[cpu].cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index cb910911358..413e527cdeb 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -1,6 +1,4 @@ /* - * arch/x86_64/kernel/stacktrace.c - * * Stack trace management functions * * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index d0e01a3acf3..91c7acc8d99 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -1,5 +1,5 @@ /* - * arch/i386/kernel/summit.c - IBM Summit-Specific Code + * IBM Summit-Specific Code * * Written By: Matthew Dobson, IBM Corporation * diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 42147304de8..f8bae9ba032 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/sys_i386.c - * * This file contains various random system calls that * have a non-standard calling sequence on the Linux/i386 * platform. diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 4770b7a2052..907942ee6e7 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -1,7 +1,3 @@ -/* - * linux/arch/x86_64/kernel/sys_x86_64.c - */ - #include <linux/errno.h> #include <linux/sched.h> #include <linux/syscalls.h> diff --git a/arch/x86/kernel/sysenter_32.c b/arch/x86/kernel/sysenter_32.c index 4eb2e408764..5a2d951e260 100644 --- a/arch/x86/kernel/sysenter_32.c +++ b/arch/x86/kernel/sysenter_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/sysenter.c - * * (C) Copyright 2002 Linus Torvalds * Portions based on the vdso-randomization code from exec-shield: * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 19a6c678d02..8a322c96bc2 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/time.c - * * Copyright (C) 1991, 1992, 1995 Linus Torvalds * * This file contains the PC-specific time handling details: @@ -157,6 +155,9 @@ EXPORT_SYMBOL(profile_pc); */ irqreturn_t timer_interrupt(int irq, void *dev_id) { + /* Keep nmi watchdog up to date */ + per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; + #ifdef CONFIG_X86_IO_APIC if (timer_ack) { /* diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 6d48a4e826d..c821edc3221 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -1,6 +1,4 @@ /* - * linux/arch/x86-64/kernel/time.c - * * "High Precision Event Timer" based timekeeping. * * Copyright (c) 1991,1992,1995 Linus Torvalds @@ -28,11 +26,12 @@ #include <linux/cpu.h> #include <linux/kallsyms.h> #include <linux/acpi.h> +#include <linux/clockchips.h> + #ifdef CONFIG_ACPI #include <acpi/achware.h> /* for PM timer frequency */ #include <acpi/acpi_bus.h> #endif -#include <asm/8253pit.h> #include <asm/i8253.h> #include <asm/pgtable.h> #include <asm/vsyscall.h> @@ -47,12 +46,8 @@ #include <asm/nmi.h> #include <asm/vgtod.h> -static char *timename = NULL; - DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); -DEFINE_SPINLOCK(i8253_lock); -EXPORT_SYMBOL(i8253_lock); volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; @@ -153,45 +148,12 @@ int update_persistent_clock(struct timespec now) return set_rtc_mmss(now.tv_sec); } -void main_timer_handler(void) +static irqreturn_t timer_event_interrupt(int irq, void *dev_id) { -/* - * Here we are in the timer irq handler. We have irqs locally disabled (so we - * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running - * on the other CPU, so we need a lock. We also need to lock the vsyscall - * variables, because both do_timer() and us change them -arca+vojtech - */ - - write_seqlock(&xtime_lock); - -/* - * Do the timer stuff. - */ + add_pda(irq0_irqs, 1); - do_timer(1); -#ifndef CONFIG_SMP - update_process_times(user_mode(get_irq_regs())); -#endif - -/* - * In the SMP case we use the local APIC timer interrupt to do the profiling, - * except when we simulate SMP mode on a uniprocessor system, in that case we - * have to call the local interrupt handler. - */ - - if (!using_apic_timer) - smp_local_timer_interrupt(); + global_clock_event->event_handler(global_clock_event); - write_sequnlock(&xtime_lock); -} - -static irqreturn_t timer_interrupt(int irq, void *dev_id) -{ - if (apic_runs_main_timer > 1) - return IRQ_HANDLED; - main_timer_handler(); - if (using_apic_timer) - smp_send_timer_broadcast_ipi(); return IRQ_HANDLED; } @@ -292,97 +254,21 @@ static unsigned int __init tsc_calibrate_cpu_khz(void) return pmc_now * tsc_khz / (tsc_now - tsc_start); } -/* - * pit_calibrate_tsc() uses the speaker output (channel 2) of - * the PIT. This is better than using the timer interrupt output, - * because we can read the value of the speaker with just one inb(), - * where we need three i/o operations for the interrupt channel. - * We count how many ticks the TSC does in 50 ms. - */ - -static unsigned int __init pit_calibrate_tsc(void) -{ - unsigned long start, end; - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - outb(0xb0, 0x43); - outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42); - start = get_cycles_sync(); - while ((inb(0x61) & 0x20) == 0); - end = get_cycles_sync(); - - spin_unlock_irqrestore(&i8253_lock, flags); - - return (end - start) / 50; -} - -#define PIT_MODE 0x43 -#define PIT_CH0 0x40 - -static void __pit_init(int val, u8 mode) -{ - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - outb_p(mode, PIT_MODE); - outb_p(val & 0xff, PIT_CH0); /* LSB */ - outb_p(val >> 8, PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); -} - -void __init pit_init(void) -{ - __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */ -} - -void pit_stop_interrupt(void) -{ - __pit_init(0, 0x30); /* mode 0 */ -} - -void stop_timer_interrupt(void) -{ - char *name; - if (hpet_address) { - name = "HPET"; - hpet_timer_stop_set_go(0); - } else { - name = "PIT"; - pit_stop_interrupt(); - } - printk(KERN_INFO "timer: %s interrupt stopped.\n", name); -} - static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_IRQPOLL, + .handler = timer_event_interrupt, + .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, .mask = CPU_MASK_NONE, .name = "timer" }; void __init time_init(void) { - if (nohpet) - hpet_address = 0; + if (!hpet_enable()) + setup_pit_timer(); - if (hpet_arch_init()) - hpet_address = 0; + setup_irq(0, &irq0); - if (hpet_use_timer) { - /* set tick_nsec to use the proper rate for HPET */ - tick_nsec = TICK_NSEC_HPET; - tsc_khz = hpet_calibrate_tsc(); - timename = "HPET"; - } else { - pit_init(); - tsc_khz = pit_calibrate_tsc(); - timename = "PIT"; - } + tsc_calibrate(); cpu_khz = tsc_khz; if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && @@ -398,50 +284,7 @@ void __init time_init(void) else vgetcpu_mode = VGETCPU_LSL; - set_cyc2ns_scale(tsc_khz); printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); init_tsc_clocksource(); - - setup_irq(0, &irq0); -} - -/* - * sysfs support for the timer. - */ - -static int timer_suspend(struct sys_device *dev, pm_message_t state) -{ - return 0; -} - -static int timer_resume(struct sys_device *dev) -{ - if (hpet_address) - hpet_reenable(); - else - i8254_timer_resume(); - return 0; } - -static struct sysdev_class timer_sysclass = { - .resume = timer_resume, - .suspend = timer_suspend, - set_kset_name("timer"), -}; - -/* XXX this sysfs stuff should probably go elsewhere later -john */ -static struct sys_device device_timer = { - .id = 0, - .cls = &timer_sysclass, -}; - -static int time_init_device(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(time_init_device); diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 45782356a61..c25f23eb397 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -1,5 +1,5 @@ /* - * arch/i386/kernel/topology.c - Populate sysfs with topology information + * Populate sysfs with topology information * * Written by: Matthew Dobson, IBM Corporation * Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 47b0bef335b..05c27ecaf2a 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/traps.c - * * Copyright (C) 1991, 1992 Linus Torvalds * * Pentium III FXSR, SSE support diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 03888420775..bc7116acf8f 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1,6 +1,4 @@ /* - * linux/arch/x86-64/traps.c - * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs * diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 3ed0ae8c918..b85ad754f70 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -1,9 +1,3 @@ -/* - * This code largely moved from arch/i386/kernel/timer/timer_tsc.c - * which was originally moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - */ - #include <linux/sched.h> #include <linux/clocksource.h> #include <linux/workqueue.h> diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 2a59bde663f..9f22e542c37 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -6,7 +6,9 @@ #include <linux/time.h> #include <linux/acpi.h> #include <linux/cpufreq.h> +#include <linux/acpi_pmtmr.h> +#include <asm/hpet.h> #include <asm/timex.h> static int notsc __initdata = 0; @@ -18,7 +20,7 @@ EXPORT_SYMBOL(tsc_khz); static unsigned int cyc2ns_scale __read_mostly; -void set_cyc2ns_scale(unsigned long khz) +static inline void set_cyc2ns_scale(unsigned long khz) { cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz; } @@ -118,6 +120,95 @@ core_initcall(cpufreq_tsc); #endif +#define MAX_RETRIES 5 +#define SMI_TRESHOLD 50000 + +/* + * Read TSC and the reference counters. Take care of SMI disturbance + */ +static unsigned long __init tsc_read_refs(unsigned long *pm, + unsigned long *hpet) +{ + unsigned long t1, t2; + int i; + + for (i = 0; i < MAX_RETRIES; i++) { + t1 = get_cycles_sync(); + if (hpet) + *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; + else + *pm = acpi_pm_read_early(); + t2 = get_cycles_sync(); + if ((t2 - t1) < SMI_TRESHOLD) + return t2; + } + return ULONG_MAX; +} + +/** + * tsc_calibrate - calibrate the tsc on boot + */ +void __init tsc_calibrate(void) +{ + unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2; + int hpet = is_hpet_enabled(); + + local_irq_save(flags); + + tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + outb(0xb0, 0x43); + outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); + outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + tr1 = get_cycles_sync(); + while ((inb(0x61) & 0x20) == 0); + tr2 = get_cycles_sync(); + + tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + + local_irq_restore(flags); + + /* + * Preset the result with the raw and inaccurate PIT + * calibration value + */ + tsc_khz = (tr2 - tr1) / 50; + + /* hpet or pmtimer available ? */ + if (!hpet && !pm1 && !pm2) { + printk(KERN_INFO "TSC calibrated against PIT\n"); + return; + } + + /* Check, whether the sampling was disturbed by an SMI */ + if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { + printk(KERN_WARNING "TSC calibration disturbed by SMI, " + "using PIT calibration result\n"); + return; + } + + tsc2 = (tsc2 - tsc1) * 1000000L; + + if (hpet) { + printk(KERN_INFO "TSC calibrated against HPET\n"); + if (hpet2 < hpet1) + hpet2 += 0x100000000; + hpet2 -= hpet1; + tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000; + } else { + printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); + if (pm2 < pm1) + pm2 += ACPI_PM_OVRRUN; + pm2 -= pm1; + tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC; + } + + tsc_khz = tsc2 / tsc1; + set_cyc2ns_scale(tsc_khz); +} + /* * Make an educated guess if the TSC is trustworthy and synchronized * over all CPUs. diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 355f5f506c8..9125efe66a0 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -1,5 +1,5 @@ /* - * arch/x86_64/kernel/tsc_sync.c: check TSC synchronization. + * check TSC synchronization. * * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar * diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index f2dcd1d27c0..157e4bedd3c 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -1,6 +1,4 @@ /* - * linux/kernel/vm86.c - * * Copyright (C) 1994 Linus Torvalds * * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86 diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 06c34949bfd..93847d84815 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/vsyscall.c - * * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE * Copyright 2003 Andi Kleen, SuSE Labs. * diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 4620efb12f1..5196762b3b0 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -117,6 +117,7 @@ ENTRY(__copy_user_nocache) popq %rbx CFI_ADJUST_CFA_OFFSET -8 CFI_RESTORE rbx + sfence ret CFI_RESTORE_STATE diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index 55e586d352d..6ea73f3de56 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -50,6 +50,10 @@ thunk trace_hardirqs_on_thunk,trace_hardirqs_on thunk trace_hardirqs_off_thunk,trace_hardirqs_off #endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + thunk lockdep_sys_exit_thunk,lockdep_sys_exit +#endif /* SAVE_ARGS below is used only for the .cfi directives it contains. */ CFI_STARTPROC diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c index fcb38e7f354..c686ae20fd6 100644 --- a/arch/x86/mm/fault_32.c +++ b/arch/x86/mm/fault_32.c @@ -25,6 +25,7 @@ #include <linux/kprobes.h> #include <linux/uaccess.h> #include <linux/kdebug.h> +#include <linux/kprobes.h> #include <asm/system.h> #include <asm/desc.h> @@ -32,33 +33,27 @@ extern void die(const char *,struct pt_regs *,long); -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -int register_page_fault_notifier(struct notifier_block *nb) +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) { - vmalloc_sync_all(); - return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(register_page_fault_notifier); + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode_vm(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); + } -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); + return ret; } -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); - -static inline int notify_page_fault(struct pt_regs *regs, long err) +#else +static inline int notify_page_fault(struct pt_regs *regs) { - struct die_args args = { - .regs = regs, - .str = "page fault", - .err = err, - .trapnr = 14, - .signr = SIGSEGV - }; - return atomic_notifier_call_chain(¬ify_page_fault_chain, - DIE_PAGE_FAULT, &args); + return 0; } +#endif /* * Return EIP plus the CS segment base. The segment limit is also @@ -331,7 +326,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, if (unlikely(address >= TASK_SIZE)) { if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) return; - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch @@ -340,7 +335,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* It's safe to allow irq's after cr2 has been saved and the vmalloc @@ -598,7 +593,7 @@ out_of_memory: } printk("VM: killing process %s\n", tsk->comm); if (error_code & 4) - do_exit(SIGKILL); + do_group_exit(SIGKILL); goto no_context; do_sigbus: diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index 54816adb8e9..5e0e54906c4 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c @@ -25,6 +25,7 @@ #include <linux/kprobes.h> #include <linux/uaccess.h> #include <linux/kdebug.h> +#include <linux/kprobes.h> #include <asm/system.h> #include <asm/pgalloc.h> @@ -40,34 +41,27 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -/* Hook to register for page fault notifications */ -int register_page_fault_notifier(struct notifier_block *nb) +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) { - vmalloc_sync_all(); - return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(register_page_fault_notifier); + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); + } -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); + return ret; } -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); - -static inline int notify_page_fault(struct pt_regs *regs, long err) +#else +static inline int notify_page_fault(struct pt_regs *regs) { - struct die_args args = { - .regs = regs, - .str = "page fault", - .err = err, - .trapnr = 14, - .signr = SIGSEGV - }; - return atomic_notifier_call_chain(¬ify_page_fault_chain, - DIE_PAGE_FAULT, &args); + return 0; } +#endif /* Sometimes the CPU reports invalid exceptions on prefetch. Check that here and ignore. @@ -345,7 +339,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (vmalloc_fault(address) >= 0) return; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch @@ -354,7 +348,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; if (likely(regs->eflags & X86_EFLAGS_IF)) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 730a5b177b1..dda4e83649a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -735,11 +735,6 @@ int arch_add_memory(int nid, u64 start, u64 size) return __add_pages(zone, start_pfn, nr_pages); } -int remove_memory(u64 start, u64 size) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(remove_memory); #endif struct kmem_cache *pmd_cache; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 458893b376f..1e3862e4106 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -474,12 +474,6 @@ error: } EXPORT_SYMBOL_GPL(arch_add_memory); -int remove_memory(u64 start, u64 size) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(remove_memory); - #if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA) int memory_add_physaddr_to_nid(u64 start) { @@ -748,3 +742,48 @@ const char *arch_vma_name(struct vm_area_struct *vma) return "[vsyscall]"; return NULL; } + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +/* + * Initialise the sparsemem vmemmap using huge-pages at the PMD level. + */ +int __meminit vmemmap_populate(struct page *start_page, + unsigned long size, int node) +{ + unsigned long addr = (unsigned long)start_page; + unsigned long end = (unsigned long)(start_page + size); + unsigned long next; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + for (; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + + pgd = vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + pud = vmemmap_pud_populate(pgd, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t entry; + void *p = vmemmap_alloc_block(PMD_SIZE, node); + if (!p) + return -ENOMEM; + + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); + mk_pte_huge(entry); + set_pmd(pmd, __pmd(pte_val(entry))); + + printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", + addr, addr + PMD_SIZE - 1, p, node); + } else + vmemmap_verify((pte_t *)pmd, node, addr, next); + } + + return 0; +} +#endif diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 47925927b12..56b4757a1f4 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -379,7 +379,7 @@ static unsigned int get_stagger(void) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - return (cpu != first_cpu(cpu_sibling_map[cpu])); + return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu))); #endif return 0; } diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index bc8a44bddaa..2d88f7c6d6a 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -2,15 +2,199 @@ #include <linux/acpi.h> #include <linux/init.h> #include <linux/irq.h> +#include <linux/dmi.h> #include <asm/numa.h> #include "pci.h" +static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) +{ + pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; + printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident); + return 0; +} + +static struct dmi_system_id acpi_pciprobe_dmi_table[] = { +/* + * Systems where PCI IO resource ISA alignment can be skipped + * when the ISA enable bit in the bridge control is not set + */ + { + .callback = can_skip_ioresource_align, + .ident = "IBM System x3800", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), + }, + }, + { + .callback = can_skip_ioresource_align, + .ident = "IBM System x3850", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "x3850"), + }, + }, + { + .callback = can_skip_ioresource_align, + .ident = "IBM System x3950", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "x3950"), + }, + }, + {} +}; + +struct pci_root_info { + char *name; + unsigned int res_num; + struct resource *res; + struct pci_bus *bus; + int busnum; +}; + +static acpi_status +resource_to_addr(struct acpi_resource *resource, + struct acpi_resource_address64 *addr) +{ + acpi_status status; + + status = acpi_resource_to_address64(resource, addr); + if (ACPI_SUCCESS(status) && + (addr->resource_type == ACPI_MEMORY_RANGE || + addr->resource_type == ACPI_IO_RANGE) && + addr->address_length > 0 && + addr->producer_consumer == ACPI_PRODUCER) { + return AE_OK; + } + return AE_ERROR; +} + +static acpi_status +count_resource(struct acpi_resource *acpi_res, void *data) +{ + struct pci_root_info *info = data; + struct acpi_resource_address64 addr; + acpi_status status; + + status = resource_to_addr(acpi_res, &addr); + if (ACPI_SUCCESS(status)) + info->res_num++; + return AE_OK; +} + +static acpi_status +setup_resource(struct acpi_resource *acpi_res, void *data) +{ + struct pci_root_info *info = data; + struct resource *res; + struct acpi_resource_address64 addr; + acpi_status status; + unsigned long flags; + struct resource *root; + + status = resource_to_addr(acpi_res, &addr); + if (!ACPI_SUCCESS(status)) + return AE_OK; + + if (addr.resource_type == ACPI_MEMORY_RANGE) { + root = &iomem_resource; + flags = IORESOURCE_MEM; + if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY) + flags |= IORESOURCE_PREFETCH; + } else if (addr.resource_type == ACPI_IO_RANGE) { + root = &ioport_resource; + flags = IORESOURCE_IO; + } else + return AE_OK; + + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = addr.minimum + addr.translation_offset; + res->end = res->start + addr.address_length - 1; + res->child = NULL; + + if (insert_resource(root, res)) { + printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " + "from %s for %s\n", (unsigned long) res->start, + (unsigned long) res->end, root->name, info->name); + } else { + info->bus->resource[info->res_num] = res; + info->res_num++; + } + return AE_OK; +} + +static void +adjust_transparent_bridge_resources(struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + int i; + u16 class = dev->class >> 8; + + if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent) { + for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++) + dev->subordinate->resource[i] = + dev->bus->resource[i - 3]; + } + } +} + +static void +get_current_resources(struct acpi_device *device, int busnum, + struct pci_bus *bus) +{ + struct pci_root_info info; + size_t size; + + info.bus = bus; + info.res_num = 0; + acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, + &info); + if (!info.res_num) + return; + + size = sizeof(*info.res) * info.res_num; + info.res = kmalloc(size, GFP_KERNEL); + if (!info.res) + goto res_alloc_fail; + + info.name = kmalloc(12, GFP_KERNEL); + if (!info.name) + goto name_alloc_fail; + sprintf(info.name, "PCI Bus #%02x", busnum); + + info.res_num = 0; + acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, + &info); + if (info.res_num) + adjust_transparent_bridge_resources(bus); + + return; + +name_alloc_fail: + kfree(info.res); +res_alloc_fail: + return; +} + struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum) { struct pci_bus *bus; struct pci_sysdata *sd; int pxm; + dmi_check_system(acpi_pciprobe_dmi_table); + + if (domain && !pci_domains_supported) { + printk(KERN_WARNING "PCI: Multiple domains not supported " + "(dom %d, bus %d)\n", domain, busnum); + return NULL; + } + /* Allocate per-root-bus (not per bus) arch-specific data. * TODO: leak; this memory is never freed. * It's arguable whether it's worth the trouble to care. @@ -21,12 +205,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do return NULL; } - if (domain != 0) { - printk(KERN_WARNING "PCI: Multiple domains not supported\n"); - kfree(sd); - return NULL; - } - + sd->domain = domain; sd->node = -1; pxm = acpi_get_pxm(device->handle); @@ -47,6 +226,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do } } #endif + + if (bus && (pci_probe & PCI_USE__CRS)) + get_current_resources(device, busnum, bus); return bus; } diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 07d5223442b..2d71bbc411d 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -29,12 +29,14 @@ struct pci_raw_ops *raw_pci_ops; static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) { - return raw_pci_ops->read(0, bus->number, devfn, where, size, value); + return raw_pci_ops->read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); } static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) { - return raw_pci_ops->write(0, bus->number, devfn, where, size, value); + return raw_pci_ops->write(pci_domain_nr(bus), bus->number, + devfn, where, size, value); } struct pci_ops pci_root_ops = { @@ -287,6 +289,16 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL685c G1"), }, }, +#ifdef __i386__ + { + .callback = assign_all_busses, + .ident = "Compaq EVO N800c", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), + DMI_MATCH(DMI_PRODUCT_NAME, "EVO N800c"), + }, + }, +#endif {} }; @@ -426,6 +438,9 @@ char * __devinit pcibios_setup(char *str) } else if (!strcmp(str, "assign-busses")) { pci_probe |= PCI_ASSIGN_ALL_BUSSES; return NULL; + } else if (!strcmp(str, "use_crs")) { + pci_probe |= PCI_USE__CRS; + return NULL; } else if (!strcmp(str, "routeirq")) { pci_routeirq = 1; return NULL; diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index c82cbf4c722..6cff66dd0c9 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -353,6 +353,53 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); + +static struct dmi_system_id __devinitdata msi_k8t_dmi_table[] = { + { + .ident = "MSI-K8T-Neo2Fir", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MSI"), + DMI_MATCH(DMI_PRODUCT_NAME, "MS-6702E"), + }, + }, + {} +}; + +/* + * The AMD-Athlon64 board MSI "K8T Neo2-FIR" disables the onboard sound + * card if a PCI-soundcard is added. + * + * The BIOS only gives options "DISABLED" and "AUTO". This code sets + * the corresponding register-value to enable the soundcard. + * + * The soundcard is only enabled, if the mainborad is identified + * via DMI-tables and the soundcard is detected to be off. + */ +static void __devinit pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev) +{ + unsigned char val; + if (!dmi_check_system(msi_k8t_dmi_table)) + return; /* only applies to MSI K8T Neo2-FIR */ + + pci_read_config_byte(dev, 0x50, &val); + if (val & 0x40) { + pci_write_config_byte(dev, 0x50, val & (~0x40)); + + /* verify the change for status output */ + pci_read_config_byte(dev, 0x50, &val); + if (val & 0x40) + printk(KERN_INFO "PCI: Detected MSI K8T Neo2-FIR, " + "can't enable onboard soundcard!\n"); + else + printk(KERN_INFO "PCI: Detected MSI K8T Neo2-FIR, " + "enabled onboard soundcard.\n"); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, + pci_fixup_msi_k8t_onboard_sound); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, + pci_fixup_msi_k8t_onboard_sound); + /* * Some Toshiba laptops need extra code to enable their TI TSB43AB22/A. * diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index bcd2f94b732..42ba0e2da1a 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -33,6 +33,15 @@ #include "pci.h" +static int +skip_isa_ioresource_align(struct pci_dev *dev) { + + if ((pci_probe & PCI_CAN_SKIP_ISA_ALIGN) && + !(dev->bus->bridge_ctl & PCI_BRIDGE_CTL_ISA)) + return 1; + return 0; +} + /* * We need to avoid collisions with `mirrored' VGA ports * and other strange ISA hardware, so we always want the @@ -50,9 +59,13 @@ void pcibios_align_resource(void *data, struct resource *res, resource_size_t size, resource_size_t align) { + struct pci_dev *dev = data; + if (res->flags & IORESOURCE_IO) { resource_size_t start = res->start; + if (skip_isa_ioresource_align(dev)) + return; if (start & 0x300) { start = (start + 0x3ff) & ~0x3ff; res->start = start; diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index d98c6b096f8..c52150fdf82 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -492,6 +492,26 @@ static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq return 1; } +/* + * PicoPower PT86C523 + */ +static int pirq_pico_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + outb(0x10 + ((pirq - 1) >> 1), 0x24); + return ((pirq - 1) & 1) ? (inb(0x26) >> 4) : (inb(0x26) & 0xf); +} + +static int pirq_pico_set(struct pci_dev *router, struct pci_dev *dev, int pirq, + int irq) +{ + unsigned int x; + outb(0x10 + ((pirq - 1) >> 1), 0x24); + x = inb(0x26); + x = ((pirq - 1) & 1) ? ((x & 0x0f) | (irq << 4)) : ((x & 0xf0) | (irq)); + outb(x, 0x26); + return 1; +} + #ifdef CONFIG_PCI_BIOS static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) @@ -721,6 +741,24 @@ static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, return 1; } +static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_PICOPOWER_PT86C523: + r->name = "PicoPower PT86C523"; + r->get = pirq_pico_get; + r->set = pirq_pico_set; + return 1; + + case PCI_DEVICE_ID_PICOPOWER_PT86C523BBP: + r->name = "PicoPower PT86C523 rev. BB+"; + r->get = pirq_pico_get; + r->set = pirq_pico_set; + return 1; + } + return 0; +} + static __initdata struct irq_router_handler pirq_routers[] = { { PCI_VENDOR_ID_INTEL, intel_router_probe }, { PCI_VENDOR_ID_AL, ali_router_probe }, @@ -732,6 +770,7 @@ static __initdata struct irq_router_handler pirq_routers[] = { { PCI_VENDOR_ID_VLSI, vlsi_router_probe }, { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe }, { PCI_VENDOR_ID_AMD, amd_router_probe }, + { PCI_VENDOR_ID_PICOPOWER, pico_router_probe }, /* Someone with docs needs to add the ATI Radeon IGP */ { 0, NULL } }; diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index 8c66f275756..ac56d3916c5 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -26,6 +26,8 @@ #define PCI_ASSIGN_ROMS 0x1000 #define PCI_BIOS_IRQ_SCAN 0x2000 #define PCI_ASSIGN_ALL_BUSSES 0x4000 +#define PCI_CAN_SKIP_ISA_ALIGN 0x8000 +#define PCI_USE__CRS 0x10000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 557b8e24706..4fa33c27ccb 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -147,8 +147,13 @@ void __init xen_smp_prepare_boot_cpu(void) make_lowmem_page_readwrite(&per_cpu__gdt_page); for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + /* + * cpu_core_map lives in a per cpu area that is cleared + * when the per cpu array is allocated. + * + * cpus_clear(per_cpu(cpu_core_map, cpu)); + */ } xen_setup_vcpu_info_placement(); @@ -159,8 +164,13 @@ void __init xen_smp_prepare_cpus(unsigned int max_cpus) unsigned cpu; for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + /* + * cpu_core_ map will be zeroed when the per + * cpu area is allocated. + * + * cpus_clear(per_cpu(cpu_core_map, cpu)); + */ } smp_store_cpu_info(0); |