diff options
Diffstat (limited to 'arch/i386/kernel')
30 files changed, 513 insertions, 302 deletions
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 5546ddebec3..9204be6eedb 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -803,6 +803,7 @@ no_apic: void __init init_apic_mappings(void) { + unsigned int orig_apicid; unsigned long apic_phys; /* @@ -824,8 +825,11 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + orig_apicid = boot_cpu_physical_apicid; + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + if ((orig_apicid != -1U) && (orig_apicid != boot_cpu_physical_apicid)) + printk(KERN_WARNING "Boot APIC ID in local APIC unexpected (%d vs %d)", + orig_apicid, boot_cpu_physical_apicid); #ifdef CONFIG_X86_IO_APIC { @@ -1046,10 +1050,11 @@ static unsigned int calibration_result; void __init setup_boot_APIC_clock(void) { + unsigned long flags; apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_disable(); + local_irq_save(flags); calibration_result = calibrate_APIC_clock(); /* @@ -1057,7 +1062,7 @@ void __init setup_boot_APIC_clock(void) */ setup_APIC_timer(calibration_result); - local_irq_enable(); + local_irq_restore(flags); } void __devinit setup_secondary_APIC_clock(void) @@ -1254,40 +1259,81 @@ fastcall void smp_error_interrupt(struct pt_regs *regs) } /* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. + * This initializes the IO-APIC and APIC hardware. */ -int __init APIC_init_uniprocessor (void) +int __init APIC_init(void) { - if (enable_local_apic < 0) - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + if (enable_local_apic < 0) { + printk(KERN_INFO "APIC disabled\n"); + return -1; + } - if (!smp_found_config && !cpu_has_apic) + /* See if we have a SMP configuration or have forced enabled + * the local apic. 
+ */ + if (!smp_found_config && !acpi_lapic && !cpu_has_apic) { + enable_local_apic = -1; return -1; + } /* - * Complain if the BIOS pretends there is one. + * Complain if the BIOS pretends there is an apic. + * Then get out because we don't have an a local apic. */ if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); + printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); + enable_local_apic = -1; return -1; } verify_local_APIC(); + /* + * Should not be necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. + * Makes no sense to do this check in clustered apic mode, so skip it + */ + if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { + printk("weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_physical_apicid); + physid_set(boot_cpu_physical_apicid, phys_cpu_present_map); + } + + /* + * Switch from PIC to APIC mode. + */ connect_bsp_APIC(); + setup_local_APIC(); - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); +#ifdef CONFIG_X86_IO_APIC + /* + * Now start the IO-APICs + */ + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +#endif + return 0; +} - setup_local_APIC(); +void __init APIC_late_time_init(void) +{ + /* Improve our loops per jiffy estimate */ + loops_per_jiffy = ((1000 + HZ - 1)/HZ)*cpu_khz; + boot_cpu_data.loops_per_jiffy = loops_per_jiffy; + cpu_data[0].loops_per_jiffy = loops_per_jiffy; + + /* setup_apic_nmi_watchdog doesn't work properly before cpu_khz is + * initialized. So redo it here to ensure the boot cpu is setup + * properly. 
+ */ + if (nmi_watchdog == NMI_LOCAL_APIC) + setup_apic_nmi_watchdog(); #ifdef CONFIG_X86_IO_APIC - if (smp_found_config) - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + IO_APIC_late_time_init(); #endif setup_boot_APIC_clock(); - - return 0; } diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index d7811c4e8b5..d2ef0c2aa93 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -597,12 +597,14 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, cpumask_t cpus; int cpu; struct desc_struct save_desc_40; + struct desc_struct *gdt; cpus = apm_save_cpus(); cpu = get_cpu(); - save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = gdt[0x40 / 8]; + gdt[0x40 / 8] = bad_bios_desc; local_save_flags(flags); APM_DO_CLI; @@ -610,7 +612,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); APM_DO_RESTORE_SEGS; local_irq_restore(flags); - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40; + gdt[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -639,13 +641,14 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) cpumask_t cpus; int cpu; struct desc_struct save_desc_40; - + struct desc_struct *gdt; cpus = apm_save_cpus(); cpu = get_cpu(); - save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = gdt[0x40 / 8]; + gdt[0x40 / 8] = bad_bios_desc; local_save_flags(flags); APM_DO_CLI; @@ -653,7 +656,7 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); APM_DO_RESTORE_SEGS; local_irq_restore(flags); - __get_cpu_var(cpu_gdt_table)[0x40 / 8] = save_desc_40; + gdt[0x40 / 8] = 
save_desc_40; put_cpu(); apm_restore_cpus(cpus); return error; @@ -2295,35 +2298,36 @@ static int __init apm_init(void) apm_bios_entry.segment = APM_CS; for (i = 0; i < NR_CPUS; i++) { - set_base(per_cpu(cpu_gdt_table, i)[APM_CS >> 3], + struct desc_struct *gdt = get_cpu_gdt_table(i); + set_base(gdt[APM_CS >> 3], __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], + set_base(gdt[APM_CS_16 >> 3], __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(per_cpu(cpu_gdt_table, i)[APM_DS >> 3], + set_base(gdt[APM_DS >> 3], __va((unsigned long)apm_info.bios.dseg << 4)); #ifndef APM_RELAX_SEGMENTS if (apm_info.bios.version == 0x100) { #endif /* For ASUS motherboard, Award BIOS rev 110 (and others?) */ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 - 1); /* For some unknown machine. */ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1); /* For the DEC Hinote Ultra CT475 (and others?) 
*/ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1); #ifndef APM_RELAX_SEGMENTS } else { - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], + _set_limit((char *)&gdt[APM_CS >> 3], (apm_info.bios.cseg_len - 1) & 0xffff); - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], + _set_limit((char *)&gdt[APM_CS_16 >> 3], (apm_info.bios.cseg_16_len - 1) & 0xffff); - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], + _set_limit((char *)&gdt[APM_DS >> 3], (apm_info.bios.dseg_len - 1) & 0xffff); /* workaround for broken BIOSes */ if (apm_info.bios.cseg_len <= apm_info.bios.offset) - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 -1); + _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 -1); if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */ /* for the BIOS that assumes granularity = 1 */ - per_cpu(cpu_gdt_table, i)[APM_DS >> 3].b |= 0x800000; + gdt[APM_DS >> 3].b |= 0x800000; printk(KERN_NOTICE "apm: we set the granularity of dseg.\n"); } } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 9ad43be9a01..74145a33cb0 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -573,6 +573,7 @@ void __devinit cpu_init(void) int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); struct thread_struct *thread = &current->thread; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); if (cpu_test_and_set(cpu, cpu_initialized)) { @@ -594,24 +595,16 @@ void __devinit cpu_init(void) * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table, - GDT_SIZE); + memcpy(gdt, cpu_gdt_table, GDT_SIZE); /* Set up GDT entry for 16bit stack */ - *(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |= + *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= 
((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | (CPU_16BIT_STACK_SIZE - 1); cpu_gdt_descr[cpu].size = GDT_SIZE - 1; - cpu_gdt_descr[cpu].address = - (unsigned long)&per_cpu(cpu_gdt_table, cpu); - - /* - * Set up the per-thread TLS descriptor cache: - */ - memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu), - GDT_ENTRY_TLS_ENTRIES * 8); + cpu_gdt_descr[cpu].address = (unsigned long)gdt; load_gdt(&cpu_gdt_descr[cpu]); load_idt(&idt_descr); diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 822c8ce9d1f..caa9f771134 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -32,6 +32,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/compiler.h> +#include <linux/sched.h> /* current */ #include <asm/io.h> #include <asm/delay.h> #include <asm/uaccess.h> diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c index aa622d52c6e..270f2188d68 100644 --- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c @@ -28,6 +28,7 @@ #include <linux/cpufreq.h> #include <linux/slab.h> #include <linux/cpumask.h> +#include <linux/sched.h> /* current / set_cpus_allowed() */ #include <asm/processor.h> #include <asm/msr.h> diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 58ca98fdc2c..2d5c9adba0c 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -32,6 +32,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/cpumask.h> +#include <linux/sched.h> /* for current / set_cpus_allowed() */ #include <asm/msr.h> #include <asm/io.h> diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index c397b622043..1465974256c 100644 --- 
a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -22,6 +22,7 @@ #include <linux/init.h> #include <linux/cpufreq.h> #include <linux/config.h> +#include <linux/sched.h> /* current */ #include <linux/delay.h> #include <linux/compiler.h> diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 9e0d5f83cb9..4dc42a189ae 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -3,6 +3,7 @@ * * Changes: * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. */ #include <linux/init.h> @@ -10,6 +11,7 @@ #include <linux/device.h> #include <linux/compiler.h> #include <linux/cpu.h> +#include <linux/sched.h> #include <asm/processor.h> #include <asm/smp.h> @@ -28,7 +30,7 @@ struct _cache_table }; /* all the cache descriptor types we care about (no TLB or trace cache entries) */ -static struct _cache_table cache_table[] __devinitdata = +static struct _cache_table cache_table[] __cpuinitdata = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ @@ -117,10 +119,9 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; }; -#define MAX_CACHE_LEAVES 4 static unsigned short num_cache_leaves; -static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { unsigned int eax, ebx, ecx, edx; union _cpuid4_leaf_eax cache_eax; @@ -144,23 +145,18 @@ static int __init find_num_cache_leaves(void) { unsigned int eax, ebx, ecx, edx; union _cpuid4_leaf_eax cache_eax; - int i; - int retval; + int i = -1; - retval = MAX_CACHE_LEAVES; - /* Do cpuid(4) loop to find out num_cache_leaves */ - for (i = 0; i < MAX_CACHE_LEAVES; i++) { + do { + ++i; + /* Do cpuid(4) loop to find out 
num_cache_leaves */ cpuid_count(4, i, &eax, &ebx, &ecx, &edx); cache_eax.full = eax; - if (cache_eax.split.type == CACHE_TYPE_NULL) { - retval = i; - break; - } - } - return retval; + } while (cache_eax.split.type != CACHE_TYPE_NULL); + return i; } -unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c) +unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) { unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ @@ -284,13 +280,7 @@ unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c) if ( l3 ) printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - /* - * This assumes the L3 cache is shared; it typically lives in - * the northbridge. The L1 caches are included by the L2 - * cache, and so should not be included for the purpose of - * SMP switching weights. - */ - c->x86_cache_size = l2 ? l2 : (l1i+l1d); + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); } return l2; @@ -301,7 +291,7 @@ static struct _cpuid4_info *cpuid4_info[NR_CPUS]; #define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) #ifdef CONFIG_SMP -static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index) +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) { struct _cpuid4_info *this_leaf; unsigned long num_threads_sharing; @@ -334,7 +324,7 @@ static void free_cache_attributes(unsigned int cpu) cpuid4_info[cpu] = NULL; } -static int __devinit detect_cache_attributes(unsigned int cpu) +static int __cpuinit detect_cache_attributes(unsigned int cpu) { struct _cpuid4_info *this_leaf; unsigned long j; @@ -511,7 +501,7 @@ static void cpuid4_cache_sysfs_exit(unsigned int cpu) free_cache_attributes(cpu); } -static int __devinit cpuid4_cache_sysfs_init(unsigned int cpu) +static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) { if (num_cache_leaves == 0) @@ -542,7 +532,7 @@ err_out: } /* Add/Remove cache interface for CPU device */ 
-static int __devinit cache_add_dev(struct sys_device * sys_dev) +static int __cpuinit cache_add_dev(struct sys_device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i, j; @@ -579,7 +569,7 @@ static int __devinit cache_add_dev(struct sys_device * sys_dev) return retval; } -static int __devexit cache_remove_dev(struct sys_device * sys_dev) +static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i; @@ -588,24 +578,49 @@ static int __devexit cache_remove_dev(struct sys_device * sys_dev) kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); kobject_unregister(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); - return 0; + return; +} + +static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + cache_add_dev(sys_dev); + break; + case CPU_DEAD: + cache_remove_dev(sys_dev); + break; + } + return NOTIFY_OK; } -static struct sysdev_driver cache_sysdev_driver = { - .add = cache_add_dev, - .remove = __devexit_p(cache_remove_dev), +static struct notifier_block cacheinfo_cpu_notifier = +{ + .notifier_call = cacheinfo_cpu_callback, }; -/* Register/Unregister the cpu_cache driver */ -static int __devinit cache_register_driver(void) +static int __cpuinit cache_sysfs_init(void) { + int i; + if (num_cache_leaves == 0) return 0; - return sysdev_driver_register(&cpu_sysdev_class,&cache_sysdev_driver); + register_cpu_notifier(&cacheinfo_cpu_notifier); + + for_each_online_cpu(i) { + cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, + (void *)(long)i); + } + + return 0; } -device_initcall(cache_register_driver); +device_initcall(cache_sysfs_init); #endif - diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c index 3c035b8fa3d..979b18bc95c 100644 --- 
a/arch/i386/kernel/cpu/mcheck/p6.c +++ b/arch/i386/kernel/cpu/mcheck/p6.c @@ -102,11 +102,16 @@ void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; - /* Don't enable bank 0 on intel P6 cores, it goes bang quickly. */ - for (i=1; i<nr_mce_banks; i++) { + /* + * Following the example in IA-32 SDM Vol 3: + * - MC0_CTL should not be written + * - Status registers on all banks should be cleared on reset + */ + for (i=1; i<nr_mce_banks; i++) wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); + + for (i=0; i<nr_mce_banks; i++) wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); - } set_in_cr4 (X86_CR4_MCE); printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c index 1923e0aed26..cf39e205d33 100644 --- a/arch/i386/kernel/cpu/mtrr/if.c +++ b/arch/i386/kernel/cpu/mtrr/if.c @@ -149,60 +149,89 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return -EINVAL; } -static int -mtrr_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long __arg) +static long +mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) { - int err; + int err = 0; mtrr_type type; struct mtrr_sentry sentry; struct mtrr_gentry gentry; void __user *arg = (void __user *) __arg; switch (cmd) { + case MTRRIOC_ADD_ENTRY: + case MTRRIOC_SET_ENTRY: + case MTRRIOC_DEL_ENTRY: + case MTRRIOC_KILL_ENTRY: + case MTRRIOC_ADD_PAGE_ENTRY: + case MTRRIOC_SET_PAGE_ENTRY: + case MTRRIOC_DEL_PAGE_ENTRY: + case MTRRIOC_KILL_PAGE_ENTRY: + if (copy_from_user(&sentry, arg, sizeof sentry)) + return -EFAULT; + break; + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: + if (copy_from_user(&gentry, arg, sizeof gentry)) + return -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_ENTRY: + case MTRRIOC32_SET_ENTRY: + case MTRRIOC32_DEL_ENTRY: + case MTRRIOC32_KILL_ENTRY: + case 
MTRRIOC32_ADD_PAGE_ENTRY: + case MTRRIOC32_SET_PAGE_ENTRY: + case MTRRIOC32_DEL_PAGE_ENTRY: + case MTRRIOC32_KILL_PAGE_ENTRY: { + struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; + err = get_user(sentry.base, &s32->base); + err |= get_user(sentry.size, &s32->size); + err |= get_user(sentry.type, &s32->type); + if (err) + return err; + break; + } + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: { + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = get_user(gentry.regnum, &g32->regnum); + err |= get_user(gentry.base, &g32->base); + err |= get_user(gentry.size, &g32->size); + err |= get_user(gentry.type, &g32->type); + if (err) + return err; + break; + } +#endif + } + + switch (cmd) { default: return -ENOTTY; case MTRRIOC_ADD_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, file, 0); - if (err < 0) - return err; break; case MTRRIOC_SET_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); - if (err < 0) - return err; break; case MTRRIOC_DEL_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_del(sentry.base, sentry.size, file, 0); - if (err < 0) - return err; break; case MTRRIOC_KILL_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_del(-1, sentry.base, sentry.size); - if (err < 0) - return err; break; case MTRRIOC_GET_ENTRY: - if (copy_from_user(&gentry, arg, sizeof gentry)) - return -EFAULT; if (gentry.regnum >= num_var_ranges) return -EINVAL; mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type); @@ -217,60 +246,59 @@ mtrr_ioctl(struct inode *inode, struct file *file, 
gentry.type = type; } - if (copy_to_user(arg, &gentry, sizeof gentry)) - return -EFAULT; break; case MTRRIOC_ADD_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, file, 1); - if (err < 0) - return err; break; case MTRRIOC_SET_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); - if (err < 0) - return err; break; case MTRRIOC_DEL_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_del(sentry.base, sentry.size, file, 1); - if (err < 0) - return err; break; case MTRRIOC_KILL_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_del_page(-1, sentry.base, sentry.size); - if (err < 0) - return err; break; case MTRRIOC_GET_PAGE_ENTRY: - if (copy_from_user(&gentry, arg, sizeof gentry)) - return -EFAULT; if (gentry.regnum >= num_var_ranges) return -EINVAL; mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type); gentry.type = type; + break; + } + + if (err) + return err; + switch(cmd) { + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: if (copy_to_user(arg, &gentry, sizeof gentry)) - return -EFAULT; + err = -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: { + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = put_user(gentry.base, &g32->base); + err |= put_user(gentry.size, &g32->size); + err |= put_user(gentry.regnum, &g32->regnum); + err |= put_user(gentry.type, &g32->type); break; } - return 0; +#endif + } + return err; } static int @@ -310,7 +338,8 @@ static struct file_operations mtrr_fops = { .read = seq_read, .llseek = seq_lseek, .write 
= mtrr_write, - .ioctl = mtrr_ioctl, + .unlocked_ioctl = mtrr_ioctl, + .compat_ioctl = mtrr_ioctl, .release = mtrr_close, }; diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 8bd77d948a8..41b871ecf4b 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -44,7 +44,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est", + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est", "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 4647db4ad6d..13bae799e62 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -163,7 +163,7 @@ static int cpuid_class_device_create(int i) int err = 0; struct class_device *class_err; - class_err = class_device_create(cpuid_class, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i); + class_err = class_device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i); if (IS_ERR(class_err)) err = PTR_ERR(class_err); return err; diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 0248e084017..af809ccf5fb 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -21,7 +21,6 @@ #include <asm/hardirq.h> #include <asm/nmi.h> #include <asm/hw_irq.h> -#include <asm/apic.h> #include <mach_ipi.h> @@ -148,7 +147,6 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) regs = &fixed_regs; } crash_save_this_cpu(regs, cpu); - disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ halt(); @@ -188,7 +186,6 @@ static void nmi_shootdown_cpus(void) } /* Leave the nmi callback set */ - disable_local_APIC(); } #else static void nmi_shootdown_cpus(void) @@ -213,9 +210,5 @@ void machine_crash_shutdown(struct pt_regs *regs) /* Make 
a note of crashing cpu. Will be used in NMI callback.*/ crashing_cpu = smp_processor_id(); nmi_shootdown_cpus(); - lapic_shutdown(); -#if defined(CONFIG_X86_IO_APIC) - disable_IO_APIC(); -#endif crash_save_self(regs); } diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index 323ef8ab324..d86f2490928 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -435,4 +435,8 @@ void __init init_IRQ(void) setup_irq(FPU_IRQ, &fpu_irq); irq_ctx_init(smp_processor_id()); + +#ifdef CONFIG_X86_LOCAL_APIC + APIC_init(); +#endif } diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index fb3991e8229..5a77c52b20a 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -46,6 +46,9 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; +/* Where if anywhere is the i8259 connect in external int mode */ +static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; + static DEFINE_SPINLOCK(ioapic_lock); /* @@ -738,7 +741,7 @@ static int find_irq_entry(int apic, int pin, int type) /* * Find the pin to which IRQ[irq] (ISA) is connected */ -static int find_isa_irq_pin(int irq, int type) +static int __init find_isa_irq_pin(int irq, int type) { int i; @@ -758,6 +761,33 @@ static int find_isa_irq_pin(int irq, int type) return -1; } +static int __init find_isa_irq_apic(int irq, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mpc_srcbus; + + if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || + mp_bus_id_to_type[lbus] == MP_BUS_EISA || + mp_bus_id_to_type[lbus] == MP_BUS_MCA || + mp_bus_id_to_type[lbus] == MP_BUS_NEC98 + ) && + (mp_irqs[i].mpc_irqtype == type) && + (mp_irqs[i].mpc_srcbusirq == irq)) + break; + } + if (i < mp_irq_entries) { + int apic; + for(apic = 0; apic < nr_ioapics; apic++) { + if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + return apic; + } + } + + return -1; +} + /* * Find a specific PCI IRQ entry. 
* Not an __init, possibly needed by modules @@ -1253,7 +1283,7 @@ static void __init setup_IO_APIC_irqs(void) /* * Set up the 8259A-master output pin: */ -static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) +static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) { struct IO_APIC_route_entry entry; unsigned long flags; @@ -1287,8 +1317,8 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) * Add it to the IO-APIC irq-routing table: */ spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); enable_8259A_irq(0); @@ -1595,7 +1625,8 @@ void /*__init*/ print_PIC(void) static void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; - int i; + int i8259_apic, i8259_pin; + int i, apic; unsigned long flags; for (i = 0; i < PIN_MAP_SIZE; i++) { @@ -1609,11 +1640,52 @@ static void __init enable_IO_APIC(void) /* * The number of IO-APIC IRQ registers (== #pins): */ - for (i = 0; i < nr_ioapics; i++) { + for (apic = 0; apic < nr_ioapics; apic++) { spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(i, 1); + reg_01.raw = io_apic_read(apic, 1); spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[i] = reg_01.bits.entries+1; + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + for(apic = 0; apic < nr_ioapics; apic++) { + int pin; + /* See if any of the pins is in ExtINT mode */ + for(pin = 0; pin < nr_ioapic_registers[i]; pin++) { + struct IO_APIC_route_entry entry; + spin_lock_irqsave(&ioapic_lock, flags); + *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); + *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + + + /* If the interrupt line is enabled 
and in ExtInt mode + * I have found the pin where the i8259 is connected. + */ + if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { + ioapic_i8259.apic = apic; + ioapic_i8259.pin = pin; + goto found_i8259; + } + } + } + found_i8259: + /* Look to see what if the MP table has reported the ExtINT */ + /* If we could not find the appropriate pin by looking at the ioapic + * the i8259 probably is not connected the ioapic but give the + * mptable a chance anyway. + */ + i8259_pin = find_isa_irq_pin(0, mp_ExtINT); + i8259_apic = find_isa_irq_apic(0, mp_ExtINT); + /* Trust the MP table if nothing is setup in the hardware */ + if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { + printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); + ioapic_i8259.pin = i8259_pin; + ioapic_i8259.apic = i8259_apic; + } + /* Complain if the MP table and the hardware disagree */ + if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && + (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) + { + printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); } /* @@ -1627,7 +1699,6 @@ static void __init enable_IO_APIC(void) */ void disable_IO_APIC(void) { - int pin; /* * Clear the IO-APIC before rebooting: */ @@ -1638,8 +1709,7 @@ void disable_IO_APIC(void) * Put that IOAPIC in virtual wire mode * so legacy interrupts can be delivered. 
*/ - pin = find_isa_irq_pin(0, mp_ExtINT); - if (pin != -1) { + if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry; unsigned long flags; @@ -1650,7 +1720,7 @@ void disable_IO_APIC(void) entry.polarity = 0; /* High */ entry.delivery_status = 0; entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = 7; /* ExtInt */ + entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; entry.dest.physical.physical_dest = 0; @@ -1659,11 +1729,13 @@ void disable_IO_APIC(void) * Add it to the IO-APIC irq-routing table: */ spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, + *(((int *)&entry)+1)); + io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, + *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); } - disconnect_bsp_APIC(pin != -1); + disconnect_bsp_APIC(ioapic_i8259.pin != -1); } /* @@ -2113,20 +2185,21 @@ static void setup_nmi (void) */ static inline void unlock_ExtINT_logic(void) { - int pin, i; + int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; unsigned char save_control, save_freq_select; unsigned long flags; - pin = find_isa_irq_pin(8, mp_INT); + pin = find_isa_irq_pin(8, mp_INT); + apic = find_isa_irq_apic(8, mp_INT); if (pin == -1) return; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); - *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); + *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); spin_unlock_irqrestore(&ioapic_lock, flags); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(apic, pin); memset(&entry1, 0, sizeof(entry1)); @@ -2139,8 +2212,8 @@ static inline void unlock_ExtINT_logic(void) entry1.vector = 0; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); - 
io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); save_control = CMOS_READ(RTC_CONTROL); @@ -2158,11 +2231,11 @@ static inline void unlock_ExtINT_logic(void) CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(apic, pin); spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -2174,7 +2247,7 @@ static inline void unlock_ExtINT_logic(void) */ static inline void check_timer(void) { - int pin1, pin2; + int apic1, pin1, apic2, pin2; int vector; /* @@ -2196,10 +2269,13 @@ static inline void check_timer(void) timer_ack = 1; enable_8259A_irq(0); - pin1 = find_isa_irq_pin(0, mp_INT); - pin2 = find_isa_irq_pin(0, mp_ExtINT); + pin1 = find_isa_irq_pin(0, mp_INT); + apic1 = find_isa_irq_apic(0, mp_INT); + pin2 = ioapic_i8259.pin; + apic2 = ioapic_i8259.apic; - printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2); + printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", + vector, apic1, pin1, apic2, pin2); if (pin1 != -1) { /* @@ -2216,8 +2292,9 @@ static inline void check_timer(void) clear_IO_APIC_pin(0, pin1); return; } - clear_IO_APIC_pin(0, pin1); - printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); + clear_IO_APIC_pin(apic1, pin1); + printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " + "IO-APIC\n"); } printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... 
"); @@ -2226,13 +2303,13 @@ static inline void check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - setup_ExtINT_IRQ0_pin(pin2, vector); + setup_ExtINT_IRQ0_pin(apic2, pin2, vector); if (timer_irq_works()) { printk("works.\n"); if (pin1 != -1) - replace_pin_at_irq(0, 0, pin1, 0, pin2); + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); else - add_pin_to_irq(0, 0, pin2); + add_pin_to_irq(0, apic2, pin2); if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); } @@ -2241,7 +2318,7 @@ static inline void check_timer(void) /* * Cleanup, just in case ... */ - clear_IO_APIC_pin(0, pin2); + clear_IO_APIC_pin(apic2, pin2); } printk(" failed.\n"); @@ -2310,11 +2387,15 @@ void __init setup_IO_APIC(void) sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); - check_timer(); if (!acpi_ioapic) print_IO_APIC(); } +void __init IO_APIC_late_time_init(void) +{ + check_timer(); +} + /* * Called after all the initialization is done. If we didnt find any * APIC bugs then we can allow the modify fast path diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index ce66dcc26d9..1a201a93286 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -218,7 +218,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, "CPU%d ",j); seq_putc(p, '\n'); } @@ -232,7 +232,7 @@ int show_interrupts(struct seq_file *p, void *v) #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif seq_printf(p, " %14s", irq_desc[i].handler->typename); @@ -246,12 +246,12 @@ skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, "%10u ", nmi_count(j)); seq_putc(p, '\n'); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, 
"%10u ", per_cpu(irq_stat,j).apic_timer_irqs); seq_putc(p, '\n'); diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 27aabfceb67..8f767d9aa45 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -69,7 +69,7 @@ unsigned int def_to_bigsmp = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; /* Internal processor count */ -static unsigned int __initdata num_processors; +static unsigned int __devinitdata num_processors; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; @@ -119,7 +119,7 @@ static int MP_valid_apicid(int apicid, int version) } #endif -static void __init MP_processor_info (struct mpc_config_processor *m) +static void __devinit MP_processor_info (struct mpc_config_processor *m) { int ver, apicid; physid_mask_t phys_cpu; @@ -182,17 +182,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m) boot_cpu_physical_apicid = m->mpc_apicid; } - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." 
- " Processor ignored.\n", maxcpus); - return; - } ver = m->mpc_apicver; if (!MP_valid_apicid(apicid, ver)) { @@ -201,11 +190,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m) return; } - cpu_set(num_processors, cpu_possible_map); - num_processors++; - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - /* * Validate version */ @@ -216,6 +200,25 @@ static void __init MP_processor_info (struct mpc_config_processor *m) ver = 0x10; } apic_version[m->mpc_apicid] = ver; + + phys_cpu = apicid_to_cpu_present(apicid); + physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); + + if (num_processors >= NR_CPUS) { + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); + return; + } + + if (num_processors >= maxcpus) { + printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." + " Processor ignored.\n", maxcpus); + return; + } + + cpu_set(num_processors, cpu_possible_map); + num_processors++; + if ((num_processors > 8) && APIC_XAPIC(ver) && (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) @@ -834,7 +837,7 @@ void __init mp_register_lapic_address ( } -void __init mp_register_lapic ( +void __devinit mp_register_lapic ( u8 id, u8 enabled) { diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 03100d6fc5d..44470fea430 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -246,7 +246,7 @@ static int msr_class_device_create(int i) int err = 0; struct class_device *class_err; - class_err = class_device_create(msr_class, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i); + class_err = class_device_create(msr_class, NULL, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i); if (IS_ERR(class_err)) err = PTR_ERR(class_err); return err; diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 72515b8a1b1..d661703ac1c 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -100,16 +100,44 @@ int nmi_active; 
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) +#ifdef CONFIG_SMP +/* The performance counters used by NMI_LOCAL_APIC don't trigger when + * the CPU is idle. To make sure the NMI watchdog really ticks on all + * CPUs during the test make them busy. + */ +static __init void nmi_cpu_busy(void *data) +{ + volatile int *endflag = data; + local_irq_enable(); + /* Intentionally don't use cpu_relax here. This is + to make sure that the performance counter really ticks, + even if there is a simulator or similar that catches the + pause instruction. On a real HT machine this is fine because + all other CPUs are busy with "useless" delay loops and don't + care if they get somewhat less cycles. */ + while (*endflag == 0) + barrier(); +} +#endif + static int __init check_nmi_watchdog(void) { - unsigned int prev_nmi_count[NR_CPUS]; + volatile int endflag = 0; + unsigned int *prev_nmi_count; int cpu; if (nmi_watchdog == NMI_NONE) return 0; + prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); + if (!prev_nmi_count) + return -1; + printk(KERN_INFO "Testing NMI watchdog ... 
"); + if (nmi_watchdog == NMI_LOCAL_APIC) + smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); + for (cpu = 0; cpu < NR_CPUS; cpu++) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; local_irq_enable(); @@ -123,12 +151,18 @@ static int __init check_nmi_watchdog(void) continue; #endif if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk("CPU#%d: NMI appears to be stuck!\n", cpu); + endflag = 1; + printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", + cpu, + prev_nmi_count[cpu], + nmi_count(cpu)); nmi_active = 0; lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; + kfree(prev_nmi_count); return -1; } } + endflag = 1; printk("OK.\n"); /* now that we know it works we can reduce NMI frequency to @@ -136,6 +170,7 @@ static int __init check_nmi_watchdog(void) if (nmi_watchdog == NMI_LOCAL_APIC) nmi_hz = 1; + kfree(prev_nmi_count); return 0; } /* This needs to happen later in boot so counters are working */ diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 7b6368bf897..efd11f09c99 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -354,7 +354,7 @@ ptrace_set_thread_area(struct task_struct *child, return 0; } -asmlinkage int sys_ptrace(long request, long pid, long addr, long data) +asmlinkage long sys_ptrace(long request, long pid, long addr, long data) { struct task_struct *child; struct user * dummy = NULL; diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c index 1b183b378c2..c9b87330aee 100644 --- a/arch/i386/kernel/reboot_fixups.c +++ b/arch/i386/kernel/reboot_fixups.c @@ -44,7 +44,7 @@ void mach_reboot_fixups(void) for (i=0; i < (sizeof(fixups_table)/sizeof(fixups_table[0])); i++) { cur = &(fixups_table[i]); - dev = pci_get_device(cur->vendor, cur->device, 0); + dev = pci_get_device(cur->vendor, cur->device, NULL); if (!dev) continue; diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 9b8c8a19824..b48ac635f3c 100644 --- a/arch/i386/kernel/setup.c +++ 
b/arch/i386/kernel/setup.c @@ -389,14 +389,24 @@ static void __init limit_regions(unsigned long long size) } } for (i = 0; i < e820.nr_map; i++) { - if (e820.map[i].type == E820_RAM) { - current_addr = e820.map[i].addr + e820.map[i].size; - if (current_addr >= size) { - e820.map[i].size -= current_addr-size; - e820.nr_map = i + 1; - return; - } + current_addr = e820.map[i].addr + e820.map[i].size; + if (current_addr < size) + continue; + + if (e820.map[i].type != E820_RAM) + continue; + + if (e820.map[i].addr >= size) { + /* + * This region starts past the end of the + * requested size, skip it completely. + */ + e820.nr_map = i; + } else { + e820.nr_map = i + 1; + e820.map[i].size -= current_addr - size; } + return; } } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 1fb26d0e30b..5a2bbe0c4ff 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -87,7 +87,11 @@ EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); +#ifdef CONFIG_HOTPLUG_CPU +cpumask_t cpu_possible_map = CPU_MASK_ALL; +#else cpumask_t cpu_possible_map; +#endif EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; @@ -1074,6 +1078,16 @@ void *xquad_portio; EXPORT_SYMBOL(xquad_portio); #endif +/* + * Fall back to non SMP mode after errors. 
+ * + */ +static __init void disable_smp(void) +{ + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); +} + static void __init smp_boot_cpus(unsigned int max_cpus) { int apicid, cpu, bit, kicked; @@ -1086,7 +1100,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk("CPU%d: ", 0); print_cpu_info(&cpu_data[0]); - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; @@ -1098,68 +1111,27 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpus_clear(cpu_core_map[0]); cpu_set(0, cpu_core_map[0]); + map_cpu_to_logical_apicid(); + /* * If we couldn't find an SMP configuration at boot time, * get out of here now! */ if (!smp_found_config && !acpi_lapic) { printk(KERN_NOTICE "SMP motherboard not detected.\n"); - smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); - if (APIC_init_uniprocessor()) - printk(KERN_NOTICE "Local APIC not detected." - " Using dummy APIC emulation.\n"); - map_cpu_to_logical_apicid(); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + disable_smp(); return; } /* - * Should not be necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. - * Makes no sense to do this check in clustered apic mode, so skip it - */ - if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { - printk("weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - printk(KERN_ERR "... forcing use of dummy APIC emulation. 
(tell your hw vendor)\n"); - smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); - return; - } - - verify_local_APIC(); - - /* * If SMP should be disabled, then really disable it! */ - if (!max_cpus) { - smp_found_config = 0; - printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); - smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + if (!max_cpus || (enable_local_apic < 0)) { + printk(KERN_INFO "SMP mode deactivated.\n"); + disable_smp(); return; } - connect_bsp_APIC(); - setup_local_APIC(); - map_cpu_to_logical_apicid(); - - setup_portio_remap(); /* @@ -1240,10 +1212,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpu_set(0, cpu_sibling_map[0]); cpu_set(0, cpu_core_map[0]); - smpboot_setup_io_apic(); - - setup_boot_APIC_clock(); - /* * Synchronize the TSC with the AP */ diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 516bf5653b0..8de658db814 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -327,7 +327,12 @@ int __init get_memcfg_from_srat(void) int tables = 0; int i = 0; - acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING, rsdp_address); + if (ACPI_FAILURE(acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING, + rsdp_address))) { + printk("%s: System description tables not found\n", + __FUNCTION__); + goto out_err; + } if (rsdp_address->pointer_type == ACPI_PHYSICAL_POINTER) { printk("%s: assigning address to rsdp\n", __FUNCTION__); diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 2883a4d4f01..07471bba2dc 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -74,10 +74,6 @@ int pit_latch_buggy; /* extern */ #include "do_timer.h" -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - unsigned int cpu_khz; /* Detected as we calibrate the TSC */ 
EXPORT_SYMBOL(cpu_khz); @@ -444,8 +440,8 @@ static int time_init_device(void) device_initcall(time_init_device); -#ifdef CONFIG_HPET_TIMER extern void (*late_time_init)(void); +#ifdef CONFIG_HPET_TIMER /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { @@ -462,6 +458,11 @@ static void __init hpet_time_init(void) printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); + +#ifdef CONFIG_X86_LOCAL_APIC + if (enable_local_apic >= 0) + APIC_late_time_init(); +#endif } #endif @@ -486,4 +487,9 @@ void __init time_init(void) printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); + +#ifdef CONFIG_X86_LOCAL_APIC + if (enable_local_apic >= 0) + late_time_init = APIC_late_time_init; +#endif } diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 658c0629ba6..9caeaa315cd 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -275,6 +275,7 @@ static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ; static unsigned long PIE_count; static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ +static unsigned int hpet_t1_cmp; /* cached comparator register */ /* * Timer 1 for RTC, we do not use periodic interrupt feature, @@ -306,10 +307,12 @@ int hpet_rtc_timer_init(void) cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); + hpet_t1_cmp = cnt; local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); - cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; + cfg &= ~HPET_TN_PERIODIC; + cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_T1_CFG); return 1; @@ -319,8 +322,12 @@ static void hpet_rtc_timer_reinit(void) { unsigned int cfg, cnt; - if (!(PIE_on | AIE_on | UIE_on)) + if (unlikely(!(PIE_on | AIE_on | UIE_on))) { + cfg = hpet_readl(HPET_T1_CFG); + cfg &= ~HPET_TN_ENABLE; + hpet_writel(cfg, HPET_T1_CFG); return; + } if (PIE_on && 
(PIE_freq > DEFAULT_RTC_INT_FREQ)) hpet_rtc_int_freq = PIE_freq; @@ -328,15 +335,10 @@ static void hpet_rtc_timer_reinit(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; /* It is more accurate to use the comparator value than current count.*/ - cnt = hpet_readl(HPET_T1_CMP); + cnt = hpet_t1_cmp; cnt += hpet_tick*HZ/hpet_rtc_int_freq; hpet_writel(cnt, HPET_T1_CMP); - - cfg = hpet_readl(HPET_T1_CFG); - cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T1_CFG); - - return; + hpet_t1_cmp = cnt; } /* diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index d973a8b681f..be242723c33 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -30,23 +30,28 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; * basic equation: * ns = cycles / (freq / ns_per_sec) * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) * * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * (10^6 * SC / cpu_khz) / SC * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div * into a shift. + * + * We can use khz divisor instead of mhz to keep a better percision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
*/ static unsigned long cyc2ns_scale; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +static inline void set_cyc2ns_scale(unsigned long cpu_khz) { - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) @@ -163,7 +168,7 @@ static int __init init_hpet(char* override) printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); } /* set this only when cpu_has_tsc */ timer_hpet.read_timer = read_timer_tsc; diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 6dd470cc9f7..d395e3b4248 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -49,23 +49,28 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; * basic equation: * ns = cycles / (freq / ns_per_sec) * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) * * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * (10^6 * SC / cpu_khz) / SC * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div - * into a shift. + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better percision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
*/ static unsigned long cyc2ns_scale; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +static inline void set_cyc2ns_scale(unsigned long cpu_khz) { - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) @@ -286,7 +291,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (use_tsc) { if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); } } #endif @@ -536,7 +541,7 @@ static int __init init_tsc(char* override) printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); return 0; } } diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 19e90bdd84e..c34d1bfc516 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -488,6 +488,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, tss->io_bitmap_max - thread->io_bitmap_max); tss->io_bitmap_max = thread->io_bitmap_max; tss->io_bitmap_base = IO_BITMAP_OFFSET; + tss->io_bitmap_owner = thread; put_cpu(); return; } diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index 16b48500962..fc1993564f9 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -134,17 +134,16 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) return ret; } -static void mark_screen_rdonly(struct task_struct * tsk) +static void mark_screen_rdonly(struct mm_struct *mm) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, *mapped; + pte_t *pte; + spinlock_t *ptl; int i; - preempt_disable(); - spin_lock(&tsk->mm->page_table_lock); - pgd = pgd_offset(tsk->mm, 0xA0000); + pgd = pgd_offset(mm, 0xA0000); if 
(pgd_none_or_clear_bad(pgd)) goto out; pud = pud_offset(pgd, 0xA0000); @@ -153,16 +152,14 @@ static void mark_screen_rdonly(struct task_struct * tsk) pmd = pmd_offset(pud, 0xA0000); if (pmd_none_or_clear_bad(pmd)) goto out; - pte = mapped = pte_offset_map(pmd, 0xA0000); + pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); for (i = 0; i < 32; i++) { if (pte_present(*pte)) set_pte(pte, pte_wrprotect(*pte)); pte++; } - pte_unmap(mapped); + pte_unmap_unlock(pte, ptl); out: - spin_unlock(&tsk->mm->page_table_lock); - preempt_enable(); flush_tlb(); } @@ -306,7 +303,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.screen_bitmap = info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) - mark_screen_rdonly(tsk); + mark_screen_rdonly(tsk->mm); __asm__ __volatile__( "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t" "movl %0,%%esp\n\t" |