diff options
Diffstat (limited to 'arch/x86/kernel')
75 files changed, 865 insertions, 608 deletions
diff --git a/arch/x86/kernel/.gitignore b/arch/x86/kernel/.gitignore index 40836ad9079..4ea38a39aed 100644 --- a/arch/x86/kernel/.gitignore +++ b/arch/x86/kernel/.gitignore @@ -1 +1,2 @@ vsyscall.lds +vsyscall_32.lds diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 45855c97923..38573340b14 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -3,3 +3,7 @@ include ${srctree}/arch/x86/kernel/Makefile_32 else include ${srctree}/arch/x86/kernel/Makefile_64 endif + +# Workaround to delete .lds files with make clean +# The problem is that we do not enter Makefile_32 with make clean. +clean-files := vsyscall*.lds vsyscall*.so diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index 7ff02063b85..a3fa11f8f46 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -51,7 +51,7 @@ obj-$(CONFIG_SCx200) += scx200_32.o # We must build both images before we can assemble it. # Note: kbuild does not track this dependency due to usage of .incbin $(obj)/vsyscall_32.o: $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so -targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so) +targets += $(foreach F,int80 sysenter,vsyscall-$F_32.o vsyscall-$F_32.so) targets += vsyscall-note_32.o vsyscall_32.lds # The DSO images are built using a special linker script. diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 11b03d3c6fd..3bd2688bd44 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -63,11 +63,11 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); /* Use inline assembly to define this because the nops are defined as inline assembly strings in the include files and we cannot get them easily into strings. */ -asm("\t.data\nintelnops: " +asm("\t.section .rodata, \"a\"\nintelnops: " GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 GENERIC_NOP7 GENERIC_NOP8); -extern unsigned char intelnops[]; -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { +extern const unsigned char intelnops[]; +static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { NULL, intelnops, intelnops + 1, @@ -81,11 +81,11 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = { #endif #ifdef K8_NOP1 -asm("\t.data\nk8nops: " +asm("\t.section .rodata, \"a\"\nk8nops: " K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 K8_NOP7 K8_NOP8); -extern unsigned char k8nops[]; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { +extern const unsigned char k8nops[]; +static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { NULL, k8nops, k8nops + 1, @@ -99,11 +99,11 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = { #endif #ifdef K7_NOP1 -asm("\t.data\nk7nops: " +asm("\t.section .rodata, \"a\"\nk7nops: " K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 K7_NOP7 K7_NOP8); -extern unsigned char k7nops[]; -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { +extern const unsigned char k7nops[]; +static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { NULL, k7nops, k7nops + 1, @@ -116,28 +116,49 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = { }; #endif +#ifdef P6_NOP1 +asm("\t.section .rodata, \"a\"\np6nops: " + P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 + P6_NOP7 P6_NOP8); +extern const unsigned char p6nops[]; +static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { + NULL, + p6nops, + p6nops + 1, + p6nops + 1 + 2, + p6nops + 1 + 2 + 3, + p6nops + 1 + 2 + 3 + 4, + p6nops + 1 + 2 + 3 + 4 + 5, + p6nops + 1 + 2 + 3 + 4 + 5 + 6, + p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +#endif + #ifdef CONFIG_X86_64 extern char __vsyscall_0; -static inline unsigned char** find_nop_table(void) +static inline const unsigned char*const * find_nop_table(void) { - return k8_nops; + return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; } #else /* CONFIG_X86_64 */ -static struct nop { +static const struct nop { int cpuid; - unsigned char **noptable; + const unsigned char *const *noptable; } noptypes[] = { { X86_FEATURE_K8, k8_nops }, { X86_FEATURE_K7, k7_nops }, + { X86_FEATURE_P4, p6_nops }, + { X86_FEATURE_P3, p6_nops }, { -1, NULL } }; -static unsigned char** find_nop_table(void) +static const unsigned char*const * find_nop_table(void) { - unsigned char **noptable = intel_nops; + const unsigned char *const *noptable = intel_nops; int i; for (i = 0; noptypes[i].cpuid >= 0; i++) { @@ -154,7 +175,7 @@ static unsigned char** find_nop_table(void) /* Use this to add nops to a buffer, then text_poke the whole buffer. */ static void add_nops(void *insns, unsigned int len) { - unsigned char **noptable = find_nop_table(); + const unsigned char *const *noptable = find_nop_table(); while (len > 0) { unsigned int noplen = len; @@ -369,8 +390,8 @@ void apply_paravirt(struct paravirt_patch_site *start, BUG_ON(p->len > MAX_PATCH_LEN); /* prep the buffer with the original instructions */ memcpy(insnbuf, p->instr, p->len); - used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf, - (unsigned long)p->instr, p->len); + used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf, + (unsigned long)p->instr, p->len); BUG_ON(used > p->len); @@ -415,9 +436,6 @@ void __init alternative_instructions(void) alternatives_smp_unlock(__smp_locks, __smp_locks_end, _text, _etext); } - free_init_pages("SMP alternatives", - (unsigned long)__smp_locks, - (unsigned long)__smp_locks_end); } else { alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, @@ -428,6 +446,11 @@ void __init alternative_instructions(void) apply_paravirt(__parainstructions, __parainstructions_end); local_irq_restore(flags); + if (smp_alt_once) + free_init_pages("SMP alternatives", + (unsigned long)__smp_locks, + (unsigned long)__smp_locks_end); + restart_nmi(); #ifdef CONFIG_X86_MCE restart_mce(); diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3d67ae18d76..793341fffc8 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1277,6 +1277,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) /* see sw-dev-man vol 3, chapter 7.4.13.5 */ printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " "should never happen.\n", smp_processor_id()); + __get_cpu_var(irq_stat).irq_spurious_count++; irq_exit(); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 09b82093bc7..f47bc493dba 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -974,15 +974,12 @@ void __init setup_boot_APIC_clock (void) */ void __cpuinit check_boot_apic_timer_broadcast(void) { - struct clock_event_device *levt = &per_cpu(lapic_events, boot_cpu_id); - if (!disable_apic_timer || (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) return; printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; - levt->features |= CLOCK_EVT_FEAT_DUMMY; local_irq_enable(); clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); @@ -1143,6 +1140,7 @@ asmlinkage void smp_spurious_interrupt(void) if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); + add_pda(irq_spurious_count, 1); irq_exit(); } diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 8029742c0fc..f1b7cdda82b 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -116,12 +116,14 @@ void foo(void) #ifdef CONFIG_PARAVIRT BLANK(); - OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled); - OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable); - OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable); - OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit); - OFFSET(PARAVIRT_iret, paravirt_ops, iret); - OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); + OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); + OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); + OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); + OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); + OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); + OFFSET(PV_CPU_iret, pv_cpu_ops, iret); + OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); + OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); #endif #ifdef CONFIG_XEN diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dcf6bbb1c7c..5f8af875f45 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -4,6 +4,7 @@ #include <asm/io.h> #include <asm/processor.h> #include <asm/apic.h> +#include <asm/mach_apic.h> #include "cpu.h" @@ -45,13 +46,17 @@ static __cpuinit int amd_apic_timer_broken(void) case CPUID_XFAM_10H: case CPUID_XFAM_11H: rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) + if (lo & ENABLE_C1E_MASK) { + if (smp_processor_id() != boot_cpu_physical_apicid) + printk(KERN_INFO "AMD C1E detected late. " + " Force timer broadcast.\n"); return 1; - break; - default: - /* err on the side of caution */ + } + break; + default: + /* err on the side of caution */ return 1; - } + } return 0; } #endif diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ffd01e5dcb5..2ca43ba32bc 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -595,7 +595,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) dmi_check_system(sw_any_bug_dmi_table); if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - policy->cpus = cpu_core_map[cpu]; + policy->cpus = per_cpu(cpu_core_map, cpu); } #endif diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 8eb414b906d..793eae854f4 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -200,7 +200,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) unsigned int i; #ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; + policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); #endif /* Errata workaround */ diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 7decd6a50ff..f3686a5f230 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -565,7 +565,7 @@ static unsigned int powernow_get(unsigned int cpu) } -static int __init acer_cpufreq_pst(struct dmi_system_id *d) +static int __init acer_cpufreq_pst(const struct dmi_system_id *d) { printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident); printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index b273b69cfdd..c06ac680c9c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -57,7 +57,7 @@ static struct powernow_k8_data *powernow_data[NR_CPUS]; static int cpu_family = CPU_OPTERON; #ifndef CONFIG_SMP -static cpumask_t cpu_core_map[1]; +DEFINE_PER_CPU(cpumask_t, cpu_core_map); #endif /* Return a frequency in MHz, given an input fid */ @@ -667,7 +667,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); data->powernow_table = powernow_table; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) print_basics(data); for (j = 0; j < data->numps; j++) @@ -821,7 +821,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* fill in data */ data->numps = data->acpi_data.state_count; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) print_basics(data); powernow_k8_acpi_pst_values(data, 0); @@ -1214,7 +1214,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (cpu_family == CPU_HW_PSTATE) pol->cpus = cpumask_of_cpu(pol->cpu); else - pol->cpus = cpu_core_map[pol->cpu]; + pol->cpus = per_cpu(cpu_core_map, pol->cpu); data->available_cores = &(pol->cpus); /* Take a crude guess here. @@ -1281,7 +1281,7 @@ static unsigned int powernowk8_get (unsigned int cpu) cpumask_t oldmask = current->cpus_allowed; unsigned int khz = 0; - data = powernow_data[first_cpu(cpu_core_map[cpu])]; + data = powernow_data[first_cpu(per_cpu(cpu_core_map, cpu))]; if (!data) return -EINVAL; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 36685e8f7be..14d68aa301e 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -322,7 +322,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) /* only run on CPU to be set, or on its sibling */ #ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; + policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); #endif cpus_allowed = current->cpus_allowed; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index dc4e08147b1..cc8c501b9f3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <asm/processor.h> +#include <asm/pgtable.h> #include <asm/msr.h> #include <asm/uaccess.h> @@ -19,8 +20,6 @@ #include <mach_apic.h> #endif -extern int trap_init_f00f_bug(void); - #ifdef CONFIG_X86_INTEL_USERCOPY /* * Alignment at which movsl is preferred for bulk memory copies. @@ -95,6 +94,20 @@ static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) return 1; } +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); +} +#endif + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index db6c25aa577..1826395ebee 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -170,15 +170,15 @@ union l3_cache { unsigned val; }; -static const unsigned short assocs[] = { +static unsigned short assocs[] __cpuinitdata = { [1] = 1, [2] = 2, [4] = 4, [6] = 8, [8] = 16, [0xa] = 32, [0xb] = 48, [0xc] = 64, [0xf] = 0xffff // ?? }; -static const unsigned char levels[] = { 1, 1, 2, 3 }; -static const unsigned char types[] = { 1, 2, 3, 3 }; +static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; +static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, union _cpuid4_leaf_ebx *ebx, @@ -493,8 +493,8 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) } } #else -static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} -static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {} +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {} #endif static void free_cache_attributes(unsigned int cpu) @@ -794,8 +794,9 @@ static int __cpuinit cache_sysfs_init(void) register_hotcpu_notifier(&cacheinfo_cpu_notifier); for_each_online_cpu(i) { - cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, - (void *)(long)i); + struct sys_device *sys_dev = get_cpu_sysdev((unsigned int)i); + + cache_add_dev(sys_dev); } return 0; diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 1509edfb231..be4dabfee1f 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -61,6 +61,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs) { irq_enter(); vendor_thermal_interrupt(regs); + __get_cpu_var(irq_stat).irq_thermal_count++; irq_exit(); } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 1203dc5ab87..494d320d909 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -152,7 +152,7 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block thermal_throttle_cpu_notifier = +static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = { .notifier_call = thermal_throttle_cpu_callback, }; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index c48b6fea5ab..5e4be30ff90 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -738,13 +738,7 @@ void mtrr_ap_init(void) */ void mtrr_save_state(void) { - int cpu = get_cpu(); - - if (cpu == 0) - mtrr_save_fixed_ranges(NULL); - else - smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); - put_cpu(); + smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); } static int __init mtrr_init_finialize(void) diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 93fecd4b03d..54cdbf1a40f 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -34,7 +34,7 @@ struct wd_ops { u64 checkbit; }; -static struct wd_ops *wd_ops; +static const struct wd_ops *wd_ops; /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) @@ -317,7 +317,7 @@ static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); } -static struct wd_ops k7_wd_ops = { +static const struct wd_ops k7_wd_ops = { .reserve = single_msr_reserve, .unreserve = single_msr_unreserve, .setup = setup_k7_watchdog, @@ -380,7 +380,7 @@ static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); } -static struct wd_ops p6_wd_ops = { +static const struct wd_ops p6_wd_ops = { .reserve = single_msr_reserve, .unreserve = single_msr_unreserve, .setup = setup_p6_watchdog, @@ -532,7 +532,7 @@ static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); } -static struct wd_ops p4_wd_ops = { +static const struct wd_ops p4_wd_ops = { .reserve = p4_reserve, .unreserve = p4_unreserve, .setup = setup_p4_watchdog, @@ -550,6 +550,8 @@ static struct wd_ops p4_wd_ops = { #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK +static struct wd_ops intel_arch_wd_ops; + static int setup_intel_arch_watchdog(unsigned nmi_hz) { unsigned int ebx; @@ -591,11 +593,11 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; //unused - wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1); + intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); return 1; } -static struct wd_ops intel_arch_wd_ops = { +static struct wd_ops intel_arch_wd_ops __read_mostly = { .reserve = single_msr_reserve, .unreserve = single_msr_unreserve, .setup = setup_intel_arch_watchdog, diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 1e31b6caffb..879a0f789b1 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -122,7 +122,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_X86_HT if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, n))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index f4548c93ccf..70dcf912d9f 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -43,8 +43,6 @@ static struct class *cpuid_class; -#ifdef CONFIG_SMP - struct cpuid_command { u32 reg; u32 *data; @@ -62,25 +60,11 @@ static inline void do_cpuid(int cpu, u32 reg, u32 * data) { struct cpuid_command cmd; - preempt_disable(); - if (cpu == smp_processor_id()) { - cpuid(reg, &data[0], &data[1], &data[2], &data[3]); - } else { - cmd.reg = reg; - cmd.data = data; + cmd.reg = reg; + cmd.data = data; - smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); - } - preempt_enable(); + smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); } -#else /* ! CONFIG_SMP */ - -static inline void do_cpuid(int cpu, u32 reg, u32 * data) -{ - cpuid(reg, &data[0], &data[1], &data[2], &data[3]); -} - -#endif /* ! CONFIG_SMP */ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) { @@ -150,7 +134,7 @@ static const struct file_operations cpuid_fops = { .open = cpuid_open, }; -static int cpuid_device_create(int i) +static int __cpuinit cpuid_device_create(int i) { int err = 0; struct device *dev; @@ -161,7 +145,9 @@ static int cpuid_device_create(int i) return err; } -static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 0f4d5e209e9..e422b8159f6 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -24,7 +24,7 @@ #include <asm/page.h> #include <asm/e820.h> #include <asm/proto.h> -#include <asm/bootsetup.h> +#include <asm/setup.h> #include <asm/sections.h> struct e820map e820; @@ -68,10 +68,15 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) /* initrd */ #ifdef CONFIG_BLK_DEV_INITRD - if (LOADER_TYPE && INITRD_START && last >= INITRD_START && - addr < INITRD_START+INITRD_SIZE) { - *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE); - return 1; + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; + unsigned long ramdisk_end = ramdisk_image+ramdisk_size; + + if (last >= ramdisk_image && addr < ramdisk_end) { + *addrp = PAGE_ALIGN(ramdisk_end); + return 1; + } } #endif /* kernel code */ @@ -594,8 +599,8 @@ void __init setup_memory_region(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - sanitize_e820_map(E820_MAP, &E820_MAP_NR); - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) + sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) early_panic("Cannot find a valid memory map"); printk(KERN_INFO "BIOS-provided physical RAM map:\n"); e820_print_map("BIOS-e820"); diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index fd9aff3f389..b7d6c23f287 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -6,15 +6,10 @@ #include <asm/io.h> #include <asm/processor.h> #include <asm/fcntl.h> +#include <asm/setup.h> #include <xen/hvc-console.h> /* Simple VGA output */ - -#ifdef __i386__ -#include <asm/setup.h> -#else -#include <asm/bootsetup.h> -#endif #define VGABASE (__ISA_IO_base + 0xb8000) static int max_ypos = 25, max_xpos = 80; @@ -234,10 +229,10 @@ static int __init setup_early_printk(char *buf) early_serial_init(buf); early_console = &early_serial_console; } else if (!strncmp(buf, "vga", 3) - && SCREEN_INFO.orig_video_isVGA == 1) { - max_xpos = SCREEN_INFO.orig_video_cols; - max_ypos = SCREEN_INFO.orig_video_lines; - current_ypos = SCREEN_INFO.orig_y; + && boot_params.screen_info.orig_video_isVGA == 1) { + max_xpos = boot_params.screen_info.orig_video_cols; + max_ypos = boot_params.screen_info.orig_video_lines; + current_ypos = boot_params.screen_info.orig_y; early_console = &early_vga_console; } else if (!strncmp(buf, "simnow", 6)) { simnow_init(buf + 6); diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 2452c6fbe99..b42558c48e9 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c @@ -331,11 +331,13 @@ void __init efi_init(void) memset(&efi, 0, sizeof(efi) ); memset(&efi_phys, 0, sizeof(efi_phys)); - efi_phys.systab = EFI_SYSTAB; - memmap.phys_map = EFI_MEMMAP; - memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; - memmap.desc_version = EFI_MEMDESC_VERSION; - memmap.desc_size = EFI_MEMDESC_SIZE; + efi_phys.systab = + (efi_system_table_t *)boot_params.efi_info.efi_systab; + memmap.phys_map = (void *)boot_params.efi_info.efi_memmap; + memmap.nr_map = boot_params.efi_info.efi_memmap_size/ + boot_params.efi_info.efi_memdesc_size; + memmap.desc_version = boot_params.efi_info.efi_memdesc_version; + memmap.desc_size = boot_params.efi_info.efi_memdesc_size; efi.systab = (efi_system_table_t *) boot_ioremap((unsigned long) efi_phys.systab, @@ -446,7 +448,8 @@ void __init efi_init(void) printk(KERN_ERR PFX "Could not map the runtime service table!\n"); /* Map the EFI memory map for use until paging_init() */ - memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); + memmap.map = boot_ioremap(boot_params.efi_info.efi_memmap, + boot_params.efi_info.efi_memmap_size); if (memmap.map == NULL) printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 8099fea0a72..dc7f938e501 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -437,7 +437,7 @@ ldt_ss: * is still available to implement the setting of the high * 16-bits in the INTERRUPT_RETURN paravirt-op. */ - cmpl $0, paravirt_ops+PARAVIRT_enabled + cmpl $0, pv_info+PARAVIRT_enabled jne restore_nocheck #endif diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index f1cacd4897f..3a058bb1640 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -988,7 +988,7 @@ child_rip: movq %rsi, %rdi call *%rax # exit - xorl %edi, %edi + mov %eax, %edi call do_exit CFI_ENDPROC ENDPROC(child_rip) diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 47496a40e84..4ae03e3e829 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -29,8 +29,6 @@ u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; EXPORT_SYMBOL(x86_cpu_to_apicid); -u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; - struct genapic __read_mostly *genapic = &apic_flat; /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index ecb01eefdd2..91c7526768e 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -52,7 +52,6 @@ static void flat_init_apic_ldr(void) num = smp_processor_id(); id = 1UL << num; - x86_cpu_to_log_apicid[num] = id; apic_write(APIC_DFR, APIC_DFR_FLAT); val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; val |= SET_APIC_LOGICAL_ID(id); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8561f626eda..a7eee0a4751 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -14,7 +14,6 @@ #include <asm/processor.h> #include <asm/proto.h> #include <asm/smp.h> -#include <asm/bootsetup.h> #include <asm/setup.h> #include <asm/desc.h> #include <asm/pgtable.h> @@ -36,26 +35,15 @@ static void __init clear_bss(void) (unsigned long) __bss_stop - (unsigned long) __bss_start); } -#define NEW_CL_POINTER 0x228 /* Relative to real mode data */ -#define OLD_CL_MAGIC_ADDR 0x20 -#define OLD_CL_MAGIC 0xA33F -#define OLD_CL_OFFSET 0x22 - static void __init copy_bootdata(char *real_mode_data) { - unsigned long new_data; char * command_line; - memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); - new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER); - if (!new_data) { - if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) { - return; - } - new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET); + memcpy(&boot_params, real_mode_data, sizeof boot_params); + if (boot_params.hdr.cmd_line_ptr) { + command_line = __va(boot_params.hdr.cmd_line_ptr); + memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); } - command_line = __va(new_data); - memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); } void __init x86_64_start_kernel(char * real_mode_data) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 9150ca9b5f8..39677965e16 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -51,6 +51,15 @@ */ LOW_PAGES = 1<<(32-PAGE_SHIFT_asm) +/* + * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate + * pagetables from above the 16MB DMA limit, so we'll have to set + * up pagetables 16MB more (worst-case): + */ +#ifdef CONFIG_DEBUG_PAGEALLOC +LOW_PAGES = LOW_PAGES + 0x1000000 +#endif + #if PTRS_PER_PMD > 1 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD #else @@ -443,6 +452,7 @@ early_page_fault: early_fault: cld #ifdef CONFIG_PRINTK + pusha movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es @@ -534,8 +544,15 @@ int_msg: .asciz "Unknown interrupt or fault at EIP %p %p %p\n" fault_msg: - .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" - .asciz "Stack: %p %p %p %p %p %p %p %p\n" + .ascii \ +/* fault info: */ "BUG: Int %d: CR2 %p\n" \ +/* pusha regs: */ " EDI %p ESI %p EBP %p ESP %p\n" \ + " EBX %p EDX %p ECX %p EAX %p\n" \ +/* fault frame: */ " err %p EIP %p CS %p flg %p\n" \ + \ + "Stack: %p %p %p %p %p %p %p %p\n" \ + " %p %p %p %p %p %p %p %p\n" \ + " %p %p %p %p %p %p %p %p\n" #include "../../x86/xen/xen-head.S" diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index e3d4b73bfdb..edd39ccf139 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -1,4 +1,5 @@ #include <linux/module.h> +#include <asm/semaphore.h> #include <asm/checksum.h> #include <asm/desc.h> diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c index 679bb33acbf..d34a10cc13a 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259_32.c @@ -349,7 +349,11 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) * New motherboards sometimes make IRQ 13 be a PCI interrupt, * so allow interrupt sharing. */ -static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL }; +static struct irqaction fpu_irq = { + .handler = math_error_irq, + .mask = CPU_MASK_NONE, + .name = "fpu", +}; void __init init_ISA_irqs (void) { diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index eb72976cc13..3f27ea0b981 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -395,7 +395,11 @@ device_initcall(i8259A_init_sysfs); * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [0 ... IRQ0_VECTOR - 1] = -1, [IRQ0_VECTOR] = 0, diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index e2f4a1c6854..5f10c718953 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -378,7 +378,7 @@ static struct irq_cpu_info { #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) static cpumask_t balance_irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL @@ -598,7 +598,7 @@ tryanotherirq: * (A+B)/2 vs B */ load = CPU_IRQ(min_loaded) >> 1; - for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { + for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) { if (load > CPU_IRQ(j)) { /* This won't change cpu_sibling_map[min_loaded] */ load = CPU_IRQ(j); @@ -1296,6 +1296,11 @@ static void __init setup_IO_APIC_irqs(void) continue; } + if (!first_notcon) { + apic_printk(APIC_VERBOSE, " not connected.\n"); + first_notcon = 1; + } + entry.trigger = irq_trigger(idx); entry.polarity = irq_polarity(idx); diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 966fa106249..1c2c7bf6a9d 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -875,6 +875,10 @@ static void __init setup_IO_APIC_irqs(void) apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); continue; } + if (!first_notcon) { + apic_printk(APIC_VERBOSE, " not connected.\n"); + first_notcon = 1; + } irq = pin_2_irq(idx, apic, pin); add_pin_to_irq(irq, apic, pin); @@ -885,7 +889,7 @@ static void __init setup_IO_APIC_irqs(void) } if (!first_notcon) - apic_printk(APIC_VERBOSE," not connected.\n"); + apic_printk(APIC_VERBOSE, " not connected.\n"); } /* @@ -1845,7 +1849,7 @@ static struct sysdev_class ioapic_sysdev_class = { static int __init ioapic_init_sysfs(void) { struct sys_device * dev; - int i, size, error = 0; + int i, size, error; error = sysdev_class_register(&ioapic_sysdev_class); if (error) @@ -1854,12 +1858,11 @@ static int __init ioapic_init_sysfs(void) for (i = 0; i < nr_ioapics; i++ ) { size = sizeof(struct sys_device) + nr_ioapic_registers[i] * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); + mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); if (!mp_ioapic_data[i]) { printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } - memset(mp_ioapic_data[i], 0, size); dev = &mp_ioapic_data[i]->dev; dev->id = i; dev->cls = &ioapic_sysdev_class; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index e173b763f14..d3fde94f734 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -255,9 +255,17 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { + unsigned any_count = 0; + spin_lock_irqsave(&irq_desc[i].lock, flags); +#ifndef CONFIG_SMP + any_count = kstat_irqs(i); +#else + for_each_online_cpu(j) + any_count |= kstat_cpu(j).irqs[i]; +#endif action = irq_desc[i].action; - if (!action) + if (!action && !any_count) goto skip; seq_printf(p, "%3d: ",i); #ifndef CONFIG_SMP @@ -268,10 +276,12 @@ int show_interrupts(struct seq_file *p, void *v) #endif seq_printf(p, " %8s", irq_desc[i].chip->name); seq_printf(p, "-%-8s", irq_desc[i].name); - seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } seq_putc(p, '\n'); skip: @@ -280,14 +290,41 @@ skip: seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", nmi_count(j)); - seq_putc(p, '\n'); + seq_printf(p, " Non-maskable interrupts\n"); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs); - seq_putc(p, '\n'); + seq_printf(p, " Local timer interrupts\n"); #endif +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_call_count); + seq_printf(p, " function call interrupts\n"); + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 865669efc54..6b5c730d67b 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -62,9 +62,17 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { + unsigned any_count = 0; + spin_lock_irqsave(&irq_desc[i].lock, flags); +#ifndef CONFIG_SMP + any_count = kstat_irqs(i); +#else + for_each_online_cpu(j) + any_count |= kstat_cpu(j).irqs[i]; +#endif action = irq_desc[i].action; - if (!action) + if (!action && !any_count) goto skip; seq_printf(p, "%3d: ",i); #ifndef CONFIG_SMP @@ -76,9 +84,11 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, " %8s", irq_desc[i].chip->name); seq_printf(p, "-%-8s", irq_desc[i].name); - seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); @@ -86,11 +96,37 @@ skip: seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); - seq_putc(p, '\n'); + seq_printf(p, " Non-maskable interrupts\n"); seq_printf(p, "LOC: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); - seq_putc(p, '\n'); + seq_printf(p, " Local timer interrupts\n"); +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); + seq_printf(p, " function call interrupts\n"); + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); + seq_printf(p, "THR: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); + seq_printf(p, " Threshold APIC interrupts\n"); + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); } return 0; diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c index e7d0d3c2ef6..90f778c04b3 100644 --- a/arch/x86/kernel/kprobes_32.c +++ b/arch/x86/kernel/kprobes_32.c @@ -41,6 +41,13 @@ void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +struct kretprobe_blackpoint kretprobe_blacklist[] = { + {"__switch_to", }, /* This function switches only current task, but + doesn't switch kernel stack.*/ + {NULL, NULL} /* Terminator */ +}; +const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); + /* insert a jmp code */ static __always_inline void set_jmp_op(void *from, void *to) { @@ -584,7 +591,7 @@ out: return 1; } -static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -666,7 +673,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_GPF: - case DIE_PAGE_FAULT: /* kprobe_running() needs smp_processor_id() */ preempt_disable(); if (kprobe_running() && diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c index 62e28e52d78..681b801c5e2 100644 --- a/arch/x86/kernel/kprobes_64.c +++ b/arch/x86/kernel/kprobes_64.c @@ -48,6 +48,13 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +struct kretprobe_blackpoint kretprobe_blacklist[] = { + {"__switch_to", }, /* This function switches only current task, but + doesn't switch kernel stack.*/ + {NULL, NULL} /* Terminator */ +}; +const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); + /* * returns non-zero if opcode modifies the interrupt flag. */ @@ -657,7 +664,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_GPF: - case DIE_PAGE_FAULT: /* kprobe_running() needs smp_processor_id() */ preempt_disable(); if (kprobe_running() && diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt_32.c index a8b18421863..9ff90a27c45 100644 --- a/arch/x86/kernel/ldt_32.c +++ b/arch/x86/kernel/ldt_32.c @@ -92,13 +92,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) struct mm_struct * old_mm; int retval = 0; - init_MUTEX(&mm->context.sem); + mutex_init(&mm->context.lock); mm->context.size = 0; old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); + mutex_lock(&old_mm->context.lock); retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); + mutex_unlock(&old_mm->context.lock); } return retval; } @@ -130,7 +130,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - down(&mm->context.sem); + mutex_lock(&mm->context.lock); size = mm->context.size*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -138,7 +138,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) err = 0; if (copy_to_user(ptr, mm->context.ldt, size)) err = -EFAULT; - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); if (err < 0) goto error_return; if (size != bytecount) { @@ -194,7 +194,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) goto out; } - down(&mm->context.sem); + mutex_lock(&mm->context.lock); if (ldt_info.entry_number >= mm->context.size) { error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); if (error < 0) @@ -221,7 +221,7 @@ install: error = 0; out_unlock: - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); out: return error; } diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c index 3796523d616..60e57abb8e9 100644 --- a/arch/x86/kernel/ldt_64.c +++ b/arch/x86/kernel/ldt_64.c @@ -96,13 +96,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) struct mm_struct * old_mm; int retval = 0; - init_MUTEX(&mm->context.sem); + mutex_init(&mm->context.lock); mm->context.size = 0; old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); + mutex_lock(&old_mm->context.lock); retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); + mutex_unlock(&old_mm->context.lock); } return retval; } @@ -133,7 +133,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - down(&mm->context.sem); + mutex_lock(&mm->context.lock); size = mm->context.size*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -141,7 +141,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) err = 0; if (copy_to_user(ptr, mm->context.ldt, size)) err = -EFAULT; - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); if (err < 0) goto error_return; if (size != bytecount) { @@ -193,7 +193,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) goto out; } - down(&mm->context.sem); + mutex_lock(&mm->context.lock); if (ldt_info.entry_number >= (unsigned)mm->context.size) { error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); if (error < 0) @@ -223,7 +223,7 @@ install: error = 0; out_unlock: - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); out: return error; } diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index deda9a221cf..8459ca64bc2 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -10,6 +10,7 @@ #include <linux/kexec.h> #include <linux/delay.h> #include <linux/init.h> +#include <linux/numa.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> @@ -169,3 +170,15 @@ static int __init parse_crashkernel(char *arg) return 0; } early_param("crashkernel", parse_crashkernel); + +void arch_crash_save_vmcoreinfo(void) +{ +#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif +#ifdef CONFIG_X86_PAE + VMCOREINFO_CONFIG(X86_PAE); +#endif +} + diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index cd1899a2f0c..7450b69710b 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -10,6 +10,7 @@ #include <linux/kexec.h> #include <linux/string.h> #include <linux/reboot.h> +#include <linux/numa.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> @@ -257,3 +258,11 @@ static int __init setup_crashkernel(char *arg) } early_param("crashkernel", setup_crashkernel); +void arch_crash_save_vmcoreinfo(void) +{ +#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif +} + diff --git a/arch/x86/kernel/mce_64.c b/arch/x86/kernel/mce_64.c index a66d607f5b9..8ca8f864896 100644 --- a/arch/x86/kernel/mce_64.c +++ b/arch/x86/kernel/mce_64.c @@ -76,9 +76,6 @@ void mce_log(struct mce *mce) wmb(); for (;;) { entry = rcu_dereference(mcelog.next); - /* The rmb forces the compiler to reload next in each - iteration */ - rmb(); for (;;) { /* When the buffer fills up discard new entries. Assume that the earlier errors are the more interesting. */ @@ -698,8 +695,6 @@ static int __init mcheck_disable(char *str) mce=nobootlog Don't log MCEs from before booting. */ static int __init mcheck_enable(char *str) { - if (*str == '=') - str++; if (!strcmp(str, "off")) mce_dont_init = 1; else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) @@ -712,7 +707,7 @@ static int __init mcheck_enable(char *str) } __setup("nomce", mcheck_disable); -__setup("mce", mcheck_enable); +__setup("mce=", mcheck_enable); /* * Sysfs support diff --git a/arch/x86/kernel/mce_amd_64.c b/arch/x86/kernel/mce_amd_64.c index 2f8a7f18b0f..0d2afd96aca 100644 --- a/arch/x86/kernel/mce_amd_64.c +++ b/arch/x86/kernel/mce_amd_64.c @@ -237,6 +237,7 @@ asmlinkage void mce_threshold_interrupt(void) } } out: + add_pda(irq_threshold_count, 1); irq_exit(); } @@ -472,7 +473,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifdef CONFIG_SMP if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */ - i = first_cpu(cpu_core_map[cpu]); + i = first_cpu(per_cpu(cpu_core_map, cpu)); /* first core not up yet */ if (cpu_data[i].cpu_core_id) @@ -492,7 +493,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - b->cpus = cpu_core_map[cpu]; + b->cpus = per_cpu(cpu_core_map, cpu); per_cpu(threshold_banks, cpu)[bank] = b; goto out; } @@ -509,7 +510,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifndef CONFIG_SMP b->cpus = CPU_MASK_ALL; #else - b->cpus = cpu_core_map[cpu]; + b->cpus = per_cpu(cpu_core_map, cpu); #endif err = kobject_register(&b->kobj); if (err) diff --git a/arch/x86/kernel/mce_intel_64.c b/arch/x86/kernel/mce_intel_64.c index 6551505d8a2..c17eaf5dd6d 100644 --- a/arch/x86/kernel/mce_intel_64.c +++ b/arch/x86/kernel/mce_intel_64.c @@ -26,6 +26,7 @@ asmlinkage void smp_thermal_interrupt(void) if (therm_throt_process(msr_val & 1)) mce_log_therm_throt_event(smp_processor_id(), msr_val); + add_pda(irq_thermal_count, 1); irq_exit(); } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index c044de310b6..df85c9c1360 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -133,7 +133,7 @@ static const struct file_operations msr_fops = { .open = msr_open, }; -static int msr_device_create(int i) +static int __cpuinit msr_device_create(int i) { int err = 0; struct device *dev; @@ -144,7 +144,7 @@ static int msr_device_create(int i) return err; } -static int msr_class_cpu_callback(struct notifier_block *nfb, +static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c index 739cfb207dd..6a80d67c212 100644 --- a/arch/x86/kernel/paravirt_32.c +++ b/arch/x86/kernel/paravirt_32.c @@ -42,32 +42,33 @@ void _paravirt_nop(void) static void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - paravirt_ops.name); + pv_info.name); } char *memory_setup(void) { - return paravirt_ops.memory_setup(); + return pv_init_ops.memory_setup(); } /* Simple instruction patching code. */ -#define DEF_NATIVE(name, code) \ - extern const char start_##name[], end_##name[]; \ - asm("start_" #name ": " code "; end_" #name ":") - -DEF_NATIVE(irq_disable, "cli"); -DEF_NATIVE(irq_enable, "sti"); -DEF_NATIVE(restore_fl, "push %eax; popf"); -DEF_NATIVE(save_fl, "pushf; pop %eax"); -DEF_NATIVE(iret, "iret"); -DEF_NATIVE(irq_enable_sysexit, "sti; sysexit"); -DEF_NATIVE(read_cr2, "mov %cr2, %eax"); -DEF_NATIVE(write_cr3, "mov %eax, %cr3"); -DEF_NATIVE(read_cr3, "mov %cr3, %eax"); -DEF_NATIVE(clts, "clts"); -DEF_NATIVE(read_tsc, "rdtsc"); - -DEF_NATIVE(ud2a, "ud2a"); +#define DEF_NATIVE(ops, name, code) \ + extern const char start_##ops##_##name[], end_##ops##_##name[]; \ + asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") + +DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); +DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); +DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); +DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); +DEF_NATIVE(pv_cpu_ops, iret, "iret"); +DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); +DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); +DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); +DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); +DEF_NATIVE(pv_cpu_ops, clts, "clts"); +DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); + +/* Undefined instruction for dealing with missing ops pointers. */ +static const unsigned char ud2a[] = { 0x0f, 0x0b }; static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned ret; switch(type) { -#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site - SITE(irq_disable); - SITE(irq_enable); - SITE(restore_fl); - SITE(save_fl); - SITE(iret); - SITE(irq_enable_sysexit); - SITE(read_cr2); - SITE(read_cr3); - SITE(write_cr3); - SITE(clts); - SITE(read_tsc); +#define SITE(ops, x) \ + case PARAVIRT_PATCH(ops.x): \ + start = start_##ops##_##x; \ + end = end_##ops##_##x; \ + goto patch_site + + SITE(pv_irq_ops, irq_disable); + SITE(pv_irq_ops, irq_enable); + SITE(pv_irq_ops, restore_fl); + SITE(pv_irq_ops, save_fl); + SITE(pv_cpu_ops, iret); + SITE(pv_cpu_ops, irq_enable_sysexit); + SITE(pv_mmu_ops, read_cr2); + SITE(pv_mmu_ops, read_cr3); + SITE(pv_mmu_ops, write_cr3); + SITE(pv_cpu_ops, clts); + SITE(pv_cpu_ops, read_tsc); #undef SITE patch_site: ret = paravirt_patch_insns(ibuf, len, start, end); break; - case PARAVIRT_PATCH(make_pgd): - case PARAVIRT_PATCH(make_pte): - case PARAVIRT_PATCH(pgd_val): - case PARAVIRT_PATCH(pte_val): -#ifdef CONFIG_X86_PAE - case PARAVIRT_PATCH(make_pmd): - case PARAVIRT_PATCH(pmd_val): -#endif - /* These functions end up returning exactly what - they're passed, in the same registers. */ - ret = paravirt_patch_nop(); - break; - default: ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; @@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf, return 5; } -unsigned paravirt_patch_jmp(const void *target, void *insnbuf, +unsigned paravirt_patch_jmp(void *insnbuf, const void *target, unsigned long addr, unsigned len) { struct branch *b = insnbuf; @@ -165,22 +158,37 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf, return 5; } +/* Neat trick to map patch type back to the call within the + * corresponding structure. */ +static void *get_call_destination(u8 type) +{ + struct paravirt_patch_template tmpl = { + .pv_init_ops = pv_init_ops, + .pv_time_ops = pv_time_ops, + .pv_cpu_ops = pv_cpu_ops, + .pv_irq_ops = pv_irq_ops, + .pv_apic_ops = pv_apic_ops, + .pv_mmu_ops = pv_mmu_ops, + }; + return *((void **)&tmpl + type); +} + unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, unsigned long addr, unsigned len) { - void *opfunc = *((void **)¶virt_ops + type); + void *opfunc = get_call_destination(type); unsigned ret; if (opfunc == NULL) /* If there's no function, patch it with a ud2a (BUG) */ - ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a); + ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); else if (opfunc == paravirt_nop) /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); - else if (type == PARAVIRT_PATCH(iret) || - type == PARAVIRT_PATCH(irq_enable_sysexit)) + else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || + type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit)) /* If operation requires a jmp, then jmp */ - ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len); + ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); else /* Otherwise call the function; assume target could clobber any caller-save reg */ @@ -205,7 +213,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len, void init_IRQ(void) { - paravirt_ops.init_IRQ(); + pv_irq_ops.init_IRQ(); } static void native_flush_tlb(void) @@ -233,7 +241,7 @@ extern void native_irq_enable_sysexit(void); static int __init print_banner(void) { - paravirt_ops.banner(); + pv_init_ops.banner(); return 0; } core_initcall(print_banner); @@ -273,47 +281,96 @@ int paravirt_disable_iospace(void) return ret; } -struct paravirt_ops paravirt_ops = { +static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; + +static inline void enter_lazy(enum paravirt_lazy_mode mode) +{ + BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); + BUG_ON(preemptible()); + + x86_write_percpu(paravirt_lazy_mode, mode); +} + +void paravirt_leave_lazy(enum paravirt_lazy_mode mode) +{ + BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode); + BUG_ON(preemptible()); + + x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); +} + +void paravirt_enter_lazy_mmu(void) +{ + enter_lazy(PARAVIRT_LAZY_MMU); +} + +void paravirt_leave_lazy_mmu(void) +{ + paravirt_leave_lazy(PARAVIRT_LAZY_MMU); +} + +void paravirt_enter_lazy_cpu(void) +{ + enter_lazy(PARAVIRT_LAZY_CPU); +} + +void paravirt_leave_lazy_cpu(void) +{ + paravirt_leave_lazy(PARAVIRT_LAZY_CPU); +} + +enum paravirt_lazy_mode paravirt_get_lazy_mode(void) +{ + return x86_read_percpu(paravirt_lazy_mode); +} + +struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, .kernel_rpl = 0, .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ +}; - .patch = native_patch, +struct pv_init_ops pv_init_ops = { + .patch = native_patch, .banner = default_banner, .arch_setup = paravirt_nop, .memory_setup = machine_specific_memory_setup, +}; + +struct pv_time_ops pv_time_ops = { + .time_init = hpet_time_init, .get_wallclock = native_get_wallclock, .set_wallclock = native_set_wallclock, - .time_init = hpet_time_init, + .sched_clock = native_sched_clock, + .get_cpu_khz = native_calculate_cpu_khz, +}; + +struct pv_irq_ops pv_irq_ops = { .init_IRQ = native_init_IRQ, + .save_fl = native_save_fl, + .restore_fl = native_restore_fl, + .irq_disable = native_irq_disable, + .irq_enable = native_irq_enable, + .safe_halt = native_safe_halt, + .halt = native_halt, +}; +struct pv_cpu_ops pv_cpu_ops = { .cpuid = native_cpuid, .get_debugreg = native_get_debugreg, .set_debugreg = native_set_debugreg, .clts = native_clts, .read_cr0 = native_read_cr0, .write_cr0 = native_write_cr0, - .read_cr2 = native_read_cr2, - .write_cr2 = native_write_cr2, - .read_cr3 = native_read_cr3, - .write_cr3 = native_write_cr3, .read_cr4 = native_read_cr4, .read_cr4_safe = native_read_cr4_safe, .write_cr4 = native_write_cr4, - .save_fl = native_save_fl, - .restore_fl = native_restore_fl, - .irq_disable = native_irq_disable, - .irq_enable = native_irq_enable, - .safe_halt = native_safe_halt, - .halt = native_halt, .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, .write_msr = native_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, - .sched_clock = native_sched_clock, - .get_cpu_khz = native_calculate_cpu_khz, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, .load_gdt = native_load_gdt, @@ -327,9 +384,19 @@ struct paravirt_ops paravirt_ops = { .write_idt_entry = write_dt_entry, .load_esp0 = native_load_esp0, + .irq_enable_sysexit = native_irq_enable_sysexit, + .iret = native_iret, + .set_iopl_mask = native_set_iopl_mask, .io_delay = native_io_delay, + .lazy_mode = { + .enter = paravirt_nop, + .leave = paravirt_nop, + }, +}; + +struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC .apic_write = native_apic_write, .apic_write_atomic = native_apic_write_atomic, @@ -338,11 +405,17 @@ struct paravirt_ops paravirt_ops = { .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, #endif - .set_lazy_mode = paravirt_nop, +}; +struct pv_mmu_ops pv_mmu_ops = { .pagetable_setup_start = native_pagetable_setup_start, .pagetable_setup_done = native_pagetable_setup_done, + .read_cr2 = native_read_cr2, + .write_cr2 = native_write_cr2, + .read_cr3 = native_read_cr3, + .write_cr3 = native_write_cr3, + .flush_tlb_user = native_flush_tlb, .flush_tlb_kernel = native_flush_tlb_global, .flush_tlb_single = native_flush_tlb_single, @@ -381,12 +454,19 @@ struct paravirt_ops paravirt_ops = { .make_pte = native_make_pte, .make_pgd = native_make_pgd, - .irq_enable_sysexit = native_irq_enable_sysexit, - .iret = native_iret, - .dup_mmap = paravirt_nop, .exit_mmap = paravirt_nop, .activate_mm = paravirt_nop, + + .lazy_mode = { + .enter = paravirt_nop, + .leave = paravirt_nop, + }, }; -EXPORT_SYMBOL(paravirt_ops); +EXPORT_SYMBOL_GPL(pv_time_ops); +EXPORT_SYMBOL_GPL(pv_cpu_ops); +EXPORT_SYMBOL_GPL(pv_mmu_ops); +EXPORT_SYMBOL_GPL(pv_apic_ops); +EXPORT_SYMBOL_GPL(pv_info); +EXPORT_SYMBOL (pv_irq_ops); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 71da01e73f0..5098f58063a 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -35,6 +35,7 @@ #include <linux/pci_ids.h> #include <linux/pci.h> #include <linux/delay.h> +#include <linux/scatterlist.h> #include <asm/iommu.h> #include <asm/calgary.h> #include <asm/tce.h> @@ -221,10 +222,10 @@ static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen) return npages; } -static inline int translate_phb(struct pci_dev* dev) +static inline int translation_enabled(struct iommu_table *tbl) { - int disabled = bus_info[dev->bus->number].translation_disabled; - return !disabled; + /* only PHBs with translation enabled have an IOMMU table */ + return (tbl != NULL); } static void iommu_range_reserve(struct iommu_table *tbl, @@ -384,31 +385,32 @@ static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, int direction) { struct iommu_table *tbl = find_iommu_table(dev); + struct scatterlist *s; + int i; - if (!translate_phb(to_pci_dev(dev))) + if (!translation_enabled(tbl)) return; - while (nelems--) { + for_each_sg(sglist, s, nelems, i) { unsigned int npages; - dma_addr_t dma = sglist->dma_address; - unsigned int dmalen = sglist->dma_length; + dma_addr_t dma = s->dma_address; + unsigned int dmalen = s->dma_length; if (dmalen == 0) break; npages = num_dma_pages(dma, dmalen); iommu_free(tbl, dma, npages); - sglist++; } } static int calgary_nontranslate_map_sg(struct device* dev, struct scatterlist *sg, int nelems, int direction) { + struct scatterlist *s; int i; - for (i = 0; i < nelems; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nelems, i) { BUG_ON(!s->page); s->dma_address = virt_to_bus(page_address(s->page) +s->offset); s->dma_length = s->length; @@ -420,16 +422,16 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, int nelems, int direction) { struct iommu_table *tbl = find_iommu_table(dev); + struct scatterlist *s; unsigned long vaddr; unsigned int npages; unsigned long entry; int i; - if (!translate_phb(to_pci_dev(dev))) + if (!translation_enabled(tbl)) return calgary_nontranslate_map_sg(dev, sg, nelems, direction); - for (i = 0; i < nelems; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nelems, i) { BUG_ON(!s->page); vaddr = (unsigned long)page_address(s->page) + s->offset; @@ -454,9 +456,9 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, return nelems; error: calgary_unmap_sg(dev, sg, nelems, direction); - for (i = 0; i < nelems; i++) { - sg[i].dma_address = bad_dma_address; - sg[i].dma_length = 0; + for_each_sg(sg, s, nelems, i) { + sg->dma_address = bad_dma_address; + sg->dma_length = 0; } return 0; } @@ -472,7 +474,7 @@ static dma_addr_t calgary_map_single(struct device *dev, void *vaddr, uaddr = (unsigned long)vaddr; npages = num_dma_pages(uaddr, size); - if (translate_phb(to_pci_dev(dev))) + if (translation_enabled(tbl)) dma_handle = iommu_alloc(tbl, vaddr, npages, direction); else dma_handle = virt_to_bus(vaddr); @@ -486,7 +488,7 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, struct iommu_table *tbl = find_iommu_table(dev); unsigned int npages; - if (!translate_phb(to_pci_dev(dev))) + if (!translation_enabled(tbl)) return; npages = num_dma_pages(dma_handle, size); @@ -511,7 +513,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, goto error; memset(ret, 0, size); - if (translate_phb(to_pci_dev(dev))) { + if (translation_enabled(tbl)) { /* set up tces to cover the allocated range */ mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL); if (mapping == bad_dma_address) @@ -1192,7 +1194,7 @@ static int __init calgary_init(void) { int ret; struct pci_dev *dev = NULL; - void *tce_space; + struct calgary_bus_info *info; ret = calgary_locate_bbars(); if (ret) @@ -1204,12 +1206,14 @@ static int __init calgary_init(void) break; if (!is_cal_pci_dev(dev->device)) continue; - if (!translate_phb(dev)) { + + info = &bus_info[dev->bus->number]; + if (info->translation_disabled) { calgary_init_one_nontraslated(dev); continue; } - tce_space = bus_info[dev->bus->number].tce_space; - if (!tce_space && !translate_empty_slots) + + if (!info->tce_space && !translate_empty_slots) continue; ret = calgary_init_one(dev); @@ -1227,11 +1231,13 @@ error: break; if (!is_cal_pci_dev(dev->device)) continue; - if (!translate_phb(dev)) { + + info = &bus_info[dev->bus->number]; + if (info->translation_disabled) { pci_dev_put(dev); continue; } - if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots) + if (!info->tce_space && !translate_empty_slots) continue; calgary_disable_translation(dev); @@ -1544,7 +1550,7 @@ static void __init calgary_fixup_one_tce_space(struct pci_dev *dev) static int __init calgary_fixup_tce_spaces(void) { struct pci_dev *dev = NULL; - void *tce_space; + struct calgary_bus_info *info; if (no_iommu || swiotlb || !calgary_detected) return -ENODEV; @@ -1557,11 +1563,12 @@ static int __init calgary_fixup_tce_spaces(void) break; if (!is_cal_pci_dev(dev->device)) continue; - if (!translate_phb(dev)) + + info = &bus_info[dev->bus->number]; + if (info->translation_disabled) continue; - tce_space = bus_info[dev->bus->number].tce_space; - if (!tce_space) + if (!info->tce_space) continue; calgary_fixup_one_tce_space(dev); diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 0aae2f3847a..51330321a5d 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -12,7 +12,6 @@ #include <linux/string.h> #include <linux/pci.h> #include <linux/module.h> -#include <linux/pci.h> #include <asm/io.h> struct dma_coherent_mem { diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 9576a2eb375..b2b42bdb0a1 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -51,11 +51,9 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) { struct page *page; int node; -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - node = pcibus_to_node(to_pci_dev(dev)->bus); - else -#endif + + node = dev_to_node(dev); + if (node == -1) node = numa_node_id(); if (node < first_node(node_online_map)) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 4918c575d58..5cdfab65e93 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -8,6 +8,7 @@ * See Documentation/DMA-mapping.txt for the interface specification. * * Copyright 2002 Andi Kleen, SuSE Labs. + * Subject to the GNU General Public License v2 only. */ #include <linux/types.h> @@ -23,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/bitops.h> #include <linux/kdebug.h> +#include <linux/scatterlist.h> #include <asm/atomic.h> #include <asm/io.h> #include <asm/mtrr.h> @@ -278,10 +280,10 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, */ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { + struct scatterlist *s; int i; - for (i = 0; i < nents; i++) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nents, i) { if (!s->dma_length || !s->length) break; gart_unmap_single(dev, s->dma_address, s->dma_length, dir); @@ -292,14 +294,14 @@ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, int nents, int dir) { + struct scatterlist *s; int i; #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); #endif - for (i = 0; i < nents; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nents, i) { unsigned long addr = page_to_phys(s->page) + s->offset; if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir); @@ -319,23 +321,23 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, } /* Map multiple scatterlist entries continuous into the first. */ -static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, +static int __dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { unsigned long iommu_start = alloc_iommu(pages); unsigned long iommu_page = iommu_start; + struct scatterlist *s; int i; if (iommu_start == -1) return -1; - - for (i = start; i < stopat; i++) { - struct scatterlist *s = &sg[i]; + + for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; - BUG_ON(i > start && s->offset); - if (i == start) { + BUG_ON(s != start && s->offset); + if (s == start) { *sout = *s; sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; @@ -357,30 +359,32 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, return 0; } -static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, +static inline int dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages, int need) { - if (!need) { - BUG_ON(stopat - start != 1); - *sout = sg[start]; - sout->dma_length = sg[start].length; + if (!need) { + BUG_ON(nelems != 1); + *sout = *start; + sout->dma_length = start->length; return 0; - } - return __dma_map_cont(sg, start, stopat, sout, pages); + } + return __dma_map_cont(start, nelems, sout, pages); } /* * DMA map all entries in a scatterlist. * Merge chunks that have page aligned sizes into a continuous mapping. */ -int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, + int dir) { int i; int out; int start; unsigned long pages = 0; int need = 0, nextneed; + struct scatterlist *s, *ps, *start_sg, *sgmap; if (nents == 0) return 0; @@ -390,8 +394,9 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) out = 0; start = 0; - for (i = 0; i < nents; i++) { - struct scatterlist *s = &sg[i]; + start_sg = sgmap = sg; + ps = NULL; /* shut up gcc */ + for_each_sg(sg, s, nents, i) { dma_addr_t addr = page_to_phys(s->page) + s->offset; s->dma_address = addr; BUG_ON(s->length == 0); @@ -400,29 +405,33 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) /* Handle the previous not yet processed entries */ if (i > start) { - struct scatterlist *ps = &sg[i-1]; /* Can only merge when the last chunk ends on a page boundary and the new one doesn't have an offset. */ if (!iommu_merge || !nextneed || !need || s->offset || - (ps->offset + ps->length) % PAGE_SIZE) { - if (dma_map_cont(sg, start, i, sg+out, pages, - need) < 0) + (ps->offset + ps->length) % PAGE_SIZE) { + if (dma_map_cont(start_sg, i - start, sgmap, + pages, need) < 0) goto error; out++; + sgmap = sg_next(sgmap); pages = 0; - start = i; + start = i; + start_sg = s; } } need = nextneed; pages += to_pages(s->offset, s->length); + ps = s; } - if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) + if (dma_map_cont(start_sg, i - start, sgmap, pages, need) < 0) goto error; out++; flush_gart(); - if (out < nents) - sg[out].dma_length = 0; + if (out < nents) { + sgmap = sg_next(sgmap); + sgmap->dma_length = 0; + } return out; error: @@ -437,8 +446,8 @@ error: if (panic_on_overflow) panic("dma_map_sg: overflow on %lu pages\n", pages); iommu_full(dev, pages << PAGE_SHIFT, dir); - for (i = 0; i < nents; i++) - sg[i].dma_address = bad_dma_address; + for_each_sg(sg, s, nents, i) + s->dma_address = bad_dma_address; return 0; } diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index 2a34c6c025a..e85d4360360 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -5,6 +5,7 @@ #include <linux/pci.h> #include <linux/string.h> #include <linux/dma-mapping.h> +#include <linux/scatterlist.h> #include <asm/iommu.h> #include <asm/processor.h> @@ -57,10 +58,10 @@ static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size, static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct scatterlist *s; int i; - for (i = 0; i < nents; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nents, i) { BUG_ON(!s->page); s->dma_address = virt_to_bus(page_address(s->page) +s->offset); if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7352d4b377e..6309b275cb9 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -581,7 +581,7 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, * * Kprobes not supported here. Set the probe on schedule instead. */ -__kprobes struct task_struct * +struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c index 0cecd7513c9..99102ec5fad 100644 --- a/arch/x86/kernel/ptrace_32.c +++ b/arch/x86/kernel/ptrace_32.c @@ -165,7 +165,7 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_ seg &= ~7UL; - down(&child->mm->context.sem); + mutex_lock(&child->mm->context.lock); if (unlikely((seg >> 3) >= child->mm->context.size)) addr = -1L; /* bogus selector, access would fault */ else { @@ -179,7 +179,7 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_ addr &= 0xffff; addr += base; } - up(&child->mm->context.sem); + mutex_unlock(&child->mm->context.lock); } return addr; } @@ -524,11 +524,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = 0; break; - case PTRACE_DETACH: - /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) { ret = -EIO; diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c index c0cac42df3b..607085f3f08 100644 --- a/arch/x86/kernel/ptrace_64.c +++ b/arch/x86/kernel/ptrace_64.c @@ -103,7 +103,7 @@ unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *r seg &= ~7UL; - down(&child->mm->context.sem); + mutex_lock(&child->mm->context.lock); if (unlikely((seg >> 3) >= child->mm->context.size)) addr = -1L; /* bogus selector, access would fault */ else { @@ -117,7 +117,7 @@ unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *r addr &= 0xffff; addr += base; } - up(&child->mm->context.sem); + mutex_unlock(&child->mm->context.lock); } return addr; @@ -500,11 +500,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = 0; break; - case PTRACE_DETACH: - /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, sizeof(struct user_regs_struct))) { diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 1200aaac403..ba918823505 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -11,7 +11,6 @@ #include <linux/bootmem.h> #include <linux/bitops.h> #include <linux/module.h> -#include <asm/bootsetup.h> #include <asm/pda.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -23,8 +22,9 @@ #include <asm/percpu.h> #include <asm/proto.h> #include <asm/sections.h> +#include <asm/setup.h> -char x86_boot_params[BOOT_PARAM_SIZE] __initdata; +struct boot_params __initdata boot_params; cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index c8e1bc38d42..b87a6fd5ba4 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -137,10 +137,11 @@ EXPORT_SYMBOL(edd); */ static inline void copy_edd(void) { - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info)); - edd.mbr_signature_nr = EDD_MBR_SIG_NR; - edd.edd_info_nr = EDD_NR; + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, + sizeof(edd.mbr_signature)); + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; + edd.edd_info_nr = boot_params.eddbuf_entries; } #else static inline void copy_edd(void) @@ -434,17 +435,20 @@ void __init setup_bootmem_allocator(void) #endif numa_kva_reserve(); #ifdef CONFIG_BLK_DEV_INITRD - if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { - reserve_bootmem(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = initrd_start+INITRD_SIZE; - } - else { + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; + + if (ramdisk_end <= end_of_lowmem) { + reserve_bootmem(ramdisk_image, ramdisk_size); + initrd_start = ramdisk_image + PAGE_OFFSET; + initrd_end = initrd_start+ramdisk_size; + } else { printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - INITRD_START + INITRD_SIZE, - max_low_pfn << PAGE_SHIFT); + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + ramdisk_end, end_of_lowmem); initrd_start = 0; } } @@ -512,28 +516,29 @@ void __init setup_arch(char **cmdline_p) * the system table is valid. If not, then initialize normally. */ #ifdef CONFIG_EFI - if ((LOADER_TYPE == 0x50) && EFI_SYSTAB) + if ((boot_params.hdr.type_of_loader == 0x50) && + boot_params.efi_info.efi_systab) efi_enabled = 1; #endif - ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); - screen_info = SCREEN_INFO; - edid_info = EDID_INFO; - apm_info.bios = APM_BIOS_INFO; - ist_info = IST_INFO; - saved_videomode = VIDEO_MODE; - if( SYS_DESC_TABLE.length != 0 ) { - set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2); - machine_id = SYS_DESC_TABLE.table[0]; - machine_submodel_id = SYS_DESC_TABLE.table[1]; - BIOS_revision = SYS_DESC_TABLE.table[2]; + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); + screen_info = boot_params.screen_info; + edid_info = boot_params.edid_info; + apm_info.bios = boot_params.apm_bios_info; + ist_info = boot_params.ist_info; + saved_videomode = boot_params.hdr.vid_mode; + if( boot_params.sys_desc_table.length != 0 ) { + set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); + machine_id = boot_params.sys_desc_table.table[0]; + machine_submodel_id = boot_params.sys_desc_table.table[1]; + BIOS_revision = boot_params.sys_desc_table.table[2]; } - bootloader_type = LOADER_TYPE; + bootloader_type = boot_params.hdr.type_of_loader; #ifdef CONFIG_BLK_DEV_RAM - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif ARCH_SETUP if (efi_enabled) @@ -545,7 +550,7 @@ void __init setup_arch(char **cmdline_p) copy_edd(); - if (!MOUNT_ROOT_RDONLY) + if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index b7da90e79c7..5a19f0cc5b6 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -52,7 +52,6 @@ #include <asm/dma.h> #include <asm/mpspec.h> #include <asm/mmu_context.h> -#include <asm/bootsetup.h> #include <asm/proto.h> #include <asm/setup.h> #include <asm/mach_apic.h> @@ -180,10 +179,11 @@ EXPORT_SYMBOL(edd); */ static inline void copy_edd(void) { - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info)); - edd.mbr_signature_nr = EDD_MBR_SIG_NR; - edd.edd_info_nr = EDD_NR; + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, + sizeof(edd.mbr_signature)); + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; + edd.edd_info_nr = boot_params.eddbuf_entries; } #else static inline void copy_edd(void) @@ -220,21 +220,21 @@ void __init setup_arch(char **cmdline_p) { printk(KERN_INFO "Command line: %s\n", boot_command_line); - ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); - screen_info = SCREEN_INFO; - edid_info = EDID_INFO; - saved_video_mode = SAVED_VIDEO_MODE; - bootloader_type = LOADER_TYPE; + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); + screen_info = boot_params.screen_info; + edid_info = boot_params.edid_info; + saved_video_mode = boot_params.hdr.vid_mode; + bootloader_type = boot_params.hdr.type_of_loader; #ifdef CONFIG_BLK_DEV_RAM - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif setup_memory_region(); copy_edd(); - if (!MOUNT_ROOT_RDONLY) + if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; init_mm.start_code = (unsigned long) &_text; init_mm.end_code = (unsigned long) &_etext; @@ -339,17 +339,20 @@ void __init setup_arch(char **cmdline_p) */ find_smp_config(); #ifdef CONFIG_BLK_DEV_INITRD - if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { - reserve_bootmem_generic(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = initrd_start+INITRD_SIZE; - } - else { + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long end_of_mem = end_pfn << PAGE_SHIFT; + + if (ramdisk_end <= end_of_mem) { + reserve_bootmem_generic(ramdisk_image, ramdisk_size); + initrd_start = ramdisk_image + PAGE_OFFSET; + initrd_end = initrd_start+ramdisk_size; + } else { printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - (unsigned long)(INITRD_START + INITRD_SIZE), - (unsigned long)(end_pfn << PAGE_SHIFT)); + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + ramdisk_end, end_of_mem); initrd_start = 0; } } @@ -601,7 +604,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) level = cpuid_eax(1); if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); - if (c->x86 == 0x10) + if (c->x86 == 0x10 || c->x86 == 0x11) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); /* Enable workaround for FXSAVE leak */ @@ -965,7 +968,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) * applications want to get the raw CPUID data, they should access * /dev/cpu/<cpu_nr>/cpuid instead. */ - static char *x86_cap_flags[] = { + static const char *const x86_cap_flags[] = { /* Intel-defined */ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", @@ -1019,7 +1022,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; - static char *x86_power_flags[] = { + static const char *const x86_power_flags[] = { "ts", /* temperature sensor */ "fid", /* frequency id control */ "vid", /* voltage id control */ @@ -1070,7 +1073,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (smp_num_siblings * c->x86_max_cores > 1) { int cpu = c - cpu_data; seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index d01d51fcce2..0d79df3c563 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -385,7 +385,6 @@ static int setup_frame(int sig, struct k_sigaction *ka, regs->edx = (unsigned long) 0; regs->ecx = (unsigned long) 0; - set_fs(USER_DS); regs->xds = __USER_DS; regs->xes = __USER_DS; regs->xss = __USER_DS; @@ -479,7 +478,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->edx = (unsigned long) &frame->info; regs->ecx = (unsigned long) &frame->uc; - set_fs(USER_DS); regs->xds = __USER_DS; regs->xes = __USER_DS; regs->xss = __USER_DS; diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index 2d35d850202..791d9f8036a 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -342,6 +342,7 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs) smp_mb__after_clear_bit(); out: put_cpu_no_resched(); + __get_cpu_var(irq_stat).irq_tlb_count++; } void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, @@ -640,6 +641,7 @@ static void native_smp_send_stop(void) fastcall void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); + __get_cpu_var(irq_stat).irq_resched_count++; } fastcall void smp_call_function_interrupt(struct pt_regs *regs) @@ -660,6 +662,7 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs) */ irq_enter(); (*func)(info); + __get_cpu_var(irq_stat).irq_call_count++; irq_exit(); if (wait) { @@ -705,3 +708,10 @@ struct smp_ops smp_ops = { .smp_send_reschedule = native_smp_send_reschedule, .smp_call_function_mask = native_smp_call_function_mask, }; + +int smp_call_function_mask(cpumask_t mask, void (*func) (void *info), + void *info, int wait) +{ + return smp_ops.smp_call_function_mask(mask, func, info, wait); +} +EXPORT_SYMBOL(smp_call_function_mask); diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index df4a82812ad..5c2964727d1 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -163,6 +163,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) out: ack_APIC_irq(); cpu_clear(cpu, f->flush_cpumask); + add_pda(irq_tlb_count, 1); } static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, @@ -493,6 +494,7 @@ void smp_send_stop(void) asmlinkage void smp_reschedule_interrupt(void) { ack_APIC_irq(); + add_pda(irq_resched_count, 1); } asmlinkage void smp_call_function_interrupt(void) @@ -514,6 +516,7 @@ asmlinkage void smp_call_function_interrupt(void) exit_idle(); irq_enter(); (*func)(info); + add_pda(irq_call_count, 1); irq_exit(); if (wait) { mb(); diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index e4f61d1c624..be3faac0471 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -70,12 +70,12 @@ EXPORT_SYMBOL(smp_num_siblings); int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; /* representing HT siblings of each logical CPU */ -cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_sibling_map); +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); /* representing HT and core siblings of each logical CPU */ -cpumask_t cpu_core_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_core_map); +DEFINE_PER_CPU(cpumask_t, cpu_core_map); +EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* bitmap of online cpus */ cpumask_t cpu_online_map __read_mostly; @@ -102,8 +102,8 @@ u8 apicid_2_node[MAX_APICID]; * Trampoline 80x86 program as an array. */ -extern unsigned char trampoline_data []; -extern unsigned char trampoline_end []; +extern const unsigned char trampoline_data []; +extern const unsigned char trampoline_end []; static unsigned char *trampoline_base; static int trampoline_exec; @@ -118,7 +118,7 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 }; * has made sure it's suitably aligned. */ -static unsigned long __devinit setup_trampoline(void) +static unsigned long __cpuinit setup_trampoline(void) { memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); return virt_to_phys(trampoline_base); @@ -300,7 +300,7 @@ cpumask_t cpu_coregroup_map(int cpu) * And for power savings, we return cpu_core_map */ if (sched_mc_power_savings || sched_smt_power_savings) - return cpu_core_map[cpu]; + return per_cpu(cpu_core_map, cpu); else return c->llc_shared_map; } @@ -319,22 +319,22 @@ void __cpuinit set_cpu_sibling_map(int cpu) for_each_cpu_mask(i, cpu_sibling_setup_map) { if (c[cpu].phys_proc_id == c[i].phys_proc_id && c[cpu].cpu_core_id == c[i].cpu_core_id) { - cpu_set(i, cpu_sibling_map[cpu]); - cpu_set(cpu, cpu_sibling_map[i]); - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, per_cpu(cpu_sibling_map, cpu)); + cpu_set(cpu, per_cpu(cpu_sibling_map, i)); + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); cpu_set(i, c[cpu].llc_shared_map); cpu_set(cpu, c[i].llc_shared_map); } } } else { - cpu_set(cpu, cpu_sibling_map[cpu]); + cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); } cpu_set(cpu, c[cpu].llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { - cpu_core_map[cpu] = cpu_sibling_map[cpu]; + per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); c[cpu].booted_cores = 1; return; } @@ -346,17 +346,17 @@ void __cpuinit set_cpu_sibling_map(int cpu) cpu_set(cpu, c[i].llc_shared_map); } if (c[cpu].phys_proc_id == c[i].phys_proc_id) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); /* * Does this new cpu bringup a new core? */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { /* * for each core in package, increment * the booted_cores for this new cpu */ - if (first_cpu(cpu_sibling_map[i]) == i) + if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) c[cpu].booted_cores++; /* * increment the core count for all @@ -983,8 +983,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk(KERN_NOTICE "Local APIC not detected." " Using dummy APIC emulation.\n"); map_cpu_to_logical_apicid(); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); return; } @@ -1008,8 +1008,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); return; } @@ -1021,10 +1021,16 @@ static void __init smp_boot_cpus(unsigned int max_cpus) if (!max_cpus) { smp_found_config = 0; printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); + + if (nmi_watchdog == NMI_LOCAL_APIC) { + printk(KERN_INFO "activating minimal APIC for NMI watchdog use.\n"); + connect_bsp_APIC(); + setup_local_APIC(); + } smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); return; } @@ -1102,16 +1108,16 @@ static void __init smp_boot_cpus(unsigned int max_cpus) Dprintk("Boot done.\n"); /* - * construct cpu_sibling_map[], so that we can tell sibling CPUs + * construct cpu_sibling_map, so that we can tell sibling CPUs * efficiently. */ for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); } - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); smpboot_setup_io_apic(); @@ -1148,19 +1154,19 @@ void remove_siblinginfo(int cpu) int sibling; struct cpuinfo_x86 *c = cpu_data; - for_each_cpu_mask(sibling, cpu_core_map[cpu]) { - cpu_clear(cpu, cpu_core_map[sibling]); - /* + for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { + cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); + /*/ * last thread sibling in this cpu core going down */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) c[sibling].booted_cores--; } - for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) - cpu_clear(cpu, cpu_sibling_map[sibling]); - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) + cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); c[cpu].phys_proc_id = 0; c[cpu].cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 720a7d1f886..e351ac4ab5b 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -91,19 +91,19 @@ EXPORT_SYMBOL(cpu_data); int smp_threads_ready; /* representing HT siblings of each logical CPU */ -cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_sibling_map); +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); /* representing HT and core siblings of each logical CPU */ -cpumask_t cpu_core_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_core_map); +DEFINE_PER_CPU(cpumask_t, cpu_core_map); +EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* * Trampoline 80x86 program as an array. */ -extern unsigned char trampoline_data[]; -extern unsigned char trampoline_end[]; +extern const unsigned char trampoline_data[]; +extern const unsigned char trampoline_end[]; /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; @@ -243,7 +243,7 @@ cpumask_t cpu_coregroup_map(int cpu) * And for power savings, we return cpu_core_map */ if (sched_mc_power_savings || sched_smt_power_savings) - return cpu_core_map[cpu]; + return per_cpu(cpu_core_map, cpu); else return c->llc_shared_map; } @@ -262,22 +262,22 @@ static inline void set_cpu_sibling_map(int cpu) for_each_cpu_mask(i, cpu_sibling_setup_map) { if (c[cpu].phys_proc_id == c[i].phys_proc_id && c[cpu].cpu_core_id == c[i].cpu_core_id) { - cpu_set(i, cpu_sibling_map[cpu]); - cpu_set(cpu, cpu_sibling_map[i]); - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, per_cpu(cpu_sibling_map, cpu)); + cpu_set(cpu, per_cpu(cpu_sibling_map, i)); + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); cpu_set(i, c[cpu].llc_shared_map); cpu_set(cpu, c[i].llc_shared_map); } } } else { - cpu_set(cpu, cpu_sibling_map[cpu]); + cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); } cpu_set(cpu, c[cpu].llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { - cpu_core_map[cpu] = cpu_sibling_map[cpu]; + per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); c[cpu].booted_cores = 1; return; } @@ -289,17 +289,17 @@ static inline void set_cpu_sibling_map(int cpu) cpu_set(cpu, c[i].llc_shared_map); } if (c[cpu].phys_proc_id == c[i].phys_proc_id) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); /* * Does this new cpu bringup a new core? */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { /* * for each core in package, increment * the booted_cores for this new cpu */ - if (first_cpu(cpu_sibling_map[i]) == i) + if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) c[cpu].booted_cores++; /* * increment the core count for all @@ -695,7 +695,6 @@ do_rest: cpu_clear(cpu, cpu_present_map); cpu_clear(cpu, cpu_possible_map); x86_cpu_to_apicid[cpu] = BAD_APICID; - x86_cpu_to_log_apicid[cpu] = BAD_APICID; return -EIO; } @@ -735,8 +734,8 @@ static __init void disable_smp(void) phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); else phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); } #ifdef CONFIG_HOTPLUG_CPU @@ -971,19 +970,19 @@ static void remove_siblinginfo(int cpu) int sibling; struct cpuinfo_x86 *c = cpu_data; - for_each_cpu_mask(sibling, cpu_core_map[cpu]) { - cpu_clear(cpu, cpu_core_map[sibling]); + for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { + cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); /* * last thread sibling in this cpu core going down */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) c[sibling].booted_cores--; } - for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) - cpu_clear(cpu, cpu_sibling_map[sibling]); - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) + cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); c[cpu].phys_proc_id = 0; c[cpu].cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 413e527cdeb..6fa6cf036c7 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -33,7 +33,7 @@ static void save_stack_address(void *data, unsigned long addr) trace->entries[trace->nr_entries++] = addr; } -static struct stacktrace_ops save_stack_ops = { +static const struct stacktrace_ops save_stack_ops = { .warning = save_stack_warning, .warning_symbol = save_stack_warning_symbol, .stack = save_stack_stack, diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index f8bae9ba032..a86d26f036e 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -17,10 +17,10 @@ #include <linux/mman.h> #include <linux/file.h> #include <linux/utsname.h> +#include <linux/ipc.h> #include <asm/uaccess.h> #include <asm/unistd.h> -#include <asm/ipc.h> /* * sys_pipe() is the normal C calling standard for creating diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c index e3f2569b2c4..9e540fee700 100644 --- a/arch/x86/kernel/tce_64.c +++ b/arch/x86/kernel/tce_64.c @@ -40,9 +40,9 @@ static inline void flush_tce(void* tceaddr) { /* a single tce can't cross a cache line */ if (cpu_has_clflush) - asm volatile("clflush (%0)" :: "r" (tceaddr)); + clflush(tceaddr); else - asm volatile("wbinvd":::"memory"); + wbinvd(); } void tce_build(struct iommu_table *tbl, unsigned long index, diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index c25f23eb397..8caa0b77746 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -44,15 +44,15 @@ int arch_register_cpu(int num) * Also certain PCI quirks require not to enable hotplug control * for all CPU's. */ - if (num && enable_cpu_hotplug) +#ifdef CONFIG_HOTPLUG_CPU + if (num) cpu_devices[num].cpu.hotpluggable = 1; +#endif return register_cpu(&cpu_devices[num].cpu, num); } #ifdef CONFIG_HOTPLUG_CPU -int enable_cpu_hotplug = 1; - void arch_unregister_cpu(int num) { return unregister_cpu(&cpu_devices[num].cpu); } diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index f62815f8d06..9bcc1c6aca3 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S @@ -36,11 +36,11 @@ #include <asm/segment.h> #include <asm/page.h> -.data - /* We can free up trampoline after bootup if cpu hotplug is not supported. */ #ifndef CONFIG_HOTPLUG_CPU .section ".init.data","aw",@progbits +#else +.section .rodata,"a",@progbits #endif .code16 diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 607983b0d27..e30b67c6a9f 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -33,7 +33,12 @@ #include <asm/msr.h> #include <asm/segment.h> -.data +/* We can free up trampoline after bootup if cpu hotplug is not supported. */ +#ifndef CONFIG_HOTPLUG_CPU +.section .init.data, "aw", @progbits +#else +.section .rodata, "a", @progbits +#endif .code16 diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 05c27ecaf2a..b132d3957df 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -112,7 +112,7 @@ struct stack_frame { static inline unsigned long print_context_stack(struct thread_info *tinfo, unsigned long *stack, unsigned long ebp, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { #ifdef CONFIG_FRAME_POINTER struct stack_frame *frame = (struct stack_frame *)ebp; @@ -149,7 +149,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { unsigned long ebp = 0; @@ -221,7 +221,7 @@ static void print_trace_address(void *data, unsigned long addr) touch_nmi_watchdog(); } -static struct stacktrace_ops print_trace_ops = { +static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, @@ -398,31 +398,24 @@ void die(const char * str, struct pt_regs * regs, long err) local_save_flags(flags); if (++die.lock_owner_depth < 3) { - int nl = 0; unsigned long esp; unsigned short ss; report_bug(regs->eip, regs); - printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, + ++die_counter); #ifdef CONFIG_PREEMPT - printk(KERN_EMERG "PREEMPT "); - nl = 1; + printk("PREEMPT "); #endif #ifdef CONFIG_SMP - if (!nl) - printk(KERN_EMERG); printk("SMP "); - nl = 1; #endif #ifdef CONFIG_DEBUG_PAGEALLOC - if (!nl) - printk(KERN_EMERG); printk("DEBUG_PAGEALLOC"); - nl = 1; #endif - if (nl) - printk("\n"); + printk("\n"); + if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { @@ -1112,20 +1105,6 @@ asmlinkage void math_emulate(long arg) #endif /* CONFIG_MATH_EMULATION */ -#ifdef CONFIG_X86_F00F_BUG -void __init trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif - /* * This needs to use 'idt_table' rather than 'idt', and * thus use the _nonmapped_ version of the IDT, as the diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index bc7116acf8f..b4a9b3db199 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -215,7 +215,7 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; @@ -336,7 +336,7 @@ static void print_trace_address(void *data, unsigned long addr) printk_address(addr); } -static struct stacktrace_ops print_trace_ops = { +static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index b85ad754f70..e87a3939ed4 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -349,10 +349,10 @@ __cpuinit int unsynchronized_tsc(void) static void __init check_geode_tsc_reliable(void) { - unsigned long val; + unsigned long res_low, res_high; - rdmsrl(MSR_GEODE_BUSCONT_CONF0, val); - if ((val & RTSC_SUSP)) + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + if (res_low & RTSC_SUSP) clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; } #else diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 18673e0f193..f02bad68aba 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -134,21 +134,21 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned long eip, unsigned len) { switch (type) { - case PARAVIRT_PATCH(irq_disable): + case PARAVIRT_PATCH(pv_irq_ops.irq_disable): return patch_internal(VMI_CALL_DisableInterrupts, len, insns, eip); - case PARAVIRT_PATCH(irq_enable): + case PARAVIRT_PATCH(pv_irq_ops.irq_enable): return patch_internal(VMI_CALL_EnableInterrupts, len, insns, eip); - case PARAVIRT_PATCH(restore_fl): + case PARAVIRT_PATCH(pv_irq_ops.restore_fl): return patch_internal(VMI_CALL_SetInterruptMask, len, insns, eip); - case PARAVIRT_PATCH(save_fl): + case PARAVIRT_PATCH(pv_irq_ops.save_fl): return patch_internal(VMI_CALL_GetInterruptMask, len, insns, eip); - case PARAVIRT_PATCH(iret): + case PARAVIRT_PATCH(pv_cpu_ops.iret): return patch_internal(VMI_CALL_IRET, len, insns, eip); - case PARAVIRT_PATCH(irq_enable_sysexit): + case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit): return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip); default: break; @@ -552,24 +552,22 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif -static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode) +static void vmi_enter_lazy_cpu(void) { - static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode); - - if (!vmi_ops.set_lazy_mode) - return; + paravirt_enter_lazy_cpu(); + vmi_ops.set_lazy_mode(2); +} - /* Modes should never nest or overlap */ - BUG_ON(__get_cpu_var(lazy_mode) && !(mode == PARAVIRT_LAZY_NONE || - mode == PARAVIRT_LAZY_FLUSH)); +static void vmi_enter_lazy_mmu(void) +{ + paravirt_enter_lazy_mmu(); + vmi_ops.set_lazy_mode(1); +} - if (mode == PARAVIRT_LAZY_FLUSH) { - vmi_ops.set_lazy_mode(0); - vmi_ops.set_lazy_mode(__get_cpu_var(lazy_mode)); - } else { - vmi_ops.set_lazy_mode(mode); - __get_cpu_var(lazy_mode) = mode; - } +static void vmi_leave_lazy(void) +{ + paravirt_leave_lazy(paravirt_get_lazy_mode()); + vmi_ops.set_lazy_mode(0); } static inline int __init check_vmi_rom(struct vrom_header *rom) @@ -690,9 +688,9 @@ do { \ reloc = call_vrom_long_func(vmi_rom, get_reloc, \ VMI_CALL_##vmicall); \ if (rel->type == VMI_RELOCATION_CALL_REL) \ - paravirt_ops.opname = (void *)rel->eip; \ + opname = (void *)rel->eip; \ else if (rel->type == VMI_RELOCATION_NOP) \ - paravirt_ops.opname = (void *)vmi_nop; \ + opname = (void *)vmi_nop; \ else if (rel->type != VMI_RELOCATION_NONE) \ printk(KERN_WARNING "VMI: Unknown relocation " \ "type %d for " #vmicall"\n",\ @@ -712,7 +710,7 @@ do { \ VMI_CALL_##vmicall); \ BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \ if (rel->type == VMI_RELOCATION_CALL_REL) { \ - paravirt_ops.opname = wrapper; \ + opname = wrapper; \ vmi_ops.cache = (void *)rel->eip; \ } \ } while (0) @@ -732,11 +730,11 @@ static inline int __init activate_vmi(void) } savesegment(cs, kernel_cs); - paravirt_ops.paravirt_enabled = 1; - paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; + pv_info.paravirt_enabled = 1; + pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; + pv_info.name = "vmi"; - paravirt_ops.patch = vmi_patch; - paravirt_ops.name = "vmi"; + pv_init_ops.patch = vmi_patch; /* * Many of these operations are ABI compatible with VMI. @@ -754,26 +752,26 @@ static inline int __init activate_vmi(void) */ /* CPUID is special, so very special it gets wrapped like a present */ - para_wrap(cpuid, vmi_cpuid, cpuid, CPUID); - - para_fill(clts, CLTS); - para_fill(get_debugreg, GetDR); - para_fill(set_debugreg, SetDR); - para_fill(read_cr0, GetCR0); - para_fill(read_cr2, GetCR2); - para_fill(read_cr3, GetCR3); - para_fill(read_cr4, GetCR4); - para_fill(write_cr0, SetCR0); - para_fill(write_cr2, SetCR2); - para_fill(write_cr3, SetCR3); - para_fill(write_cr4, SetCR4); - para_fill(save_fl, GetInterruptMask); - para_fill(restore_fl, SetInterruptMask); - para_fill(irq_disable, DisableInterrupts); - para_fill(irq_enable, EnableInterrupts); - - para_fill(wbinvd, WBINVD); - para_fill(read_tsc, RDTSC); + para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID); + + para_fill(pv_cpu_ops.clts, CLTS); + para_fill(pv_cpu_ops.get_debugreg, GetDR); + para_fill(pv_cpu_ops.set_debugreg, SetDR); + para_fill(pv_cpu_ops.read_cr0, GetCR0); + para_fill(pv_mmu_ops.read_cr2, GetCR2); + para_fill(pv_mmu_ops.read_cr3, GetCR3); + para_fill(pv_cpu_ops.read_cr4, GetCR4); + para_fill(pv_cpu_ops.write_cr0, SetCR0); + para_fill(pv_mmu_ops.write_cr2, SetCR2); + para_fill(pv_mmu_ops.write_cr3, SetCR3); + para_fill(pv_cpu_ops.write_cr4, SetCR4); + para_fill(pv_irq_ops.save_fl, GetInterruptMask); + para_fill(pv_irq_ops.restore_fl, SetInterruptMask); + para_fill(pv_irq_ops.irq_disable, DisableInterrupts); + para_fill(pv_irq_ops.irq_enable, EnableInterrupts); + + para_fill(pv_cpu_ops.wbinvd, WBINVD); + para_fill(pv_cpu_ops.read_tsc, RDTSC); /* The following we emulate with trap and emulate for now */ /* paravirt_ops.read_msr = vmi_rdmsr */ @@ -781,29 +779,38 @@ static inline int __init activate_vmi(void) /* paravirt_ops.rdpmc = vmi_rdpmc */ /* TR interface doesn't pass TR value, wrap */ - para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR); + para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR); /* LDT is special, too */ - para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT); - - para_fill(load_gdt, SetGDT); - para_fill(load_idt, SetIDT); - para_fill(store_gdt, GetGDT); - para_fill(store_idt, GetIDT); - para_fill(store_tr, GetTR); - paravirt_ops.load_tls = vmi_load_tls; - para_fill(write_ldt_entry, WriteLDTEntry); - para_fill(write_gdt_entry, WriteGDTEntry); - para_fill(write_idt_entry, WriteIDTEntry); - para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); - para_fill(set_iopl_mask, SetIOPLMask); - para_fill(io_delay, IODelay); - para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode); + para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT); + + para_fill(pv_cpu_ops.load_gdt, SetGDT); + para_fill(pv_cpu_ops.load_idt, SetIDT); + para_fill(pv_cpu_ops.store_gdt, GetGDT); + para_fill(pv_cpu_ops.store_idt, GetIDT); + para_fill(pv_cpu_ops.store_tr, GetTR); + pv_cpu_ops.load_tls = vmi_load_tls; + para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry); + para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry); + para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry); + para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); + para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); + para_fill(pv_cpu_ops.io_delay, IODelay); + + para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, + set_lazy_mode, SetLazyMode); + para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy, + set_lazy_mode, SetLazyMode); + + para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, + set_lazy_mode, SetLazyMode); + para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy, + set_lazy_mode, SetLazyMode); /* user and kernel flush are just handled with different flags to FlushTLB */ - para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); - para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB); - para_fill(flush_tlb_single, InvalPage); + para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); + para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB); + para_fill(pv_mmu_ops.flush_tlb_single, InvalPage); /* * Until a standard flag format can be agreed on, we need to @@ -819,41 +826,41 @@ static inline int __init activate_vmi(void) #endif if (vmi_ops.set_pte) { - paravirt_ops.set_pte = vmi_set_pte; - paravirt_ops.set_pte_at = vmi_set_pte_at; - paravirt_ops.set_pmd = vmi_set_pmd; + pv_mmu_ops.set_pte = vmi_set_pte; + pv_mmu_ops.set_pte_at = vmi_set_pte_at; + pv_mmu_ops.set_pmd = vmi_set_pmd; #ifdef CONFIG_X86_PAE - paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; - paravirt_ops.set_pte_present = vmi_set_pte_present; - paravirt_ops.set_pud = vmi_set_pud; - paravirt_ops.pte_clear = vmi_pte_clear; - paravirt_ops.pmd_clear = vmi_pmd_clear; + pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic; + pv_mmu_ops.set_pte_present = vmi_set_pte_present; + pv_mmu_ops.set_pud = vmi_set_pud; + pv_mmu_ops.pte_clear = vmi_pte_clear; + pv_mmu_ops.pmd_clear = vmi_pmd_clear; #endif } if (vmi_ops.update_pte) { - paravirt_ops.pte_update = vmi_update_pte; - paravirt_ops.pte_update_defer = vmi_update_pte_defer; + pv_mmu_ops.pte_update = vmi_update_pte; + pv_mmu_ops.pte_update_defer = vmi_update_pte_defer; } vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); if (vmi_ops.allocate_page) { - paravirt_ops.alloc_pt = vmi_allocate_pt; - paravirt_ops.alloc_pd = vmi_allocate_pd; - paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; + pv_mmu_ops.alloc_pt = vmi_allocate_pt; + pv_mmu_ops.alloc_pd = vmi_allocate_pd; + pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone; } vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); if (vmi_ops.release_page) { - paravirt_ops.release_pt = vmi_release_pt; - paravirt_ops.release_pd = vmi_release_pd; + pv_mmu_ops.release_pt = vmi_release_pt; + pv_mmu_ops.release_pd = vmi_release_pd; } /* Set linear is needed in all cases */ vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); #ifdef CONFIG_HIGHPTE if (vmi_ops.set_linear_mapping) - paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte; + pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte; #endif /* @@ -863,17 +870,17 @@ static inline int __init activate_vmi(void) * the backend. They are performance critical anyway, so requiring * a patch is not a big problem. */ - paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0; - paravirt_ops.iret = (void *)0xbadbab0; + pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0; + pv_cpu_ops.iret = (void *)0xbadbab0; #ifdef CONFIG_SMP - para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); + para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(apic_read, APICRead); - para_fill(apic_write, APICWrite); - para_fill(apic_write_atomic, APICWrite); + para_fill(pv_apic_ops.apic_read, APICRead); + para_fill(pv_apic_ops.apic_write, APICWrite); + para_fill(pv_apic_ops.apic_write_atomic, APICWrite); #endif /* @@ -891,15 +898,15 @@ static inline int __init activate_vmi(void) vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); vmi_timer_ops.cancel_alarm = vmi_get_function(VMI_CALL_CancelAlarm); - paravirt_ops.time_init = vmi_time_init; - paravirt_ops.get_wallclock = vmi_get_wallclock; - paravirt_ops.set_wallclock = vmi_set_wallclock; + pv_time_ops.time_init = vmi_time_init; + pv_time_ops.get_wallclock = vmi_get_wallclock; + pv_time_ops.set_wallclock = vmi_set_wallclock; #ifdef CONFIG_X86_LOCAL_APIC - paravirt_ops.setup_boot_clock = vmi_time_bsp_init; - paravirt_ops.setup_secondary_clock = vmi_time_ap_init; + pv_apic_ops.setup_boot_clock = vmi_time_bsp_init; + pv_apic_ops.setup_secondary_clock = vmi_time_ap_init; #endif - paravirt_ops.sched_clock = vmi_sched_clock; - paravirt_ops.get_cpu_khz = vmi_cpu_khz; + pv_time_ops.sched_clock = vmi_sched_clock; + pv_time_ops.get_cpu_khz = vmi_cpu_khz; /* We have true wallclock functions; disable CMOS clock sync */ no_sync_cmos_clock = 1; @@ -908,7 +915,7 @@ static inline int __init activate_vmi(void) disable_vmi_timer = 1; } - para_fill(safe_halt, Halt); + para_fill(pv_irq_ops.safe_halt, Halt); /* * Alternative instruction rewriting doesn't happen soon enough diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 93847d84815..8a67e282cb5 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -78,7 +78,6 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.sys_tz = sys_tz; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } @@ -289,7 +288,7 @@ static void __cpuinit vsyscall_set_cpu(int cpu) unsigned long *d; unsigned long node = 0; #ifdef CONFIG_NUMA - node = cpu_to_node[cpu]; + node = cpu_to_node(cpu); #endif if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) write_rdtscp_aux((node << 12) | cpu); |