From 2c0b8a7578f7653e1e5312a5232e8ead563cf477 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 30 Jan 2008 13:30:47 +0100 Subject: x86: fall back on interrupt disable in cmpxchg8b on 80386 and 80486 Actually, on 386, cmpxchg and cmpxchg_local fall back on cmpxchg_386_u8/16/32: it disables interruptions around non atomic updates to mimic the cmpxchg behavior. The comment: /* Poor man's cmpxchg for 386. Unsuitable for SMP */ already present in cmpxchg_386_u32 tells much about how this cmpxchg implementation should not be used in a SMP context. However, the cmpxchg_local can perfectly use this fallback, since it only needs to be atomic wrt the local cpu. This patch adds a cmpxchg_486_u64 and uses it as a fallback for cmpxchg64 and cmpxchg64_local on 80386 and 80486. Q: but why is it called cmpxchg_486 when the other functions are called A: Because the standard cmpxchg is missing only on 386, but cmpxchg8b is missing both on 386 and 486. Citing Intel's Instruction set reference: cmpxchg: This instruction is not supported on Intel processors earlier than the Intel486 processors. cmpxchg8b: This instruction encoding is not supported on Intel processors earlier than the Pentium processors. Q: What's the reason to have cmpxchg64_local on 32 bit architectures? Without that need all this would just be a few simple defines. A: cmpxchg64_local on 32 bits architectures takes unsigned long long parameters, but cmpxchg_local only takes longs. Since we have cmpxchg8b to execute a 8 byte cmpxchg atomically on pentium and +, it makes sense to provide a flavor of cmpxchg and cmpxchg_local using this instruction. Also, for 32 bits architectures lacking the 64 bits atomic cmpxchg, it makes sense _not_ to define cmpxchg64 while cmpxchg could still be available. Moreover, the fallback for cmpxchg8b on i386 for 386 and 486 is a However, cmpxchg64_local will be emulated by disabling interrupts on all architectures where it is not supported atomically. Therefore, we *could* turn cmpxchg64_local into a cmpxchg_local, but it would make the 386/486 fallbacks ugly, make its design different from cmpxchg/cmpxchg64 (which really depends on atomic operations and cannot be emulated) and require the __cmpxchg_local to be expressed as a macro rather than an inline function so the parameters would not be fixed to unsigned long long in every case. So I think cmpxchg64_local makes sense there, but I am open to suggestions. Q: Are there any callers? A: I am actually using it in LTTng in my timestamping code. I use it to work around CPUs with asynchronous TSCs. I need to update 64 bits values atomically on this 32 bits architecture. Changelog: - Ran though checkpatch. Signed-off-by: Mathieu Desnoyers Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index cc8c501b9f3..867ff94579b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -342,5 +342,22 @@ unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) EXPORT_SYMBOL(cmpxchg_386_u32); #endif +#ifndef CONFIG_X86_CMPXCHG64 +unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) +{ + u64 prev; + unsigned long flags; + + /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_486_u64); +#endif + // arch_initcall(intel_cpu_init); -- cgit v1.2.3-70-g09d2 From eee3af4a2c83a97fff107ddc445d9df6fded9ce4 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Wed, 30 Jan 2008 13:31:09 +0100 Subject: x86, ptrace: support for branch trace store(BTS) Resend using different mail client Changes to the last version: - split implementation into two layers: ds/bts and ptrace - renamed TIF's - save/restore ds save area msr in __switch_to_xtra() - make block-stepping only look at BTF bit Signed-off-by: Markus Metzger Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile_32 | 1 + arch/x86/kernel/Makefile_64 | 1 + arch/x86/kernel/cpu/intel.c | 5 + arch/x86/kernel/ds.c | 429 +++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 19 +- arch/x86/kernel/process_64.c | 26 ++- arch/x86/kernel/ptrace.c | 212 +++++++++++++++++++ arch/x86/kernel/setup_64.c | 5 + arch/x86/kernel/step.c | 18 +- include/asm-x86/ds.h | 65 ++++++ include/asm-x86/processor_32.h | 3 + include/asm-x86/processor_64.h | 3 + include/asm-x86/ptrace-abi.h | 52 +++++ include/asm-x86/ptrace.h | 11 + include/asm-x86/thread_info_32.h | 12 +- include/asm-x86/thread_info_64.h | 9 +- 16 files changed, 859 insertions(+), 12 deletions(-) create mode 100644 arch/x86/kernel/ds.c create mode 100644 include/asm-x86/ds.h (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index b2d7aea4c82..cc2651bcc07 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -11,6 +11,7 @@ obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o obj-y += ptrace.o +obj-y += ds.o obj-y += tls.o obj-y += step.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 19af64e1a3f..2ec96acf648 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -13,6 +13,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ i8253.o io_delay.o rtc.o obj-y += ptrace.o +obj-y += ds.o obj-y += step.o obj-$(CONFIG_IA32_EMULATION) += tls.o diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 867ff94579b..e4b7e73e902 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include "cpu.h" @@ -219,6 +221,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (!(l1 & (1<<12))) set_bit(X86_FEATURE_PEBS, c->x86_capability); } + + if (cpu_has_bts) + ds_init_intel(c); } static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c new file mode 100644 index 00000000000..996a7c4f596 --- /dev/null +++ b/arch/x86/kernel/ds.c @@ -0,0 +1,429 @@ +/* + * Debug Store support + * + * This provides a low-level interface to the hardware's Debug Store + * feature that is used for last branch recording (LBR) and + * precise-event based sampling (PEBS). + * + * Different architectures use a different DS layout/pointer size. + * The below functions therefore work on a void*. + * + * + * Since there is no user for PEBS, yet, only LBR (or branch + * trace store, BTS) is supported. + * + * + * Copyright (C) 2007 Intel Corporation. + * Markus Metzger , Dec 2007 + */ + +#include + +#include +#include +#include + + +/* + * Debug Store (DS) save area configuration (see Intel64 and IA32 + * Architectures Software Developer's Manual, section 18.5) + * + * The DS configuration consists of the following fields; different + * architetures vary in the size of those fields. + * - double-word aligned base linear address of the BTS buffer + * - write pointer into the BTS buffer + * - end linear address of the BTS buffer (one byte beyond the end of + * the buffer) + * - interrupt pointer into BTS buffer + * (interrupt occurs when write pointer passes interrupt pointer) + * - double-word aligned base linear address of the PEBS buffer + * - write pointer into the PEBS buffer + * - end linear address of the PEBS buffer (one byte beyond the end of + * the buffer) + * - interrupt pointer into PEBS buffer + * (interrupt occurs when write pointer passes interrupt pointer) + * - value to which counter is reset following counter overflow + * + * On later architectures, the last branch recording hardware uses + * 64bit pointers even in 32bit mode. + * + * + * Branch Trace Store (BTS) records store information about control + * flow changes. They at least provide the following information: + * - source linear address + * - destination linear address + * + * Netburst supported a predicated bit that had been dropped in later + * architectures. We do not suppor it. + * + * + * In order to abstract from the actual DS and BTS layout, we describe + * the access to the relevant fields. + * Thanks to Andi Kleen for proposing this design. + * + * The implementation, however, is not as general as it might seem. In + * order to stay somewhat simple and efficient, we assume an + * underlying unsigned type (mostly a pointer type) and we expect the + * field to be at least as big as that type. + */ + +/* + * A special from_ip address to indicate that the BTS record is an + * info record that needs to be interpreted or skipped. + */ +#define BTS_ESCAPE_ADDRESS (-1) + +/* + * A field access descriptor + */ +struct access_desc { + unsigned char offset; + unsigned char size; +}; + +/* + * The configuration for a particular DS/BTS hardware implementation. + */ +struct ds_configuration { + /* the DS configuration */ + unsigned char sizeof_ds; + struct access_desc bts_buffer_base; + struct access_desc bts_index; + struct access_desc bts_absolute_maximum; + struct access_desc bts_interrupt_threshold; + /* the BTS configuration */ + unsigned char sizeof_bts; + struct access_desc from_ip; + struct access_desc to_ip; + /* BTS variants used to store additional information like + timestamps */ + struct access_desc info_type; + struct access_desc info_data; + unsigned long debugctl_mask; +}; + +/* + * The global configuration used by the below accessor functions + */ +static struct ds_configuration ds_cfg; + +/* + * Accessor functions for some DS and BTS fields using the above + * global ptrace_bts_cfg. + */ +static inline void *get_bts_buffer_base(char *base) +{ + return *(void **)(base + ds_cfg.bts_buffer_base.offset); +} +static inline void set_bts_buffer_base(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_buffer_base.offset)) = value; +} +static inline void *get_bts_index(char *base) +{ + return *(void **)(base + ds_cfg.bts_index.offset); +} +static inline void set_bts_index(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_index.offset)) = value; +} +static inline void *get_bts_absolute_maximum(char *base) +{ + return *(void **)(base + ds_cfg.bts_absolute_maximum.offset); +} +static inline void set_bts_absolute_maximum(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_absolute_maximum.offset)) = value; +} +static inline void *get_bts_interrupt_threshold(char *base) +{ + return *(void **)(base + ds_cfg.bts_interrupt_threshold.offset); +} +static inline void set_bts_interrupt_threshold(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; +} +static inline long get_from_ip(char *base) +{ + return *(long *)(base + ds_cfg.from_ip.offset); +} +static inline void set_from_ip(char *base, long value) +{ + (*(long *)(base + ds_cfg.from_ip.offset)) = value; +} +static inline long get_to_ip(char *base) +{ + return *(long *)(base + ds_cfg.to_ip.offset); +} +static inline void set_to_ip(char *base, long value) +{ + (*(long *)(base + ds_cfg.to_ip.offset)) = value; +} +static inline unsigned char get_info_type(char *base) +{ + return *(unsigned char *)(base + ds_cfg.info_type.offset); +} +static inline void set_info_type(char *base, unsigned char value) +{ + (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; +} +/* + * The info data might overlap with the info type on some architectures. + * We therefore read and write the exact number of bytes. + */ +static inline unsigned long long get_info_data(char *base) +{ + unsigned long long value = 0; + memcpy(&value, + base + ds_cfg.info_data.offset, + ds_cfg.info_data.size); + return value; +} +static inline void set_info_data(char *base, unsigned long long value) +{ + memcpy(base + ds_cfg.info_data.offset, + &value, + ds_cfg.info_data.size); +} + + +int ds_allocate(void **dsp, size_t bts_size_in_records) +{ + size_t bts_size_in_bytes = 0; + void *bts = 0; + void *ds = 0; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + if (bts_size_in_records < 0) + return -EINVAL; + + bts_size_in_bytes = + bts_size_in_records * ds_cfg.sizeof_bts; + + if (bts_size_in_bytes <= 0) + return -EINVAL; + + bts = kzalloc(bts_size_in_bytes, GFP_KERNEL); + + if (!bts) + return -ENOMEM; + + ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + + if (!ds) { + kfree(bts); + return -ENOMEM; + } + + set_bts_buffer_base(ds, bts); + set_bts_index(ds, bts); + set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); + set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); + + *dsp = ds; + return 0; +} + +int ds_free(void **dsp) +{ + if (*dsp) + kfree(get_bts_buffer_base(*dsp)); + kfree(*dsp); + *dsp = 0; + + return 0; +} + +int ds_get_bts_size(void *ds) +{ + size_t size_in_bytes; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + size_in_bytes = + get_bts_absolute_maximum(ds) - + get_bts_buffer_base(ds); + + return size_in_bytes / ds_cfg.sizeof_bts; +} + +int ds_get_bts_index(void *ds) +{ + size_t index_offset_in_bytes; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + index_offset_in_bytes = + get_bts_index(ds) - + get_bts_buffer_base(ds); + + return index_offset_in_bytes / ds_cfg.sizeof_bts; +} + +int ds_read_bts(void *ds, size_t index, struct bts_struct *out) +{ + void *bts; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + if (index < 0) + return -EINVAL; + + if (index >= ds_get_bts_size(ds)) + return -EINVAL; + + bts = get_bts_buffer_base(ds); + bts = (char *)bts + (index * ds_cfg.sizeof_bts); + + memset(out, 0, sizeof(*out)); + if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { + out->qualifier = get_info_type(bts); + out->variant.timestamp = get_info_data(bts); + } else { + out->qualifier = BTS_BRANCH; + out->variant.lbr.from_ip = get_from_ip(bts); + out->variant.lbr.to_ip = get_to_ip(bts); + } + + return 0; +} + +int ds_write_bts(void *ds, const struct bts_struct *in) +{ + void *bts; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + if (ds_get_bts_size(ds) <= 0) + return -ENXIO; + + bts = get_bts_index(ds); + + memset(bts, 0, ds_cfg.sizeof_bts); + switch (in->qualifier) { + case BTS_INVALID: + break; + + case BTS_BRANCH: + set_from_ip(bts, in->variant.lbr.from_ip); + set_to_ip(bts, in->variant.lbr.to_ip); + break; + + case BTS_TASK_ARRIVES: + case BTS_TASK_DEPARTS: + set_from_ip(bts, BTS_ESCAPE_ADDRESS); + set_info_type(bts, in->qualifier); + set_info_data(bts, in->variant.timestamp); + break; + + default: + return -EINVAL; + } + + bts = (char *)bts + ds_cfg.sizeof_bts; + if (bts >= get_bts_absolute_maximum(ds)) + bts = get_bts_buffer_base(ds); + set_bts_index(ds, bts); + + return 0; +} + +unsigned long ds_debugctl_mask(void) +{ + return ds_cfg.debugctl_mask; +} + +#ifdef __i386__ +static const struct ds_configuration ds_cfg_netburst = { + .sizeof_ds = 9 * 4, + .bts_buffer_base = { 0, 4 }, + .bts_index = { 4, 4 }, + .bts_absolute_maximum = { 8, 4 }, + .bts_interrupt_threshold = { 12, 4 }, + .sizeof_bts = 3 * 4, + .from_ip = { 0, 4 }, + .to_ip = { 4, 4 }, + .info_type = { 4, 1 }, + .info_data = { 5, 7 }, + .debugctl_mask = (1<<2)|(1<<3) +}; + +static const struct ds_configuration ds_cfg_pentium_m = { + .sizeof_ds = 9 * 4, + .bts_buffer_base = { 0, 4 }, + .bts_index = { 4, 4 }, + .bts_absolute_maximum = { 8, 4 }, + .bts_interrupt_threshold = { 12, 4 }, + .sizeof_bts = 3 * 4, + .from_ip = { 0, 4 }, + .to_ip = { 4, 4 }, + .info_type = { 4, 1 }, + .info_data = { 5, 7 }, + .debugctl_mask = (1<<6)|(1<<7) +}; +#endif /* _i386_ */ + +static const struct ds_configuration ds_cfg_core2 = { + .sizeof_ds = 9 * 8, + .bts_buffer_base = { 0, 8 }, + .bts_index = { 8, 8 }, + .bts_absolute_maximum = { 16, 8 }, + .bts_interrupt_threshold = { 24, 8 }, + .sizeof_bts = 3 * 8, + .from_ip = { 0, 8 }, + .to_ip = { 8, 8 }, + .info_type = { 8, 1 }, + .info_data = { 9, 7 }, + .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +}; + +static inline void +ds_configure(const struct ds_configuration *cfg) +{ + ds_cfg = *cfg; +} + +void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 0x6: + switch (c->x86_model) { +#ifdef __i386__ + case 0xD: + case 0xE: /* Pentium M */ + ds_configure(&ds_cfg_pentium_m); + break; +#endif /* _i386_ */ + case 0xF: /* Core2 */ + ds_configure(&ds_cfg_core2); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + case 0xF: + switch (c->x86_model) { +#ifdef __i386__ + case 0x0: + case 0x1: + case 0x2: /* Netburst */ + ds_configure(&ds_cfg_netburst); + break; +#endif /* _i386_ */ + default: + /* sorry, don't know about them */ + break; + } + break; + default: + /* sorry, don't know about them */ + break; + } +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5350763a2d0..2b9db937106 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -614,11 +614,21 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { struct thread_struct *prev, *next; + unsigned long debugctl; prev = &prev_p->thread; next = &next_p->thread; - if (next->debugctlmsr != prev->debugctlmsr) + debugctl = prev->debugctlmsr; + if (next->ds_area_msr != prev->ds_area_msr) { + /* we clear debugctl to make sure DS + * is not in use when we change it */ + debugctl = 0; + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); + wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); + } + + if (next->debugctlmsr != debugctl) wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0); if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { @@ -642,6 +652,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, } #endif + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); + + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); + + if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Disable the bitmap via an invalid offset. We still cache diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 057b5442ffd..843bf0c978a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -568,11 +568,21 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, struct tss_struct *tss) { struct thread_struct *prev, *next; + unsigned long debugctl; prev = &prev_p->thread, next = &next_p->thread; - if (next->debugctlmsr != prev->debugctlmsr) + debugctl = prev->debugctlmsr; + if (next->ds_area_msr != prev->ds_area_msr) { + /* we clear debugctl to make sure DS + * is not in use when we change it */ + debugctl = 0; + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); + wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); + } + + if (next->debugctlmsr != debugctl) wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr); if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { @@ -598,6 +608,16 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } + + /* + * Last branch recording recofiguration of trace hardware and + * disentangling of trace data per task. + */ + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); + + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); } /* @@ -701,8 +721,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Now maybe reload the debug registers and handle I/O bitmaps */ - if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) - || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || + task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); /* If the task has used fpu the last 5 timeslices, just do a full diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 3399c1be79b..8d0dd8b5eff 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -2,6 +2,9 @@ /* * Pentium III FXSR, SSE support * Gareth Hughes , May 2000 + * + * BTS tracing + * Markus Metzger , Dec 2007 */ #include @@ -26,6 +29,14 @@ #include #include #include +#include + + +/* + * The maximal size of a BTS buffer per traced task in number of BTS + * records. + */ +#define PTRACE_BTS_BUFFER_MAX 4000 /* * does not yet catch signals sent when the child dies. @@ -455,6 +466,165 @@ static int ptrace_set_debugreg(struct task_struct *child, return 0; } +static int ptrace_bts_max_buffer_size(void) +{ + return PTRACE_BTS_BUFFER_MAX; +} + +static int ptrace_bts_get_buffer_size(struct task_struct *child) +{ + if (!child->thread.ds_area_msr) + return -ENXIO; + + return ds_get_bts_size((void *)child->thread.ds_area_msr); +} + +static int ptrace_bts_get_index(struct task_struct *child) +{ + if (!child->thread.ds_area_msr) + return -ENXIO; + + return ds_get_bts_index((void *)child->thread.ds_area_msr); +} + +static int ptrace_bts_read_record(struct task_struct *child, + long index, + struct bts_struct __user *out) +{ + struct bts_struct ret; + int retval; + + if (!child->thread.ds_area_msr) + return -ENXIO; + + retval = ds_read_bts((void *)child->thread.ds_area_msr, + index, &ret); + if (retval) + return retval; + + if (copy_to_user(out, &ret, sizeof(ret))) + return -EFAULT; + + return sizeof(ret); +} + +static int ptrace_bts_write_record(struct task_struct *child, + const struct bts_struct *in) +{ + int retval; + + if (!child->thread.ds_area_msr) + return -ENXIO; + + retval = ds_write_bts((void *)child->thread.ds_area_msr, in); + if (retval) + return retval; + + return sizeof(*in); +} + +static int ptrace_bts_config(struct task_struct *child, + unsigned long options) +{ + unsigned long debugctl_mask = ds_debugctl_mask(); + int retval; + + retval = ptrace_bts_get_buffer_size(child); + if (retval < 0) + return retval; + if (retval == 0) + return -ENXIO; + + if (options & PTRACE_BTS_O_TRACE_TASK) { + child->thread.debugctlmsr |= debugctl_mask; + set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + } else { + /* there is no way for us to check whether we 'own' + * the respective bits in the DEBUGCTL MSR, we're + * about to clear */ + child->thread.debugctlmsr &= ~debugctl_mask; + + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + } + + if (options & PTRACE_BTS_O_TIMESTAMPS) + set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + else + clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + + return 0; +} + +static int ptrace_bts_status(struct task_struct *child) +{ + unsigned long debugctl_mask = ds_debugctl_mask(); + int retval, status = 0; + + retval = ptrace_bts_get_buffer_size(child); + if (retval < 0) + return retval; + if (retval == 0) + return -ENXIO; + + if (ptrace_bts_get_buffer_size(child) <= 0) + return -ENXIO; + + if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && + child->thread.debugctlmsr & debugctl_mask) + status |= PTRACE_BTS_O_TRACE_TASK; + if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) + status |= PTRACE_BTS_O_TIMESTAMPS; + + return status; +} + +static int ptrace_bts_allocate_bts(struct task_struct *child, + int size_in_records) +{ + int retval = 0; + void *ds; + + if (size_in_records < 0) + return -EINVAL; + + if (size_in_records > ptrace_bts_max_buffer_size()) + return -EINVAL; + + if (size_in_records == 0) { + ptrace_bts_config(child, /* options = */ 0); + } else { + retval = ds_allocate(&ds, size_in_records); + if (retval) + return retval; + } + + if (child->thread.ds_area_msr) + ds_free((void **)&child->thread.ds_area_msr); + + child->thread.ds_area_msr = (unsigned long)ds; + if (child->thread.ds_area_msr) + set_tsk_thread_flag(child, TIF_DS_AREA_MSR); + else + clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); + + return retval; +} + +void ptrace_bts_take_timestamp(struct task_struct *tsk, + enum bts_qualifier qualifier) +{ + struct bts_struct rec = { + .qualifier = qualifier, + .variant.timestamp = sched_clock() + }; + + if (ptrace_bts_get_buffer_size(tsk) <= 0) + return; + + ptrace_bts_write_record(tsk, &rec); +} + /* * Called by kernel/ptrace.c when detaching.. * @@ -466,6 +636,11 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif + ptrace_bts_config(child, /* options = */ 0); + if (child->thread.ds_area_msr) { + ds_free((void **)&child->thread.ds_area_msr); + clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); + } } long arch_ptrace(struct task_struct *child, long request, long addr, long data) @@ -626,6 +801,36 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; #endif + case PTRACE_BTS_MAX_BUFFER_SIZE: + ret = ptrace_bts_max_buffer_size(); + break; + + case PTRACE_BTS_ALLOCATE_BUFFER: + ret = ptrace_bts_allocate_bts(child, data); + break; + + case PTRACE_BTS_GET_BUFFER_SIZE: + ret = ptrace_bts_get_buffer_size(child); + break; + + case PTRACE_BTS_GET_INDEX: + ret = ptrace_bts_get_index(child); + break; + + case PTRACE_BTS_READ_RECORD: + ret = ptrace_bts_read_record + (child, data, + (struct bts_struct __user *) addr); + break; + + case PTRACE_BTS_CONFIG: + ret = ptrace_bts_config(child, data); + break; + + case PTRACE_BTS_STATUS: + ret = ptrace_bts_status(child); + break; + default: ret = ptrace_request(child, request, addr, data); break; @@ -809,6 +1014,13 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) case PTRACE_SETOPTIONS: case PTRACE_SET_THREAD_AREA: case PTRACE_GET_THREAD_AREA: + case PTRACE_BTS_MAX_BUFFER_SIZE: + case PTRACE_BTS_ALLOCATE_BUFFER: + case PTRACE_BTS_GET_BUFFER_SIZE: + case PTRACE_BTS_GET_INDEX: + case PTRACE_BTS_READ_RECORD: + case PTRACE_BTS_CONFIG: + case PTRACE_BTS_STATUS: return sys_ptrace(request, pid, addr, data); default: diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index ce4d6b52ce3..f2b131ef844 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -60,6 +60,7 @@ #include #include #include +#include /* * Machine setup.. @@ -823,6 +824,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_PEBS); } + + if (cpu_has_bts) + ds_init_intel(c); + n = c->extended_cpuid_level; if (n >= 0x80000008) { unsigned eax = cpuid_eax(0x80000008); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index f55c003f5b6..21ea22fda5f 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -169,9 +169,14 @@ static void enable_step(struct task_struct *child, bool block) */ if (enable_single_step(child) && block) { set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - write_debugctlmsr(child, DEBUGCTLMSR_BTF); - } else if (test_and_clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR)) { - write_debugctlmsr(child, 0); + write_debugctlmsr(child, + child->thread.debugctlmsr | DEBUGCTLMSR_BTF); + } else { + write_debugctlmsr(child, + child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR); + + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); } } @@ -190,8 +195,11 @@ void user_disable_single_step(struct task_struct *child) /* * Make sure block stepping (BTF) is disabled. */ - if (test_and_clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR)) - write_debugctlmsr(child, 0); + write_debugctlmsr(child, + child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR); + + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h new file mode 100644 index 00000000000..edd8467740a --- /dev/null +++ b/include/asm-x86/ds.h @@ -0,0 +1,65 @@ +/* + * Debug Store (DS) support + * + * This provides a low-level interface to the hardware's Debug Store + * feature that is used for last branch recording (LBR) and + * precise-event based sampling (PEBS). + * + * Different architectures use a different DS layout/pointer size. + * The below functions therefore work on a void*. + * + * + * Since there is no user for PEBS, yet, only LBR (or branch + * trace store, BTS) is supported. + * + * + * Copyright (C) 2007 Intel Corporation. + * Markus Metzger , Dec 2007 + */ + +#ifndef _ASM_X86_DS_H +#define _ASM_X86_DS_H + +#include +#include + +struct cpuinfo_x86; + + +/* a branch trace record entry + * + * In order to unify the interface between various processor versions, + * we use the below data structure for all processors. + */ +enum bts_qualifier { + BTS_INVALID = 0, + BTS_BRANCH, + BTS_TASK_ARRIVES, + BTS_TASK_DEPARTS +}; + +struct bts_struct { + enum bts_qualifier qualifier; + union { + /* BTS_BRANCH */ + struct { + long from_ip; + long to_ip; + } lbr; + /* BTS_TASK_ARRIVES or + BTS_TASK_DEPARTS */ + unsigned long long timestamp; + } variant; +}; + + +extern int ds_allocate(void **, size_t); +extern int ds_free(void **); +extern int ds_get_bts_size(void *); +extern int ds_get_bts_index(void *); +extern int ds_read_bts(void *, size_t, struct bts_struct *); +extern int ds_write_bts(void *, const struct bts_struct *); +extern unsigned long ds_debugctl_mask(void); +extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c); + +#endif /* _ASM_X86_DS_H */ diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h index 0d83da19812..9c0ab7f26bd 100644 --- a/include/asm-x86/processor_32.h +++ b/include/asm-x86/processor_32.h @@ -360,6 +360,9 @@ struct thread_struct { unsigned long io_bitmap_max; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; +/* Debug Store - if not 0 points to a DS Save Area configuration; + * goes into MSR_IA32_DS_AREA */ + unsigned long ds_area_msr; }; #define INIT_THREAD { \ diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h index 0780f3e3fdf..7b7f8a142e2 100644 --- a/include/asm-x86/processor_64.h +++ b/include/asm-x86/processor_64.h @@ -240,6 +240,9 @@ struct thread_struct { unsigned io_bitmap_max; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; +/* Debug Store - if not 0 points to a DS Save Area configuration; + * goes into MSR_IA32_DS_AREA */ + unsigned long ds_area_msr; /* cached TLS descriptors. */ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; } __attribute__((aligned(16))); diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h index adce6b51df2..6fadc5214e1 100644 --- a/include/asm-x86/ptrace-abi.h +++ b/include/asm-x86/ptrace-abi.h @@ -80,4 +80,56 @@ #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ +/* Return maximal BTS buffer size in number of records, + if successuf; -1, otherwise. + EOPNOTSUPP...processor does not support bts tracing */ +#define PTRACE_BTS_MAX_BUFFER_SIZE 40 + +/* Allocate new bts buffer (free old one, if exists) of size DATA bts records; + parameter ADDR is ignored. + Return 0, if successful; -1, otherwise. + EOPNOTSUPP...processor does not support bts tracing + EINVAL.......invalid size in records + ENOMEM.......out of memory */ +#define PTRACE_BTS_ALLOCATE_BUFFER 41 + +/* Return the size of the bts buffer in number of bts records, + if successful; -1, otherwise. + EOPNOTSUPP...processor does not support bts tracing + ENXIO........no buffer allocated */ +#define PTRACE_BTS_GET_BUFFER_SIZE 42 + +/* Return the index of the next bts record to be written, + if successful; -1, otherwise. + EOPNOTSUPP...processor does not support bts tracing + ENXIO........no buffer allocated + After the first warp-around, this is the start of the circular bts buffer. */ +#define PTRACE_BTS_GET_INDEX 43 + +/* Read the DATA'th bts record into a ptrace_bts_record buffer provided in ADDR. + Return 0, if successful; -1, otherwise + EOPNOTSUPP...processor does not support bts tracing + ENXIO........no buffer allocated + EINVAL.......invalid index */ +#define PTRACE_BTS_READ_RECORD 44 + +/* Configure last branch trace; the configuration is given as a bit-mask of + PTRACE_BTS_O_* options in DATA; parameter ADDR is ignored. + Return 0, if successful; -1, otherwise + EOPNOTSUPP...processor does not support bts tracing + ENXIO........no buffer allocated */ +#define PTRACE_BTS_CONFIG 45 + +/* Return the configuration as bit-mask of PTRACE_BTS_O_* options + if successful; -1, otherwise. + EOPNOTSUPP...processor does not support bts tracing + ENXIO........no buffer allocated */ +#define PTRACE_BTS_STATUS 46 + +/* Trace configuration options */ +/* Collect last branch trace */ +#define PTRACE_BTS_O_TRACE_TASK 0x1 +/* Take timestamps when the task arrives and departs */ +#define PTRACE_BTS_O_TIMESTAMPS 0x2 + #endif diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 9228870f615..a9a1bab1451 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -4,8 +4,19 @@ #include /* For __user */ #include + #ifndef __ASSEMBLY__ +#ifdef __KERNEL__ + +#include + +struct task_struct; +extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); + +#endif /* __KERNEL__ */ + + #ifdef __i386__ /* this struct defines the way the registers are stored on the stack during a system call. */ diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h index 306fc80800e..5bd508260ff 100644 --- a/include/asm-x86/thread_info_32.h +++ b/include/asm-x86/thread_info_32.h @@ -140,6 +140,8 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NOTSC 20 /* TSC is not accessible in userland */ #define TIF_FORCED_TF 21 /* true if TF in eflags artificially */ #define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */ +#define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */ +#define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */ #define _TIF_SYSCALL_TRACE (1< Date: Wed, 30 Jan 2008 13:32:37 +0100 Subject: x86: Implement support to synchronize RDTSC with LFENCE on Intel CPUs According to Intel RDTSC can be always synchronized with LFENCE on all current CPUs. Implement the necessary CPUID bit for that. It is unclear yet if that is true for all future CPUs too, but if there's another way the kernel can be always updated. Cc: asit.k.mallick@intel.com Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel.c | 3 ++- arch/x86/kernel/setup_64.c | 5 +---- include/asm-x86/cpufeature.h | 1 + 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index e4b7e73e902..0a4abdb61ae 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -203,9 +203,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) } #endif + if (cpu_has_xmm) + set_bit(X86_FEATURE_LFENCE_RDTSC, c->x86_capability); if (c->x86 == 15) { set_bit(X86_FEATURE_P4, c->x86_capability); - set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability); } if (c->x86 == 6) set_bit(X86_FEATURE_P3, c->x86_capability); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 2139aa6ac46..bc7758ea06a 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -888,10 +888,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - if (c->x86 == 15) - set_cpu_cap(c, X86_FEATURE_SYNC_RDTSC); - else - clear_cpu_cap(c, X86_FEATURE_SYNC_RDTSC); + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 7d53eea8b94..c1a7e07859c 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -80,6 +80,7 @@ #define X86_FEATURE_SYNC_RDTSC (3*32+15) /* RDTSC synchronizes the CPU */ #define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ -- cgit v1.2.3-70-g09d2 From 6d5f718a497375f853d90247f5f6963368e89803 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:32:38 +0100 Subject: x86: lfence fix LFENCE is available on XMM2 or higher Intel CPUs - not XMM or higher... this caused boot failures on XMM1 & !XMM1 capable CPUs. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0a4abdb61ae..5731de3e1bd 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -203,7 +203,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) } #endif - if (cpu_has_xmm) + if (cpu_has_xmm2) set_bit(X86_FEATURE_LFENCE_RDTSC, c->x86_capability); if (c->x86 == 15) { set_bit(X86_FEATURE_P4, c->x86_capability); -- cgit v1.2.3-70-g09d2 From 2b16a2353814a513cdb5c5c739b76a19d7ea39ce Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 30 Jan 2008 13:32:40 +0100 Subject: x86: move X86_FEATURE_CONSTANT_TSC into early cpu feature detection Need this in the next patch in time_init and that happens early. This includes a minor fix on i386 where early_intel_workarounds() [which is now called early_init_intel] really executes early as the comments say. Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/amd.c | 17 +++++++++++------ arch/x86/kernel/cpu/common.c | 11 +++++++++-- arch/x86/kernel/cpu/cpu.h | 3 ++- arch/x86/kernel/cpu/intel.c | 13 ++++++------- arch/x86/kernel/setup_64.c | 39 +++++++++++++++++++++++++++++++-------- 5 files changed, 59 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index aaa8101d3d8..cd2fe15ff4b 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -63,6 +63,15 @@ static __cpuinit int amd_apic_timer_broken(void) int force_mwait __cpuinitdata; +void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +{ + if (cpuid_eax(0x80000000) >= 0x80000007) { + c->x86_power = cpuid_edx(0x80000007); + if (c->x86_power & (1<<8)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + } +} + static void __cpuinit init_amd(struct cpuinfo_x86 *c) { u32 l, h; @@ -85,6 +94,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } #endif + early_init_amd(c); + /* * FIXME: We should handle the K5 here. Set up the write * range and also turn on MSR 83 bits 4 and 31 (write alloc, @@ -257,12 +268,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; } - if (cpuid_eax(0x80000000) >= 0x80000007) { - c->x86_power = cpuid_edx(0x80000007); - if (c->x86_power & (1<<8)) - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); - } - #ifdef CONFIG_X86_HT /* * On a AMD multi core setup the lower bits of the APIC id diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e48832a6c2a..dbb9142a824 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -307,6 +307,15 @@ static void __init early_cpu_detect(void) cpu_detect(c); get_cpu_vendor(c, 1); + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + early_init_amd(c); + break; + case X86_VENDOR_INTEL: + early_init_intel(c); + break; + } } static void __cpuinit generic_identify(struct cpuinfo_x86 * c) @@ -364,8 +373,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c) init_scattered_cpuid_features(c); } - early_intel_workaround(c); - #ifdef CONFIG_X86_HT c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; #endif diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2f6432cef6f..ad6527a5beb 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -24,5 +24,6 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); -extern void early_intel_workaround(struct cpuinfo_x86 *c); +extern void early_init_intel(struct cpuinfo_x86 *c); +extern void early_init_amd(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5731de3e1bd..f1136115279 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -29,13 +29,14 @@ struct movsl_mask movsl_mask __read_mostly; #endif -void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) +void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { - if (c->x86_vendor != X86_VENDOR_INTEL) - return; /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ if (c->x86 == 15 && c->x86_cache_alignment == 64) c->x86_cache_alignment = 128; + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } /* @@ -115,6 +116,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) unsigned int l2 = 0; char *p = NULL; + early_init_intel(c); + #ifdef CONFIG_X86_F00F_BUG /* * All current models of Pentium and Pentium with MMX technology CPUs @@ -210,10 +213,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) } if (c->x86 == 6) set_bit(X86_FEATURE_P3, c->x86_capability); - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); - if (cpu_has_ds) { unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 8618178db84..3cae326093c 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -544,9 +544,6 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", c->x86_cache_size, ecx & 0xFF); } - - if (n >= 0x80000007) - cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power); if (n >= 0x80000008) { cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); c->x86_virt_bits = (eax >> 8) & 0xff; @@ -624,7 +621,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) #endif } -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned bits, ecx; @@ -682,6 +679,15 @@ static __cpuinit int amd_apic_timer_broken(void) return 0; } +static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +{ + early_init_amd_mc(c); + + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + static void __cpuinit init_amd(struct cpuinfo_x86 *c) { unsigned level; @@ -731,10 +737,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } display_cacheinfo(c); - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - /* Multi core CPU? */ if (c->extended_cpuid_level >= 0x80000008) amd_detect_cmp(c); @@ -845,6 +847,13 @@ static void srat_detect_node(void) #endif } +static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) +{ + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); +} + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { /* Cache sizes */ @@ -1056,6 +1065,20 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif + + c->extended_cpuid_level = cpuid_eax(0x80000000); + + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + early_init_amd(c); + break; + case X86_VENDOR_INTEL: + early_init_intel(c); + break; + } } void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) -- cgit v1.2.3-70-g09d2 From 30d432dfab2bcfd021d352e2058fae6b9405caeb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 30 Jan 2008 13:33:16 +0100 Subject: x86: move MWAIT idle check to generic CPU initialization on 32-bit Previously it was only run for Intel CPUs, but AMD Fam10h implements MWAIT too. This matches 64bit behaviour. Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 2 ++ arch/x86/kernel/cpu/intel.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ed05c7a0ca9..4bd326d0322 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -499,6 +499,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) /* Init Machine Check Exception if available. */ mcheck_init(c); + + select_idle_routine(c); } void __init identify_boot_cpu(void) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f1136115279..d1c372b018d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -137,7 +137,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) } #endif - select_idle_routine(c); l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9 ) { unsigned eax = cpuid_eax(10); -- cgit v1.2.3-70-g09d2