From 155dd720d06a219ddf5a56b473cb3325441fc879 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 13 Mar 2009 14:49:53 +1030 Subject: cpumask: convert struct cpuinfo_x86's llc_shared_map to cpumask_var_t Impact: reduce kernel memory usage when CONFIG_CPUMASK_OFFSTACK=y Signed-off-by: Rusty Russell --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 76139506c3e..d794d9483c5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -94,7 +94,7 @@ struct cpuinfo_x86 { unsigned long loops_per_jiffy; #ifdef CONFIG_SMP /* cpus sharing the last level cache: */ - cpumask_t llc_shared_map; + cpumask_var_t llc_shared_map; #endif /* cpuid returned max cores value: */ u16 x86_max_cores; -- cgit v1.2.3-70-g09d2 From 30e1e6d1af2b67558bccf322af2b3e0676b209ae Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Mar 2009 14:50:34 +1030 Subject: cpumask: fix CONFIG_CPUMASK_OFFSTACK=y cpu hotunplug crash Impact: Fix cpu offline when CONFIG_MAXSMP=y Changeset bc9b83dd1f66402b870301c3c7117b9c1484abb4 "cpumask: convert c1e_mask in arch/x86/kernel/process.c to cpumask_var_t" contained a bug: c1e_mask is manipulated even if C1E isn't detected (and hence not allocated). This is simply fixed by checking for NULL (which gcc optimizes out anyway of CONFIG_CPUMASK_OFFSTACK=n, since it knows ce1_mask can never be NULL). In addition, fix a leak where select_idle_routine re-allocates (and re-clears) c1e_mask on every cpu init. Reported-by: Ingo Molnar Signed-off-by: Rusty Russell Cc: Mike Travis LKML-Reference: <200903171450.34549.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/process.c | 14 +++++++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d794d9483c5..9874dd98a29 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -733,6 +733,7 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); extern void select_idle_routine(const struct cpuinfo_x86 *c); +extern void init_c1e_mask(void); extern unsigned long boot_option_idle_override; extern unsigned long idle_halt; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 82f6cc045ad..d7dd3c294e2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -812,6 +812,7 @@ static void vgetcpu_set_mode(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); + init_c1e_mask(); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6638294cec8..78533a519d8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -479,7 +479,8 @@ static int c1e_detected; void c1e_remove_cpu(int cpu) { - cpumask_clear_cpu(cpu, c1e_mask); + if (c1e_mask != NULL) + cpumask_clear_cpu(cpu, c1e_mask); } /* @@ -556,13 +557,20 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) pm_idle = mwait_idle; } else if (check_c1e_idle(c)) { printk(KERN_INFO "using C1E aware idle routine\n"); - alloc_cpumask_var(&c1e_mask, GFP_KERNEL); - cpumask_clear(c1e_mask); pm_idle = c1e_idle; } else pm_idle = default_idle; } +void __init init_c1e_mask(void) +{ + /* If we're using c1e_idle, we need to allocate c1e_mask. */ + if (pm_idle == c1e_idle) { + alloc_cpumask_var(&c1e_mask, GFP_KERNEL); + cpumask_clear(c1e_mask); + } +} + static int __init idle_setup(char *str) { if (!str) -- cgit v1.2.3-70-g09d2 From a30469e7921a6dd2067e9e836d7787cfa0105627 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 10 Apr 2009 15:21:24 -0700 Subject: x86: add linux kernel support for YMM state Impact: save/restore Intel-AVX state properly between tasks Intel Advanced Vector Extensions (AVX) introduce 256-bit vector processing capability. More about AVX at http://software.intel.com/sites/avx Add OS support for YMM state management using xsave/xrstor infrastructure to support AVX. Signed-off-by: Suresh Siddha LKML-Reference: <1239402084.27006.8057.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 6 ++++++ arch/x86/include/asm/sigcontext.h | 6 ++++++ arch/x86/include/asm/xsave.h | 3 ++- arch/x86/kernel/xsave.c | 2 +- 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 34c52370f2f..fcf4d92e7e0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -352,6 +352,11 @@ struct i387_soft_struct { u32 entry_eip; }; +struct ymmh_struct { + /* 16 * 16 bytes for each YMMH-reg = 256 bytes */ + u32 ymmh_space[64]; +}; + struct xsave_hdr_struct { u64 xstate_bv; u64 reserved1[2]; @@ -361,6 +366,7 @@ struct xsave_hdr_struct { struct xsave_struct { struct i387_fxsave_struct i387; struct xsave_hdr_struct xsave_hdr; + struct ymmh_struct ymmh; /* new processor state extensions will go here */ } __attribute__ ((packed, aligned (64))); diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index ec666491aaa..72e5a449166 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -269,6 +269,11 @@ struct _xsave_hdr { __u64 reserved2[5]; }; +struct _ymmh_state { + /* 16 * 16 bytes for each YMMH-reg */ + __u32 ymmh_space[64]; +}; + /* * Extended state pointed by the fpstate pointer in the sigcontext. * In addition to the fpstate, information encoded in the xstate_hdr @@ -278,6 +283,7 @@ struct _xsave_hdr { struct _xstate { struct _fpstate fpstate; struct _xsave_hdr xstate_hdr; + struct _ymmh_state ymmh; /* new processor state extensions go here */ }; diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 08e9a1ac07a..727acc15234 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -7,6 +7,7 @@ #define XSTATE_FP 0x1 #define XSTATE_SSE 0x2 +#define XSTATE_YMM 0x4 #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) @@ -15,7 +16,7 @@ /* * These are the features that the OS can handle currently. */ -#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE) +#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) #ifdef CONFIG_X86_64 #define REX_PREFIX "0x48, " diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 2b54fe002e9..0a5b04aa98f 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -324,7 +324,7 @@ void __ref xsave_cntxt_init(void) } /* - * for now OS knows only about FP/SSE + * Support only the state known to OS. */ pcntxt_mask = pcntxt_mask & XCNTXT_MASK; xsave_init(); -- cgit v1.2.3-70-g09d2 From 9b8de7479d0dbab1ed98b5b015d44232c9d3d08e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 21 Apr 2009 23:00:24 +0100 Subject: FRV: Fix the section attribute on UP DECLARE_PER_CPU() In non-SMP mode, the variable section attribute specified by DECLARE_PER_CPU() does not agree with that specified by DEFINE_PER_CPU(). This means that architectures that have a small data section references relative to a base register may throw up linkage errors due to too great a displacement between where the base register points and the per-CPU variable. On FRV, the .h declaration says that the variable is in the .sdata section, but the .c definition says it's actually in the .data section. The linker throws up the following errors: kernel/built-in.o: In function `release_task': kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o To fix this, DECLARE_PER_CPU() should simply apply the same section attribute as does DEFINE_PER_CPU(). However, this is made slightly more complex by virtue of the fact that there are several variants on DEFINE, so these need to be matched by variants on DECLARE. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/percpu.h | 2 +- arch/ia64/include/asm/smp.h | 2 +- arch/x86/include/asm/desc.h | 2 +- arch/x86/include/asm/hardirq.h | 2 +- arch/x86/include/asm/processor.h | 6 +++--- arch/x86/include/asm/tlbflush.h | 2 +- include/asm-generic/percpu.h | 43 ++++++++++++++++++++++++++++++++++++++-- include/linux/percpu.h | 24 ---------------------- net/rds/rds.h | 2 +- 9 files changed, 50 insertions(+), 35 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/alpha/include/asm/percpu.h b/arch/alpha/include/asm/percpu.h index 3495e8e00d7..e9e0bb5a23b 100644 --- a/arch/alpha/include/asm/percpu.h +++ b/arch/alpha/include/asm/percpu.h @@ -73,6 +73,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #endif /* SMP */ -#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name) +#include #endif /* __ALPHA_PERCPU_H */ diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h index 59840833625..d217d1d4e05 100644 --- a/arch/ia64/include/asm/smp.h +++ b/arch/ia64/include/asm/smp.h @@ -58,7 +58,7 @@ extern struct smp_boot_data { extern char no_int_routing __devinitdata; extern cpumask_t cpu_core_map[NR_CPUS]; -DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); +DECLARE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); extern int smp_num_siblings; extern void __iomem *ipi_base_addr; extern unsigned char smp_int_redirect; diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 5623c50d67b..c45f415ce31 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -37,7 +37,7 @@ extern gate_desc idt_table[]; struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); -DECLARE_PER_CPU(struct gdt_page, gdt_page); +DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 039db6aa8e0..37555e52f98 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -26,7 +26,7 @@ typedef struct { #endif } ____cacheline_aligned irq_cpustat_t; -DECLARE_PER_CPU(irq_cpustat_t, irq_stat); +DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index fcf4d92e7e0..c2cceae709c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -138,7 +138,7 @@ extern struct tss_struct doublefault_tss; extern __u32 cleared_cpu_caps[NCAPINTS]; #ifdef CONFIG_SMP -DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) #define current_cpu_data __get_cpu_var(cpu_info) #else @@ -270,7 +270,7 @@ struct tss_struct { } ____cacheline_aligned; -DECLARE_PER_CPU(struct tss_struct, init_tss); +DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); /* * Save the original ist values for checking stack pointers during debugging @@ -393,7 +393,7 @@ union irq_stack_union { }; }; -DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); +DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union); DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d3539f998f8..16a5c84b032 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -152,7 +152,7 @@ struct tlb_state { struct mm_struct *active_mm; int state; }; -DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); +DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); static inline void reset_lazy_tlbstate(void) { diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b0e63c672eb..af47b9e1006 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -73,11 +73,50 @@ extern void setup_per_cpu_areas(void); #endif /* SMP */ +#ifndef PER_CPU_BASE_SECTION +#ifdef CONFIG_SMP +#define PER_CPU_BASE_SECTION ".data.percpu" +#else +#define PER_CPU_BASE_SECTION ".data" +#endif +#endif + +#ifdef CONFIG_SMP + +#ifdef MODULE +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#else +#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" +#endif +#define PER_CPU_FIRST_SECTION ".first" + +#else + +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_FIRST_SECTION "" + +#endif + #ifndef PER_CPU_ATTRIBUTES #define PER_CPU_ATTRIBUTES #endif -#define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \ - __typeof__(type) per_cpu_var(name) +#define DECLARE_PER_CPU_SECTION(type, name, section) \ + extern \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DECLARE_PER_CPU(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "") + +#define DECLARE_PER_CPU_SHARED_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") + +#define DECLARE_PER_CPU_FIRST(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cfda2d5ad31..f052d818499 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,30 +9,6 @@ #include -#ifndef PER_CPU_BASE_SECTION -#ifdef CONFIG_SMP -#define PER_CPU_BASE_SECTION ".data.percpu" -#else -#define PER_CPU_BASE_SECTION ".data" -#endif -#endif - -#ifdef CONFIG_SMP - -#ifdef MODULE -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#else -#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" -#endif -#define PER_CPU_FIRST_SECTION ".first" - -#else - -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#define PER_CPU_FIRST_SECTION "" - -#endif - #define DEFINE_PER_CPU_SECTION(type, name, section) \ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name diff --git a/net/rds/rds.h b/net/rds/rds.h index 619f0a30a4e..71794449ca4 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -638,7 +638,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *, void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); /* stats.c */ -DECLARE_PER_CPU(struct rds_statistics, rds_stats); +DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); #define rds_stats_inc_which(which, member) do { \ per_cpu(which, get_cpu()).member++; \ put_cpu(); \ -- cgit v1.2.3-70-g09d2