From 95ffa2438d0e9c48779f0106b1c0eb36165e759c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Apr 2008 03:52:33 -0700 Subject: x86: mtrr cleanup for converting continuous to discrete layout, v8 some BIOS like to use continus MTRR layout, and X driver can not add WB entries for graphical cards when 4g or more RAM installed. the patch will change MTRR to discrete. mtrr_chunk_size= could be used to have smaller continuous block to hold holes. default is 256m, could be set according to size of graphics card memory. mtrr_gran_size= could be used to send smallest mtrr block to avoid run out of MTRRs v2: fix -1 for UC checking v3: default to disable, and need use enable_mtrr_cleanup to enable this feature skip the var state change warning. remove next_basek in range_to_mtrr() v4: correct warning mask. v5: CONFIG_MTRR_SANITIZER v6: fix 1g, 2g, 512 aligment with extra hole v7: gran_sizek to prevent running out of MTRRs. v8: fix hole_basek caculation caused when removing next_basek gran_sizek using when basek is 0. need to apply [PATCH] x86: fix trimming e820 with MTRR holes. right after this one. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae7ef2..97a1764a3d2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1092,6 +1092,32 @@ config MTRR See for more information. +config MTRR_SANITIZER + def_bool y + prompt "MTRR cleanup support" + depends on MTRR + help + Convert MTRR layout from continuous to discrete, so some X driver + could add WB entries. + + Say N here if you see bootup problems (boot crash, boot hang, + spontaneous reboots). + + Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size + could be used to send largest mtrr entry size for continuous block + to hold holes (aka. UC entries) + + If unsure, say Y. + +config MTRR_SANITIZER_ENABLE_DEFAULT + def_bool y + prompt "Enable MTRR cleanup by default" + depends on MTRR_SANITIZER + help + Enable mtrr cleanup by default + + If unsure, say Y. + config X86_PAT bool prompt "x86 PAT support" -- cgit v1.2.3-70-g09d2 From f5098d62c1d1cede8ff23d01bbf50a421f110562 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Apr 2008 20:25:58 -0700 Subject: x86: mtrr cleanup for converting continuous to discrete layout v8 - fix v9: address format change requests by Ingo more case handling in range_to_var_with_hole Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 9 +-- arch/x86/kernel/cpu/mtrr/main.c | 164 ++++++++++++++++++++++------------------ 2 files changed, 94 insertions(+), 79 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 97a1764a3d2..f417fe33d85 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1110,13 +1110,12 @@ config MTRR_SANITIZER If unsure, say Y. config MTRR_SANITIZER_ENABLE_DEFAULT - def_bool y - prompt "Enable MTRR cleanup by default" + int "MTRR cleanup enable value (0-1)" + range 0 1 + default "0" depends on MTRR_SANITIZER help - Enable mtrr cleanup by default - - If unsure, say Y. + Enable mtrr cleanup default value config X86_PAT bool diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 5a2a4c14633..79597d3c343 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -611,17 +611,9 @@ static struct sysdev_driver mtrr_sysdev_driver = { }; #ifdef CONFIG_MTRR_SANITIZER - -#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT -static int enable_mtrr_cleanup __initdata = 1; -#else -static int enable_mtrr_cleanup __initdata; -#endif - +static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; #else - static int enable_mtrr_cleanup __initdata = -1; - #endif static int __init disable_mtrr_cleanup_setup(char *str) @@ -640,6 +632,7 @@ static int __init enable_mtrr_cleanup_setup(char *str) } early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); +/* should be related to MTRR_VAR_RANGES nums */ #define RANGE_NUM 256 struct res_range { @@ -647,13 +640,27 @@ struct res_range { unsigned long end; }; -static int __init add_range(struct res_range *range, int nr_range, unsigned long start, - unsigned long end, int merge) +static int __init +add_range(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) { - int i; + /* out of slots */ + if (nr_range >= RANGE_NUM) + return nr_range; - if (!merge) - goto addit; + range[nr_range].start = start; + range[nr_range].end = end; + + nr_range++; + + return nr_range; +} + +static int __init +add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) +{ + int i; /* try to merge it with old one */ for (i = 0; i < nr_range; i++) { @@ -676,24 +683,14 @@ static int __init add_range(struct res_range *range, int nr_range, unsigned long return nr_range; } -addit: /* need to add that */ - if (nr_range >= RANGE_NUM) - return nr_range; - - range[nr_range].start = start; - range[nr_range].end = end; - - nr_range++; - - return nr_range; - + return add_range(range, nr_range, start, end); } -static void __init subtract_range(struct res_range *range, unsigned long start, - unsigned long end) + +static void __init +subtract_range(struct res_range *range, unsigned long start, unsigned long end) { - int i; - int j; + int i, j; for (j = 0; j < RANGE_NUM; j++) { if (!range[j].end) @@ -747,46 +744,47 @@ static int __init cmp_range(const void *x1, const void *x2) } struct var_mtrr_state { - unsigned long range_startk, range_sizek; - unsigned long chunk_sizek; - unsigned long gran_sizek; - unsigned int reg; - unsigned address_bits; + unsigned long range_startk; + unsigned long range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + unsigned int reg; + unsigned int address_bits; }; -static void __init set_var_mtrr( - unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned address_bits) +static void __init +set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type, unsigned address_bits) { u32 base_lo, base_hi, mask_lo, mask_hi; - unsigned address_mask_high; + u64 base, mask; if (!sizek) { fill_mtrr_var_range(reg, 0, 0, 0, 0); return; } - address_mask_high = ((1u << (address_bits - 32u)) - 1u); + mask = (1ULL << address_bits) - 1; + mask &= ~((((u64)sizek) << 10) - 1); - base_hi = basek >> 22; - base_lo = basek << 10; + base = ((u64)basek) << 10; - if (sizek < 4*1024*1024) { - mask_hi = address_mask_high; - mask_lo = ~((sizek << 10) - 1); - } else { - mask_hi = address_mask_high & (~((sizek >> 22) - 1)); - mask_lo = 0; - } + base |= type; + mask |= 0x800; + + base_lo = base & ((1ULL<<32) - 1); + base_hi = base >> 32; + + mask_lo = mask & ((1ULL<<32) - 1); + mask_hi = mask >> 32; - base_lo |= type; - mask_lo |= 0x800; fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); } -static unsigned int __init range_to_mtrr(unsigned int reg, - unsigned long range_startk, unsigned long range_sizek, - unsigned char type, unsigned address_bits) +static unsigned int __init +range_to_mtrr(unsigned int reg, unsigned long range_startk, + unsigned long range_sizek, unsigned char type, + unsigned address_bits) { if (!range_sizek || (reg >= num_var_ranges)) return reg; @@ -794,6 +792,7 @@ static unsigned int __init range_to_mtrr(unsigned int reg, while (range_sizek) { unsigned long max_align, align; unsigned long sizek; + /* Compute the maximum size I can make a range */ if (range_startk) max_align = ffs(range_startk) - 1; @@ -818,7 +817,8 @@ static unsigned int __init range_to_mtrr(unsigned int reg, return reg; } -static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) +static void __init +range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) { unsigned long hole_basek, hole_sizek; unsigned long range0_basek, range0_sizek; @@ -848,23 +848,31 @@ static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigne /* try to append some small hole */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); - if ((range0_sizek == state->range_sizek) || - ((range0_basek + range0_sizek - chunk_sizek > basek) && basek)) { + if (range0_sizek == state->range_sizek) { printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits); return; + } else if (basek) { + while (range0_basek + range0_sizek - chunk_sizek > basek) { + range0_sizek -= chunk_sizek; + if (!range0_sizek) + break; + } } - range0_sizek -= chunk_sizek; + if (range0_sizek > chunk_sizek) + range0_sizek -= chunk_sizek; printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, range0_sizek, MTRR_TYPE_WRBACK, state->address_bits); range_basek = range0_basek + range0_sizek; range_sizek = chunk_sizek; - if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) { + + if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) && + (range_basek + range_sizek <= basek)) { hole_sizek = range_sizek - (state->range_sizek - range0_sizek); hole_basek = range_basek + range_sizek - hole_sizek; } else @@ -880,7 +888,9 @@ static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigne } } -static void __init set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn) +static void __init +set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, + unsigned long size_pfn) { unsigned long basek, sizek; @@ -921,7 +931,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p) early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); /* granity of mtrr of block */ -static u64 mtrr_gran_size __initdata = (64ULL<<20); +static u64 mtrr_gran_size __initdata = (1ULL<<20); static int __init parse_mtrr_gran_size_opt(char *p) { @@ -932,17 +942,19 @@ static int __init parse_mtrr_gran_size_opt(char *p) } early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); -static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits) +static void __init +x86_setup_var_mtrrs(struct res_range *range, int nr_range, + unsigned address_bits) { struct var_mtrr_state var_state; int i; - var_state.range_startk = 0; - var_state.range_sizek = 0; - var_state.reg = 0; - var_state.address_bits = address_bits; - var_state.chunk_sizek = mtrr_chunk_size >> 10; - var_state.gran_sizek = mtrr_gran_size >> 10; + var_state.range_startk = 0; + var_state.range_sizek = 0; + var_state.reg = 0; + var_state.address_bits = address_bits; + var_state.chunk_sizek = mtrr_chunk_size >> 10; + var_state.gran_sizek = mtrr_gran_size >> 10; /* Write the range etc */ for (i = 0; i < nr_range; i++) @@ -952,11 +964,16 @@ static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, un range_to_mtrr_with_hole(&var_state, 0); printk(KERN_INFO "DONE variable MTRRs\n"); /* Clear out the extra MTRR's */ - while (var_state.reg < num_var_ranges) - set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits); + while (var_state.reg < num_var_ranges) { + set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits); + var_state.reg++; + } } -static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size) +static int __init +x86_get_mtrr_mem_range(struct res_range *range, int nr_range, + unsigned long extra_remove_base, + unsigned long extra_remove_size) { unsigned long i, base, size; mtrr_type type; @@ -965,7 +982,7 @@ static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, mtrr_if->get(i, &base, &size, &type); if (type != MTRR_TYPE_WRBACK) continue; - nr_range = add_range(range, nr_range, base, base + size - 1, 1); + nr_range = add_range_with_merge(range, nr_range, base, base + size - 1); } printk(KERN_INFO "After WB checking\n"); for (i = 0; i < nr_range; i++) @@ -1005,11 +1022,11 @@ static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, static int __init mtrr_cleanup(unsigned address_bits) { + unsigned long extra_remove_base, extra_remove_size; unsigned long i, base, size, def, dummy; - mtrr_type type; struct res_range range[RANGE_NUM]; + mtrr_type type; int nr_range; - unsigned long extra_remove_base, extra_remove_size; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1053,7 +1070,6 @@ static int __init mtrr_cleanup(unsigned address_bits) x86_setup_var_mtrrs(range, nr_range, address_bits); return 1; - } static int disable_mtrr_trim; -- cgit v1.2.3-70-g09d2 From 12031a624af7816ec7660b82be648aa3703b4ebe Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 2 May 2008 02:40:22 -0700 Subject: x86: mtrr cleanup for converting continuous to discrete - auto detect v4 Loop through mtrr chunk_size and gran_size from 1M to 2G to find out the optimal value so user does not need to add mtrr_chunk_size and mtrr_gran_size to the kernel command line. If optimal value is not found, print out all list to help select less optimal value. Add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries. v2: find the one with more spare entries v3: fix hole_basek offset v4: tight the compare between range and range_new loop stop with 4g Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Gabriel C Cc: Mika Fischer Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 13 +- arch/x86/Kconfig | 9 + arch/x86/kernel/cpu/mtrr/main.c | 610 +++++++++++++++++++++++++++--------- 3 files changed, 484 insertions(+), 148 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 44427607b7c..2000977af77 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -610,8 +610,17 @@ and is between 256 and 4096 characters. It is defined in the file that could hold holes aka. UC entries. mtrr_gran_size=nn[KMG] [X86] - used for mtrr cleanup. It is granity of mtrr block. - Big value could prevent small alignment use up MTRRs. + Used for mtrr cleanup. It is granularity of mtrr block. + Default is 1. + Large value could prevent small alignment from + using up MTRRs. + + mtrr_spare_reg_nr=n [X86] + Format: + Range: 0,7 : spare reg number + Default : 1 + Used for mtrr cleanup. It is spare mtrr entries number. + Set to 2 or more if your graphical card needs more. disable_mtrr_trim [X86, Intel and AMD only] By default the kernel will trim any uncacheable diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f417fe33d85..9e976150442 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1117,6 +1117,15 @@ config MTRR_SANITIZER_ENABLE_DEFAULT help Enable mtrr cleanup default value +config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT + int "MTRR cleanup spare reg num (0-7)" + range 0 7 + default "1" + depends on MTRR_SANITIZER + help + mtrr cleanup spare entries default, it can be changed via + mtrr_spare_reg_nr= + config X86_PAT bool prompt "x86 PAT support" diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 79597d3c343..e6c162c379a 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_driver = { .resume = mtrr_restore, }; -#ifdef CONFIG_MTRR_SANITIZER -static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; -#else -static int enable_mtrr_cleanup __initdata = -1; -#endif - -static int __init disable_mtrr_cleanup_setup(char *str) -{ - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 0; - return 0; -} -early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); - -static int __init enable_mtrr_cleanup_setup(char *str) -{ - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 1; - return 0; -} -early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); - /* should be related to MTRR_VAR_RANGES nums */ #define RANGE_NUM 256 @@ -702,13 +680,15 @@ subtract_range(struct res_range *range, unsigned long start, unsigned long end) continue; } - if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { + if (start <= range[j].start && end < range[j].end && + range[j].start < end + 1) { range[j].start = end + 1; continue; } - if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { + if (start > range[j].start && end >= range[j].end && + range[j].end > start - 1) { range[j].end = start - 1; continue; } @@ -743,18 +723,123 @@ static int __init cmp_range(const void *x1, const void *x2) return start1 - start2; } +struct var_mtrr_range_state { + unsigned long base_pfn; + unsigned long size_pfn; + mtrr_type type; +}; + +struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; + +static int __init +x86_get_mtrr_mem_range(struct res_range *range, int nr_range, + unsigned long extra_remove_base, + unsigned long extra_remove_size) +{ + unsigned long i, base, size; + mtrr_type type; + + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_WRBACK) + continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; + nr_range = add_range_with_merge(range, nr_range, base, + base + size - 1); + } + printk(KERN_DEBUG "After WB checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + + /* take out UC ranges */ + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_UNCACHABLE) + continue; + size = range_state[i].size_pfn; + if (!size) + continue; + base = range_state[i].base_pfn; + subtract_range(range, base, base + size - 1); + } + if (extra_remove_size) + subtract_range(range, extra_remove_base, + extra_remove_base + extra_remove_size - 1); + + /* get new range num */ + nr_range = 0; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + nr_range++; + } + printk(KERN_DEBUG "After UC checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + printk(KERN_DEBUG "After sorting\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + + /* clear those is not used */ + for (i = nr_range; i < RANGE_NUM; i++) + memset(&range[i], 0, sizeof(range[i])); + + return nr_range; +} + +static struct res_range __initdata range[RANGE_NUM]; + +#ifdef CONFIG_MTRR_SANITIZER + +static unsigned long __init sum_ranges(struct res_range *range, int nr_range) +{ + unsigned long sum; + int i; + + sum = 0; + for (i = 0; i < nr_range; i++) + sum += range[i].end + 1 - range[i].start; + + return sum; +} + +static int enable_mtrr_cleanup __initdata = + CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; + +static int __init disable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 0; + return 0; +} +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); + +static int __init enable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 1; + return 0; +} +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); + struct var_mtrr_state { unsigned long range_startk; unsigned long range_sizek; unsigned long chunk_sizek; unsigned long gran_sizek; unsigned int reg; - unsigned int address_bits; }; static void __init set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned address_bits) + unsigned char type, unsigned int address_bits) { u32 base_lo, base_hi, mask_lo, mask_hi; u64 base, mask; @@ -781,10 +866,34 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); } +static void __init +save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type) +{ + range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); + range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); + range_state[reg].type = type; +} + +static void __init +set_var_mtrr_all(unsigned int address_bits) +{ + unsigned long basek, sizek; + unsigned char type; + unsigned int reg; + + for (reg = 0; reg < num_var_ranges; reg++) { + basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); + sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); + type = range_state[reg].type; + + set_var_mtrr(reg, basek, sizek, type, address_bits); + } +} + static unsigned int __init range_to_mtrr(unsigned int reg, unsigned long range_startk, - unsigned long range_sizek, unsigned char type, - unsigned address_bits) + unsigned long range_sizek, unsigned char type) { if (!range_sizek || (reg >= num_var_ranges)) return reg; @@ -803,12 +912,13 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, align = max_align; sizek = 1 << align; - printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n", + printk(KERN_DEBUG "Setting variable MTRR %d, base: %ldMB, " + "range: %ldMB, type %s\n", reg, range_startk >> 10, sizek >> 10, (type == MTRR_TYPE_UNCACHABLE)?"UC": ((type == MTRR_TYPE_WRBACK)?"WB":"Other") ); - set_var_mtrr(reg++, range_startk, sizek, type, address_bits); + save_var_mtrr(reg++, range_startk, sizek, type); range_startk += sizek; range_sizek -= sizek; if (reg >= num_var_ranges) @@ -817,10 +927,12 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, return reg; } -static void __init -range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) +static unsigned __init +range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, + unsigned long sizek) { unsigned long hole_basek, hole_sizek; + unsigned long second_basek, second_sizek; unsigned long range0_basek, range0_sizek; unsigned long range_basek, range_sizek; unsigned long chunk_sizek; @@ -828,64 +940,95 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) hole_basek = 0; hole_sizek = 0; + second_basek = 0; + second_sizek = 0; chunk_sizek = state->chunk_sizek; gran_sizek = state->gran_sizek; /* align with gran size, prevent small block used up MTRRs */ range_basek = ALIGN(state->range_startk, gran_sizek); if ((range_basek > basek) && basek) - return; - range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek); + return second_sizek; + state->range_sizek -= (range_basek - state->range_startk); + range_sizek = ALIGN(state->range_sizek, gran_sizek); - while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) { + while (range_sizek > state->range_sizek) { range_sizek -= gran_sizek; if (!range_sizek) - return; + return 0; } - state->range_startk = range_basek; state->range_sizek = range_sizek; /* try to append some small hole */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); if (range0_sizek == state->range_sizek) { - printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits); - return; - } else if (basek) { - while (range0_basek + range0_sizek - chunk_sizek > basek) { + printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", range0_basek<<10, + (range0_basek + state->range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + state->range_sizek, MTRR_TYPE_WRBACK); + return 0; + } + + range0_sizek -= chunk_sizek; + if (range0_sizek && sizek) { + while (range0_basek + range0_sizek > (basek + sizek)) { range0_sizek -= chunk_sizek; if (!range0_sizek) break; } } + if (range0_sizek) { + printk(KERN_DEBUG "range0: %016lx - %016lx\n", range0_basek<<10, + (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK); - if (range0_sizek > chunk_sizek) - range0_sizek -= chunk_sizek; - printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - range0_sizek, MTRR_TYPE_WRBACK, state->address_bits); + } range_basek = range0_basek + range0_sizek; range_sizek = chunk_sizek; - if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) && - (range_basek + range_sizek <= basek)) { - hole_sizek = range_sizek - (state->range_sizek - range0_sizek); - hole_basek = range_basek + range_sizek - hole_sizek; - } else + if (range_basek + range_sizek > basek && + range_basek + range_sizek <= (basek + sizek)) { + /* one hole */ + second_basek = basek; + second_sizek = range_basek + range_sizek - basek; + } + + /* if last piece, only could one hole near end */ + if ((second_basek || !basek) && + range_sizek - (state->range_sizek - range0_sizek) - second_sizek < + (chunk_sizek >> 1)) { + /* + * one hole in middle (second_sizek is 0) or at end + * (second_sizek is 0 ) + */ + hole_sizek = range_sizek - (state->range_sizek - range0_sizek) + - second_sizek; + hole_basek = range_basek + range_sizek - hole_sizek + - second_sizek; + } else { + /* fallback for big hole, or several holes */ range_sizek = state->range_sizek - range0_sizek; + second_basek = 0; + second_sizek = 0; + } - printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range_basek, - range_sizek, MTRR_TYPE_WRBACK, state->address_bits); + printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, + MTRR_TYPE_WRBACK); if (hole_sizek) { - printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, - hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits); + printk(KERN_DEBUG "hole: %016lx - %016lx\n", hole_basek<<10, + (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, + MTRR_TYPE_UNCACHABLE); + } + + return second_sizek; } static void __init @@ -893,6 +1036,7 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn) { unsigned long basek, sizek; + unsigned long second_sizek = 0; if (state->reg >= num_var_ranges) return; @@ -901,21 +1045,19 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, sizek = size_pfn << (PAGE_SHIFT - 10); /* See if I can merge with the last range */ - if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) { + if ((basek <= 1024) || + (state->range_startk + state->range_sizek == basek)) { unsigned long endk = basek + sizek; state->range_sizek = endk - state->range_startk; return; } /* Write the range mtrrs */ - if (state->range_sizek != 0) { - range_to_mtrr_with_hole(state, basek); + if (state->range_sizek != 0) + second_sizek = range_to_mtrr_with_hole(state, basek, sizek); - state->range_startk = 0; - state->range_sizek = 0; - } /* Allocate an msr */ - state->range_startk = basek; - state->range_sizek = sizek; + state->range_startk = basek + second_sizek; + state->range_sizek = sizek - second_sizek; } /* mininum size of mtrr block that can take hole */ @@ -931,7 +1073,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p) early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); /* granity of mtrr of block */ -static u64 mtrr_gran_size __initdata = (1ULL<<20); +static u64 mtrr_gran_size __initdata; static int __init parse_mtrr_gran_size_opt(char *p) { @@ -942,91 +1084,84 @@ static int __init parse_mtrr_gran_size_opt(char *p) } early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); -static void __init +static int nr_mtrr_spare_reg __initdata = + CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; + +static int __init parse_mtrr_spare_reg(char *arg) +{ + if (arg) + nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); + return 0; +} + +early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); + +static int __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, - unsigned address_bits) + u64 chunk_size, u64 gran_size) { struct var_mtrr_state var_state; int i; + int num_reg; var_state.range_startk = 0; var_state.range_sizek = 0; var_state.reg = 0; - var_state.address_bits = address_bits; - var_state.chunk_sizek = mtrr_chunk_size >> 10; - var_state.gran_sizek = mtrr_gran_size >> 10; + var_state.chunk_sizek = chunk_size >> 10; + var_state.gran_sizek = gran_size >> 10; + + memset(range_state, 0, sizeof(range_state)); /* Write the range etc */ for (i = 0; i < nr_range; i++) - set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1); + set_var_mtrr_range(&var_state, range[i].start, + range[i].end - range[i].start + 1); /* Write the last range */ - range_to_mtrr_with_hole(&var_state, 0); - printk(KERN_INFO "DONE variable MTRRs\n"); + if (var_state.range_sizek != 0) + range_to_mtrr_with_hole(&var_state, 0, 0); + printk(KERN_DEBUG "DONE variable MTRRs\n"); + + num_reg = var_state.reg; /* Clear out the extra MTRR's */ while (var_state.reg < num_var_ranges) { - set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits); + save_var_mtrr(var_state.reg, 0, 0, 0); var_state.reg++; } -} - -static int __init -x86_get_mtrr_mem_range(struct res_range *range, int nr_range, - unsigned long extra_remove_base, - unsigned long extra_remove_size) -{ - unsigned long i, base, size; - mtrr_type type; - - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); - if (type != MTRR_TYPE_WRBACK) - continue; - nr_range = add_range_with_merge(range, nr_range, base, base + size - 1); - } - printk(KERN_INFO "After WB checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); - /* take out UC ranges */ - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); - if (type != MTRR_TYPE_UNCACHABLE) - continue; - if (!size) - continue; - subtract_range(range, base, base + size - 1); - } - if (extra_remove_size) - subtract_range(range, extra_remove_base, extra_remove_base + extra_remove_size - 1); + return num_reg; +} - /* get new range num */ - nr_range = 0; - for (i = 0; i < RANGE_NUM; i++) { - if (!range[i].end) - continue; - nr_range++; - } - printk(KERN_INFO "After UC checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); +struct mtrr_cleanup_result { + unsigned long gran_sizek; + unsigned long chunk_sizek; + unsigned long lose_cover_sizek; + unsigned int num_reg; + int bad; +}; - /* sort the ranges */ - sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); - printk(KERN_INFO "After sorting\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); +/* + * gran_size: 1M, 2M, ..., 2G + * chunk size: gran_size, ..., 4G + * so we need (2+13)*6 + */ +#define NUM_RESULT 90 +#define PSHIFT (PAGE_SHIFT - 10) - return nr_range; -} +static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; +static struct res_range __initdata range_new[RANGE_NUM]; +static unsigned long __initdata min_loss_pfn[RANGE_NUM]; static int __init mtrr_cleanup(unsigned address_bits) { unsigned long extra_remove_base, extra_remove_size; unsigned long i, base, size, def, dummy; - struct res_range range[RANGE_NUM]; mtrr_type type; - int nr_range; + int nr_range, nr_range_new; + u64 chunk_size, gran_size; + unsigned long range_sums, range_sums_new; + int index_good; + int num_reg_good; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1038,10 +1173,20 @@ static int __init mtrr_cleanup(unsigned address_bits) if (def != MTRR_TYPE_UNCACHABLE) return 0; + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } + /* check entries number */ memset(num, 0, sizeof(num)); for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); + type = range_state[i].type; + size = range_state[i].size_pfn; if (type >= MTRR_NUM_TYPES) continue; if (!size) @@ -1062,15 +1207,172 @@ static int __init mtrr_cleanup(unsigned address_bits) extra_remove_size = 0; if (mtrr_tom2) { extra_remove_base = 1 << (32 - PAGE_SHIFT); - extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base; + extra_remove_size = + (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; + } + nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, + extra_remove_size); + range_sums = sum_ranges(range, nr_range); + printk(KERN_INFO "total RAM coverred: %ldM\n", + range_sums >> (20 - PAGE_SHIFT)); + + if (mtrr_chunk_size && mtrr_gran_size) { + int num_reg; + + /* convert ranges to var ranges state */ + num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, + mtrr_gran_size); + + /* we got new setting in range_state, check it */ + memset(range_new, 0, sizeof(range_new)); + nr_range_new = x86_get_mtrr_mem_range(range_new, 0, + extra_remove_base, + extra_remove_size); + range_sums_new = sum_ranges(range_new, nr_range_new); + + i = 0; + result[i].chunk_sizek = mtrr_chunk_size >> 10; + result[i].gran_sizek = mtrr_gran_size >> 10; + result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { + result[i].lose_cover_sizek = + (range_sums_new - range_sums) << PSHIFT; + result[i].bad = 1; + } else + result[i].lose_cover_sizek = + (range_sums - range_sums_new) << PSHIFT; + + printk(KERN_INFO " %sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?" BAD ":"", result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + result[i].num_reg, result[i].bad?"-":"", + result[i].lose_cover_sizek >> 10); + if (!result[i].bad) { + set_var_mtrr_all(address_bits); + return 1; + } + printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " + "will find optimal one\n"); + memset(result, 0, sizeof(result[0])); + } + + i = 0; + memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); + memset(result, 0, sizeof(result)); + for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { + for (chunk_size = gran_size; chunk_size < (1ULL<<33); + chunk_size <<= 1) { + int num_reg; + + printk(KERN_INFO + "\ngran_size: %lldM chunk_size_size: %lldM\n", + gran_size >> 20, chunk_size >> 20); + if (i >= NUM_RESULT) + continue; + + /* convert ranges to var ranges state */ + num_reg = x86_setup_var_mtrrs(range, nr_range, + chunk_size, gran_size); + + /* we got new setting in range_state, check it */ + memset(range_new, 0, sizeof(range_new)); + nr_range_new = x86_get_mtrr_mem_range(range_new, 0, + extra_remove_base, extra_remove_size); + range_sums_new = sum_ranges(range_new, nr_range_new); + + result[i].chunk_sizek = chunk_size >> 10; + result[i].gran_sizek = gran_size >> 10; + result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { + result[i].lose_cover_sizek = + (range_sums_new - range_sums) << PSHIFT; + result[i].bad = 1; + } else + result[i].lose_cover_sizek = + (range_sums - range_sums_new) << PSHIFT; + + /* double check it */ + if (!result[i].bad && !result[i].lose_cover_sizek) { + if (nr_range_new != nr_range || + memcmp(range, range_new, sizeof(range))) + result[i].bad = 1; + } + + if (!result[i].bad && (range_sums - range_sums_new < + min_loss_pfn[num_reg])) { + min_loss_pfn[num_reg] = + range_sums - range_sums_new; + } + i++; + } } - nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); - /* convert ranges to var ranges state */ - x86_setup_var_mtrrs(range, nr_range, address_bits); + /* print out all */ + for (i = 0; i < NUM_RESULT; i++) { + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + result[i].num_reg, result[i].bad?"-":"", + result[i].lose_cover_sizek >> 10); + } - return 1; + /* try to find the optimal index */ + if (nr_mtrr_spare_reg >= num_var_ranges) + nr_mtrr_spare_reg = num_var_ranges - 1; + num_reg_good = -1; + for (i = 1; i < num_var_ranges + 1 - nr_mtrr_spare_reg; i++) { + if (!min_loss_pfn[i]) { + num_reg_good = i; + break; + } + } + + index_good = -1; + if (num_reg_good != -1) { + for (i = 0; i < NUM_RESULT; i++) { + if (!result[i].bad && + result[i].num_reg == num_reg_good && + !result[i].lose_cover_sizek) { + index_good = i; + break; + } + } + } + + if (index_good != -1) { + printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); + i = index_good; + printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", + result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %ldM \n", + result[i].num_reg, + result[i].lose_cover_sizek >> 10); + /* convert ranges to var ranges state */ + chunk_size = result[i].chunk_sizek; + chunk_size <<= 10; + gran_size = result[i].gran_sizek; + gran_size <<= 10; + x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); + set_var_mtrr_all(address_bits); + return 1; + } + + printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); + printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); + + return 0; } +#else +static int __init mtrr_cleanup(unsigned address_bits) +{ + return 0; +} +#endif + +static int __initdata changed_by_mtrr_cleanup; static int disable_mtrr_trim; @@ -1111,7 +1413,8 @@ int __init amd_special_default_mtrr(void) return 0; } -static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) +static u64 __init real_trim_memory(unsigned long start_pfn, + unsigned long limit_pfn) { u64 trim_start, trim_size; trim_start = start_pfn; @@ -1138,9 +1441,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) { unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; - struct res_range range[RANGE_NUM]; int nr_range; - u64 total_real_trim_size; + u64 total_trim_size; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1155,11 +1457,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) if (def != MTRR_TYPE_UNCACHABLE) return 0; - /* Find highest cached pfn */ + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } + + /* Find highest cached pfn */ + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; if (type != MTRR_TYPE_WRBACK) continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; if (highest_pfn < base + size) highest_pfn = base + size; } @@ -1177,9 +1490,10 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* check entries number */ memset(num, 0, sizeof(num)); for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); + type = range_state[i].type; if (type >= MTRR_NUM_TYPES) continue; + size = range_state[i].size_pfn; if (!size) type = MTRR_NUM_TYPES; num[type]++; @@ -1205,26 +1519,28 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) } nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); - total_real_trim_size = 0; + total_trim_size = 0; /* check the head */ if (range[0].start) - total_real_trim_size += real_trim_memory(0, range[0].start); + total_trim_size += real_trim_memory(0, range[0].start); /* check the holes */ for (i = 0; i < nr_range - 1; i++) { if (range[i].end + 1 < range[i+1].start) - total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start); + total_trim_size += real_trim_memory(range[i].end + 1, + range[i+1].start); } /* check the top */ i = nr_range - 1; if (range[i].end + 1 < end_pfn) - total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn); + total_trim_size += real_trim_memory(range[i].end + 1, + end_pfn); - if (total_real_trim_size) { + if (total_trim_size) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" " all of memory, losing %lluMB of RAM.\n", - total_real_trim_size >> 20); + total_trim_size >> 20); - if (enable_mtrr_cleanup < 1) + if (!changed_by_mtrr_cleanup) WARN_ON(1); printk(KERN_INFO "update e820 for mtrr\n"); @@ -1314,8 +1630,10 @@ void __init mtrr_bp_init(void) if (use_intel()) { get_mtrr_state(); - if (mtrr_cleanup(phys_addr)) + if (mtrr_cleanup(phys_addr)) { + changed_by_mtrr_cleanup = 1; mtrr_if->set_all(); + } } } @@ -1355,7 +1673,7 @@ static int __init mtrr_init_finialize(void) if (!mtrr_if) return 0; if (use_intel()) { - if (enable_mtrr_cleanup < 1) + if (!changed_by_mtrr_cleanup) mtrr_state_warn(); } else { /* The CPUs haven't MTRR and seem to not support SMP. They have -- cgit v1.2.3-70-g09d2 From d49c4288407b2ffa8cab270cb5bc6882abe969f6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Jun 2008 18:31:54 -0700 Subject: x86: make generic arch support NUMAQ ... so it could fall back to normal numa and we'd reduce the impact of the NUMAQ subarch. NUMAQ depends on GENERICARCH also decouple genericarch numa from acpi. also make it fall back to bigsmp if apicid > 8. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 74 ++++++++++++++--------------- arch/x86/Makefile | 18 ------- arch/x86/boot/compressed/misc.c | 4 -- arch/x86/kernel/acpi/boot.c | 4 +- arch/x86/kernel/io_apic_32.c | 9 ++-- arch/x86/kernel/mpparse.c | 73 ++++++++++++++++++++++++++-- arch/x86/kernel/numaq_32.c | 2 - arch/x86/kernel/summit_32.c | 2 + arch/x86/mach-es7000/Makefile | 1 - arch/x86/mach-es7000/es7000plat.c | 47 ------------------ arch/x86/mach-generic/Makefile | 10 ++-- arch/x86/mach-generic/bigsmp.c | 2 - arch/x86/mach-generic/numaq.c | 41 ++++++++++++++++ arch/x86/mach-generic/probe.c | 15 +++++- arch/x86/pci/Makefile_32 | 5 +- arch/x86/pci/numa.c | 29 ++--------- drivers/acpi/Kconfig | 1 - include/asm-x86/mach-generic/mach_mpparse.h | 7 ++- include/asm-x86/mach-numaq/mach_apic.h | 39 ++++----------- include/asm-x86/mach-numaq/mach_mpparse.h | 11 +---- include/asm-x86/mmzone_32.h | 18 ++----- include/asm-x86/mpspec.h | 6 +++ include/asm-x86/numaq.h | 5 +- include/asm-x86/srat.h | 12 +++-- 24 files changed, 219 insertions(+), 216 deletions(-) create mode 100644 arch/x86/mach-generic/numaq.c (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9e976150442..8b89810fe3f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -268,36 +268,6 @@ config X86_VOYAGER If you do not specifically know you have a Voyager based machine, say N here, otherwise the kernel you build will not be bootable. -config X86_NUMAQ - bool "NUMAQ (IBM/Sequent)" - depends on SMP && X86_32 - select NUMA - help - This option is used for getting Linux to run on a (IBM/Sequent) NUMA - multiquad box. This changes the way that processors are bootstrapped, - and uses Clustered Logical APIC addressing mode instead of Flat Logical. - You will need a new lynxer.elf file to flash your firmware with - send - email to . - -config X86_SUMMIT - bool "Summit/EXA (IBM x440)" - depends on X86_32 && SMP - help - This option is needed for IBM systems that use the Summit/EXA chipset. - In particular, it is needed for the x440. - - If you don't have one of these computers, you should say N here. - If you want to build a NUMA kernel, you must select ACPI. - -config X86_BIGSMP - bool "Support for other sub-arch SMP systems with more than 8 CPUs" - depends on X86_32 && SMP - help - This option is needed for the systems that have more than 8 CPUs - and if the system is not of any sub-arch type above. - - If you don't have such a system, you should say N here. - config X86_VISWS bool "SGI 320/540 (Visual Workstation)" depends on X86_32 @@ -311,12 +281,33 @@ config X86_VISWS and vice versa. See for details. config X86_GENERICARCH - bool "Generic architecture (Summit, bigsmp, ES7000, default)" + bool "Generic architecture" depends on X86_32 help - This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. - It is intended for a generic binary kernel. - If you want a NUMA kernel, select ACPI. We need SRAT for NUMA. + This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default + subarchitectures. It is intended for a generic binary kernel. + if you select them all, kernel will probe it one by one. and will + fallback to default. + +if X86_GENERICARCH + +config X86_NUMAQ + bool "NUMAQ (IBM/Sequent)" + depends on SMP && X86_32 + select NUMA + help + This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) + NUMA multiquad box. This changes the way that processors are + bootstrapped, and uses Clustered Logical APIC addressing mode instead + of Flat Logical. You will need a new lynxer.elf file to flash your + firmware with - send email to . + +config X86_SUMMIT + bool "Summit/EXA (IBM x440)" + depends on X86_32 && SMP + help + This option is needed for IBM systems that use the Summit/EXA chipset. + In particular, it is needed for the x440. config X86_ES7000 bool "Support for Unisys ES7000 IA32 series" @@ -324,8 +315,15 @@ config X86_ES7000 help Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. - Only choose this option if you have such a system, otherwise you - should say N here. + +config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on X86_32 && SMP + help + This option is needed for the systems that have more than 8 CPUs + and if the system is not of any sub-arch type above. + +endif config X86_RDC321X bool "RDC R-321x SoC" @@ -913,9 +911,9 @@ config X86_PAE config NUMA bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL) + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || X86_SUMMIT && ACPI) && EXPERIMENTAL) default n if X86_PC - default y if (X86_NUMAQ || X86_SUMMIT) + default y if (X86_NUMAQ || X86_SUMMIT || X86_GENERICARCH) help Enable NUMA (Non Uniform Memory Access) support. The kernel will try to allocate memory used by a CPU on the diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 30b046bcccb..d6650131659 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -117,29 +117,11 @@ mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/ -# NUMAQ subarch support -mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq -mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default/ - -# BIGSMP subarch support -mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp -mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default/ - -#Summit subarch support -mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit -mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default/ - # generic subarchitecture mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ - -# ES7000 subarch support -mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000 -fcore-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/ -mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default/ - # RDC R-321x subarch support mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 90456cee47c..ba0be6a25ff 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -221,10 +221,6 @@ static char *vidmem; static int vidport; static int lines, cols; -#ifdef CONFIG_X86_NUMAQ -void *xquad_portio; -#endif - #include "../../../../lib/inflate.c" static void *malloc(int size) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f226bdc19f6..7dc2130046d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -848,7 +848,7 @@ static int __init acpi_parse_madt_lapic_entries(void) #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 -#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH) +#ifdef CONFIG_X86_ES7000 extern int es7000_plat; #endif @@ -997,7 +997,7 @@ void __init mp_config_acpi_legacy_irqs(void) set_bit(MP_ISA_BUS, mp_bus_not_pci); Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); -#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH) +#ifdef CONFIG_X86_ES7000 /* * Older generations of ES7000 have no legacy identity mappings */ diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 2285b81ad1d..97e62a01f1e 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1722,7 +1722,6 @@ void disable_IO_APIC(void) * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 */ -#ifndef CONFIG_X86_NUMAQ static void __init setup_ioapic_ids_from_mpc(void) { union IO_APIC_reg_00 reg_00; @@ -1732,6 +1731,11 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; +#ifdef CONFIG_X86_NUMAQ + if (found_numaq) + return; +#endif + /* * Don't check I/O APIC IDs for xAPIC systems. They have * no meaning without the serial APIC bus. @@ -1828,9 +1832,6 @@ static void __init setup_ioapic_ids_from_mpc(void) apic_printk(APIC_VERBOSE, " ok.\n"); } } -#else -static void __init setup_ioapic_ids_from_mpc(void) { } -#endif int no_timer_check __initdata; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 7591325e616..1cc7a4b8643 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -49,15 +49,73 @@ static int __init mpf_checksum(unsigned char *mp, int len) } #ifdef CONFIG_X86_NUMAQ +int found_numaq; /* * Have to match translation table entries to main table entries by counter * hence the mpc_record variable .... can't see a less disgusting way of * doing this .... */ +struct mpc_config_translation { + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; +}; + static int mpc_record; static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; + +static inline int generate_logical_apicid(int quad, int phys_apicid) +{ + return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); +} + + +static inline int mpc_apic_id(struct mpc_config_processor *m, + struct mpc_config_translation *translation_record) +{ + int quad = translation_record->trans_quad; + int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); + + printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver, quad, logical_apicid); + return logical_apicid; +} + +int mp_bus_id_to_node[MAX_MP_BUSSES]; + +int mp_bus_id_to_local[MAX_MP_BUSSES]; + +static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, + struct mpc_config_translation *translation) +{ + int quad = translation->trans_quad; + int local = translation->trans_local; + + mp_bus_id_to_node[m->mpc_busid] = quad; + mp_bus_id_to_local[m->mpc_busid] = local; + printk(KERN_INFO "Bus #%d is %s (node %d)\n", + m->mpc_busid, name, quad); +} + +int quad_local_to_mp_bus_id [NR_CPUS/4][4]; +static void mpc_oem_pci_bus(struct mpc_config_bus *m, + struct mpc_config_translation *translation) +{ + int quad = translation->trans_quad; + int local = translation->trans_local; + + quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; +} + #endif static void __cpuinit MP_processor_info(struct mpc_config_processor *m) @@ -321,11 +379,11 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, } } -static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, +void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) { if (strncmp(oem, "IBM NUMA", 8)) - printk("Warning! May not be a NUMA-Q system!\n"); + printk("Warning! Not a NUMA-Q system!\n"); else found_numaq = 1; @@ -388,7 +446,16 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) return 0; #ifdef CONFIG_X86_32 - mps_oem_check(mpc, oem, str); + /* + * need to make sure summit and es7000's mps_oem_check is safe to be + * called early via genericarch 's mps_oem_check + */ + if (early) { +#ifdef CONFIG_X86_NUMAQ + numaq_mps_oem_check(mpc, oem, str); +#endif + } else + mps_oem_check(mpc, oem, str); #endif /* save the local APIC address, it might be non-default */ diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 27b908254fb..f0f1de1c4a1 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -36,8 +36,6 @@ #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) -int found_numaq; - /* * Function: smp_dump_qct() * diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index ae751094eba..d67ce5f044b 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -36,7 +36,9 @@ static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; +#ifndef CONFIG_X86_NUMAQ static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; +#endif static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) { diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile index 69dd4da218d..3ef8b43b62f 100644 --- a/arch/x86/mach-es7000/Makefile +++ b/arch/x86/mach-es7000/Makefile @@ -3,4 +3,3 @@ # obj-$(CONFIG_X86_ES7000) := es7000plat.o -obj-$(CONFIG_X86_GENERICARCH) := es7000plat.o diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c index a41c77a4722..4354ce80488 100644 --- a/arch/x86/mach-es7000/es7000plat.c +++ b/arch/x86/mach-es7000/es7000plat.c @@ -177,53 +177,6 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr) } #endif -/* - * This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic - * arch already has got following function definitions (asm-generic/es7000.c) - * hence no need to define these for that case. - */ -#ifndef CONFIG_X86_GENERICARCH -void es7000_sw_apic(void); -void __init enable_apic_mode(void) -{ - es7000_sw_apic(); - return; -} - -__init int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (mpc->mpc_oemptr) { - struct mp_config_oemtable *oem_table = - (struct mp_config_oemtable *)mpc->mpc_oemptr; - if (!strncmp(oem, "UNISYS", 6)) - return parse_unisys_oem((char *)oem_table); - } - return 0; -} -#ifdef CONFIG_ACPI -/* Hook from generic ACPI tables.c */ -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long oem_addr; - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (es7000_check_dsdt()) - return parse_unisys_oem((char *)oem_addr); - else { - setup_unisys(); - return 1; - } - } - return 0; -} -#else -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} -#endif -#endif /* COFIG_X86_GENERICARCH */ - static void es7000_spin(int n) { diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 19d6d407737..0dbd7803a1d 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -2,7 +2,11 @@ # Makefile for the generic architecture # -EXTRA_CFLAGS := -Iarch/x86/kernel +EXTRA_CFLAGS := -Iarch/x86/kernel -obj-y := probe.o summit.o bigsmp.o es7000.o default.o -obj-y += ../../x86/mach-es7000/ +obj-y := probe.o default.o +obj-$(CONFIG_X86_NUMAQ) += numaq.o +obj-$(CONFIG_X86_SUMMIT) += summit.o +obj-$(CONFIG_X86_BIGSMP) += bigsmp.o +obj-$(CONFIG_X86_ES7000) += es7000.o +obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/ diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 2a243019aca..59d77171455 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -23,10 +23,8 @@ static int dmi_bigsmp; /* can be set by dmi scanners */ static int hp_ht_bigsmp(const struct dmi_system_id *d) { -#ifdef CONFIG_X86_GENERICARCH printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); dmi_bigsmp = 1; -#endif return 0; } diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c new file mode 100644 index 00000000000..8091e68764c --- /dev/null +++ b/arch/x86/mach-generic/numaq.c @@ -0,0 +1,41 @@ +/* + * APIC driver for the IBM NUMAQ chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) +{ + numaq_mps_oem_check(mpc, oem, productid); + return found_numaq; +} + +static int probe_numaq(void) +{ + /* already know from get_memcfg_numaq() */ + return found_numaq; +} + +/* Hook from generic ACPI tables.c */ +static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 0; +} + +struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index c5ae751b994..ba18dec4855 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -16,6 +16,7 @@ #include #include +extern struct genapic apic_numaq; extern struct genapic apic_summit; extern struct genapic apic_bigsmp; extern struct genapic apic_es7000; @@ -24,9 +25,18 @@ extern struct genapic apic_default; struct genapic *genapic = &apic_default; static struct genapic *apic_probe[] __initdata = { +#ifdef CONFIG_X86_NUMAQ + &apic_numaq, +#endif +#ifdef CONFIG_X86_SUMMIT &apic_summit, +#endif +#ifdef CONFIG_X86_BIGSMP &apic_bigsmp, +#endif +#ifdef CONFIG_X86_ES7000 &apic_es7000, +#endif &apic_default, /* must be last */ NULL, }; @@ -54,6 +64,7 @@ early_param("apic", parse_apic); void __init generic_bigsmp_probe(void) { +#if CONFIG_X86_BIGSMP /* * This routine is used to switch to bigsmp mode when * - There is no apic= option specified by the user @@ -67,6 +78,7 @@ void __init generic_bigsmp_probe(void) printk(KERN_INFO "Overriding APIC driver with %s\n", genapic->name); } +#endif } void __init generic_apic_probe(void) @@ -88,7 +100,8 @@ void __init generic_apic_probe(void) /* These functions can switch the APIC even after the initial ->probe() */ -int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) +int __init mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) { int i; for (i = 0; apic_probe[i]; ++i) { diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index 89ec35d00ef..962d96c0495 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 @@ -13,10 +13,11 @@ pci-y := fixup.o pci-$(CONFIG_ACPI) += acpi.o pci-y += legacy.o irq.o -# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are +# Careful: VISWS overrule the pci-y above. The colons are # therefor correct. This needs a proper fix by distangling the code. pci-$(CONFIG_X86_VISWS) := visws.o fixup.o -pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o + +pci-$(CONFIG_X86_NUMAQ) += numa.o # Necessary for NUMAQ as well pci-$(CONFIG_NUMA) += mp_bus_to_node.o diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c index d9afbae5092..99f1ecd485b 100644 --- a/arch/x86/pci/numa.c +++ b/arch/x86/pci/numa.c @@ -6,45 +6,21 @@ #include #include #include +#include #include "pci.h" #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ -int mp_bus_id_to_node[MAX_MP_BUSSES]; #define BUS2QUAD(global) (mp_bus_id_to_node[global]) -int mp_bus_id_to_local[MAX_MP_BUSSES]; #define BUS2LOCAL(global) (mp_bus_id_to_local[global]) -void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, - struct mpc_config_translation *translation) -{ - int quad = translation->trans_quad; - int local = translation->trans_local; - - mp_bus_id_to_node[m->mpc_busid] = quad; - mp_bus_id_to_local[m->mpc_busid] = local; - printk(KERN_INFO "Bus #%d is %s (node %d)\n", - m->mpc_busid, name, quad); -} - -int quad_local_to_mp_bus_id [NR_CPUS/4][4]; #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) -void mpc_oem_pci_bus(struct mpc_config_bus *m, - struct mpc_config_translation *translation) -{ - int quad = translation->trans_quad; - int local = translation->trans_local; - - quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; -} /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; -#ifdef CONFIG_X86_NUMAQ EXPORT_SYMBOL(xquad_portio); -#endif #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) @@ -179,6 +155,9 @@ static int __init pci_numa_init(void) { int quad; + if (!found_numaq) + return 0; + raw_pci_ops = &pci_direct_conf1_mq; if (pcibios_scanned++) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index c52fca83326..860f15f36ce 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -4,7 +4,6 @@ menuconfig ACPI bool "ACPI (Advanced Configuration and Power Interface) Support" - depends on !X86_NUMAQ depends on !X86_VISWS depends on !IA64_HP_SIM depends on IA64 || X86 diff --git a/include/asm-x86/mach-generic/mach_mpparse.h b/include/asm-x86/mach-generic/mach_mpparse.h index 0d0b5ba2e9d..586cadbf378 100644 --- a/include/asm-x86/mach-generic/mach_mpparse.h +++ b/include/asm-x86/mach-generic/mach_mpparse.h @@ -1,7 +1,10 @@ #ifndef _MACH_MPPARSE_H #define _MACH_MPPARSE_H 1 -int mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid); -int acpi_madt_oem_check(char *oem_id, char *oem_table_id); + +extern int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); + +extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); #endif diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h index 75a56e5afbe..d802465e026 100644 --- a/include/asm-x86/mach-numaq/mach_apic.h +++ b/include/asm-x86/mach-numaq/mach_apic.h @@ -20,8 +20,14 @@ static inline cpumask_t target_cpus(void) #define INT_DELIVERY_MODE dest_LowestPrio #define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */ -#define check_apicid_used(bitmap, apicid) physid_isset(apicid, bitmap) -#define check_apicid_present(bit) physid_isset(bit, phys_cpu_present_map) +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} +static inline unsigned long check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} #define apicid_cluster(apicid) (apicid & 0xF0) static inline int apic_id_registered(void) @@ -77,11 +83,6 @@ static inline int cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline int generate_logical_apicid(int quad, int phys_apicid) -{ - return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); -} - static inline int apicid_to_node(int logical_apicid) { return logical_apicid >> 4; @@ -95,30 +96,6 @@ static inline physid_mask_t apicid_to_cpu_present(int logical_apicid) return physid_mask_of_physid(cpu + 4*node); } -struct mpc_config_translation { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; -}; - -static inline int mpc_apic_id(struct mpc_config_processor *m, - struct mpc_config_translation *translation_record) -{ - int quad = translation_record->trans_quad; - int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); - - printk("Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", - m->mpc_apicid, - (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, - m->mpc_apicver, quad, logical_apicid); - return logical_apicid; -} - extern void *xquad_portio; static inline void setup_portio_remap(void) diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h index 459b1240118..626aef6b155 100644 --- a/include/asm-x86/mach-numaq/mach_mpparse.h +++ b/include/asm-x86/mach-numaq/mach_mpparse.h @@ -1,14 +1,7 @@ #ifndef __ASM_MACH_MPPARSE_H #define __ASM_MACH_MPPARSE_H -extern void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, - struct mpc_config_translation *translation); -extern void mpc_oem_pci_bus(struct mpc_config_bus *m, - struct mpc_config_translation *translation); - -/* Hook from generic ACPI tables.c */ -static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ -} +extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); #endif /* __ASM_MACH_MPPARSE_H */ diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h index ab0012888c4..b2298a22756 100644 --- a/include/asm-x86/mmzone_32.h +++ b/include/asm-x86/mmzone_32.h @@ -12,11 +12,9 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) -#ifdef CONFIG_X86_NUMAQ - #include -#elif defined(CONFIG_ACPI_SRAT)/* summit or generic arch */ - #include -#endif +#include +/* summit or generic arch */ +#include extern int get_memcfg_numa_flat(void); /* @@ -26,14 +24,11 @@ extern int get_memcfg_numa_flat(void); */ static inline void get_memcfg_numa(void) { -#ifdef CONFIG_X86_NUMAQ + if (get_memcfg_numaq()) return; -#elif defined(CONFIG_ACPI_SRAT) if (get_memcfg_from_srat()) return; -#endif - get_memcfg_numa_flat(); } @@ -42,7 +37,6 @@ extern int early_pfn_to_nid(unsigned long pfn); #else /* !CONFIG_NUMA */ #define get_memcfg_numa get_memcfg_numa_flat -#define get_zholes_size(n) (0) #endif /* CONFIG_NUMA */ @@ -83,9 +77,6 @@ static inline int pfn_to_nid(unsigned long pfn) __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ }) -#ifdef CONFIG_X86_NUMAQ /* we have contiguous memory on NUMA-Q */ -#define pfn_valid(pfn) ((pfn) < num_physpages) -#else static inline int pfn_valid(int pfn) { int nid = pfn_to_nid(pfn); @@ -94,7 +85,6 @@ static inline int pfn_valid(int pfn) return (pfn < node_end_pfn(nid)); return 0; } -#endif /* CONFIG_X86_NUMAQ */ #endif /* CONFIG_DISCONTIGMEM */ diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h index 4451d720ee0..6e9c9588b1f 100644 --- a/include/asm-x86/mpspec.h +++ b/include/asm-x86/mpspec.h @@ -13,6 +13,12 @@ extern int apic_version[MAX_APICS]; extern u8 apicid_2_node[]; extern int pic_mode; +#ifdef CONFIG_X86_NUMAQ +extern int mp_bus_id_to_node[MAX_MP_BUSSES]; +extern int mp_bus_id_to_local[MAX_MP_BUSSES]; +extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; +#endif + #define MAX_APICID 256 #else diff --git a/include/asm-x86/numaq.h b/include/asm-x86/numaq.h index 739d16484b8..ef068d2465d 100644 --- a/include/asm-x86/numaq.h +++ b/include/asm-x86/numaq.h @@ -157,9 +157,10 @@ struct sys_cfg_data { struct eachquadmem eq[MAX_NUMNODES]; /* indexed by quad id */ }; -static inline unsigned long *get_zholes_size(int nid) +#else +static inline int get_memcfg_numaq(void) { - return NULL; + return 0; } #endif /* CONFIG_X86_NUMAQ */ #endif /* NUMAQ_H */ diff --git a/include/asm-x86/srat.h b/include/asm-x86/srat.h index f4bba131d06..456fe0b5a92 100644 --- a/include/asm-x86/srat.h +++ b/include/asm-x86/srat.h @@ -27,11 +27,13 @@ #ifndef _ASM_SRAT_H_ #define _ASM_SRAT_H_ -#ifndef CONFIG_ACPI_SRAT -#error CONFIG_ACPI_SRAT not defined, and srat.h header has been included -#endif - +#ifdef CONFIG_ACPI_SRAT extern int get_memcfg_from_srat(void); -extern unsigned long *get_zholes_size(int); +#else +static inline int get_memcfg_from_srat(void) +{ + return 0; +} +#endif #endif /* _ASM_SRAT_H_ */ -- cgit v1.2.3-70-g09d2 From 0699eae140a3eeca976df4e3b7699b1fa3f763cd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 17 Jun 2008 15:39:01 -0700 Subject: x86: Kconfig cleanup with genericarch we already have summit and etc depends on genericarch, so use genericarch only. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 07276ac01c2..7ebd986050a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -446,7 +446,7 @@ config MEMTEST_BOOTPARAM_VALUE config ACPI_SRAT def_bool y - depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) + depends on X86_32 && ACPI && NUMA && X86_GENERICARCH select ACPI_NUMA config HAVE_ARCH_PARSE_SRAT @@ -455,11 +455,11 @@ config HAVE_ARCH_PARSE_SRAT config X86_SUMMIT_NUMA def_bool y - depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH) + depends on X86_32 && NUMA && X86_GENERICARCH config X86_CYCLONE_TIMER def_bool y - depends on X86_32 && X86_SUMMIT || X86_GENERICARCH + depends on X86_GENERICARCH config ES7000_CLUSTERED_APIC def_bool y @@ -909,9 +909,9 @@ config X86_PAE config NUMA bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || X86_SUMMIT && ACPI) && EXPERIMENTAL) + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) default n if X86_PC - default y if (X86_NUMAQ || X86_SUMMIT || X86_GENERICARCH) + default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) help Enable NUMA (Non Uniform Memory Access) support. The kernel will try to allocate memory used by a CPU on the -- cgit v1.2.3-70-g09d2 From 6695c85b2e6f9f2e9ccaa8af38b72f5ab4a5184f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 19 Jun 2008 12:13:09 -0700 Subject: x86: let MPS support be selectable, v2 v2: seperate "fix for compiling when MPPARSE is not set" to another patch make X86_MPPARSE to be selectable only when acpi is set and X86_MPPARSE will be set if acpi is not set. Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Maciej W. Rozycki Cc: Len Brown Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 21 +++++++++++++++++++++ arch/x86/Kconfig.debug | 9 --------- 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7ebd986050a..0804a889c27 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -230,6 +230,27 @@ config SMP If you don't know what to do here, say N. +config X86_FIND_SMP_CONFIG + def_bool y + depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS + depends on X86_32 + +if ACPI +config X86_MPPARSE + def_bool y + bool "Enable MPS table" + depends on ((X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64) + help + For old smp systems that do not have proper acpi support. Newer systems + (esp with 64bit cpus) with acpi support, MADT and DSDT will override it +endif + +if !ACPI +config X86_MPPARSE + def_bool y + depends on ((X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64) +endif + choice prompt "Subarchitecture Type" default X86_PC diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 253e7a5706d..f7d413d6101 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -129,15 +129,6 @@ config 4KSTACKS on the VM subsystem for higher order allocations. This option will also use IRQ stacks to compensate for the reduced stackspace. -config X86_FIND_SMP_CONFIG - def_bool y - depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS - depends on X86_32 - -config X86_MPPARSE - def_bool y - depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64 - config DOUBLEFAULT default y bool "Enable doublefault exception handler" if EMBEDDED -- cgit v1.2.3-70-g09d2 From a4caa18efe468acb3522e30763de57a67b3e438b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 19 Jun 2008 12:15:01 -0700 Subject: x86: fix compiling when CONFIG_X86_MPPARSE is not set Signed-off-by: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/mm/k8topology_64.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0804a889c27..25251799337 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -307,7 +307,7 @@ if X86_GENERICARCH config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" - depends on SMP && X86_32 + depends on SMP && X86_32 && X86_MPPARSE select NUMA help This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 1f476e47784..f75aa2ae54a 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -56,18 +56,22 @@ static __init void early_get_boot_cpu_id(void) /* * Find possible boot-time SMP configuration: */ +#ifdef CONFIG_X86_MPPARSE early_find_smp_config(); +#endif #ifdef CONFIG_ACPI /* * Read APIC information from ACPI tables. */ early_acpi_boot_init(); #endif +#ifdef CONFIG_X86_MPPARSE /* * get boot-time SMP configuration: */ if (smp_found_config) early_get_smp_config(); +#endif early_init_lapic_mapping(); } -- cgit v1.2.3-70-g09d2 From bad48f4b313a756ccde454c25c14c828e2fd5819 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 20 Jun 2008 07:33:31 -0700 Subject: x86: simplify x86_mpparse dependency check "Maciej W. Rozycki" said: > Given X86_64 selects X86_LOCAL_APIC I am not sure the redundancy seen >above does not actually obscure the logic behind... I think: > > depends on X86_LOCAL_APIC && !X86_VISWS > >would be clearer and get the same. Suggested-by: Maciej W. Rozycki Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Len Brown Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 25251799337..23c352e408a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -239,7 +239,7 @@ if ACPI config X86_MPPARSE def_bool y bool "Enable MPS table" - depends on ((X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64) + depends on X86_LOCAL_APIC && !X86_VISWS help For old smp systems that do not have proper acpi support. Newer systems (esp with 64bit cpus) with acpi support, MADT and DSDT will override it @@ -248,7 +248,7 @@ endif if !ACPI config X86_MPPARSE def_bool y - depends on ((X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64) + depends on X86_LOCAL_APIC && !X86_VISWS endif choice -- cgit v1.2.3-70-g09d2