From 49c3df6aaa6a51071fc135273d1a2515d019099f Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86: Move swsusp __pa() dependent code to arch portion o __pa() should be used only on kernel linearly mapped virtual addresses and not on kernel text and data addresses. o Hibernation code needs to determine the physical address associated with kernel symbol to mark a section boundary which contains pages which don't have to be saved and restored during hibernate/resume operation. o Move this piece of code in arch dependent section. So that architectures which don't have kernel text/data mapped into kernel linearly mapped region can come up with their own ways of determining physical addresses associated with a kernel text. Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- kernel/power/power.h | 5 ++--- kernel/power/snapshot.c | 11 ----------- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/power/power.h b/kernel/power/power.h index eb461b816bf..1c6eef8df4a 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -23,6 +23,8 @@ static inline int pm_suspend_disk(void) } #endif +extern int pfn_is_nosave(unsigned long); + extern struct mutex pm_mutex; #define power_attr(_name) \ @@ -37,9 +39,6 @@ static struct subsys_attribute _name##_attr = { \ extern struct subsystem power_subsys; -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; - /* Preferred image size in bytes (default 500 MB) */ extern unsigned long image_size; extern int in_suspend; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index fc53ad06812..704c25a3ffe 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -650,17 +650,6 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } static inline unsigned int count_highmem_pages(void) { return 0; } #endif /* CONFIG_HIGHMEM */ -/** - * pfn_is_nosave - check if given pfn is in the 'nosave' section - */ - -static inline int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); -} - /** * saveable - Determine whether a non-highmem page should be included in * the suspend image. -- cgit v1.2.3-70-g09d2 From 1b29c1643c0d82512477ccd97dc290198fe23e22 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86-64: do not use virt_to_page on kernel data address o virt_to_page() call should be used on kernel linear addresses and not on kernel text and data addresses. Swsusp code uses it on kernel data (statically allocated swsusp_header). o Allocate swsusp_header dynamically so that virt_to_page() can be used safely. o I am changing this because in next few patches, __pa() on x86_64 will no longer support kernel text and data addresses and hibernation breaks. 
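The first patch above only removes the generic pfn_is_nosave(); the arch-side replacement is not included in this extract. A minimal sketch of what such a replacement could look like, assuming __pa_symbol() is the translation that stays valid for kernel text/data addresses (the file and helper the real patch uses may differ):

        /* Sketch only: arch-provided pfn_is_nosave(), using a symbol-safe
         * physical-address translation instead of __pa(). */
        #include <asm/page.h>

        extern const void __nosave_begin, __nosave_end;

        int pfn_is_nosave(unsigned long pfn)
        {
                unsigned long nosave_begin_pfn =
                        __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
                unsigned long nosave_end_pfn =
                        PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;

                return pfn >= nosave_begin_pfn && pfn < nosave_end_pfn;
        }
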
Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- kernel/power/swap.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 3581f8f86ac..b18c155cbb6 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -33,12 +33,14 @@ extern char resume_file[]; #define SWSUSP_SIG "S1SUSPEND" -static struct swsusp_header { +struct swsusp_header { char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; sector_t image; char orig_sig[10]; char sig[10]; -} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; +} __attribute__((packed)); + +static struct swsusp_header *swsusp_header; /* * General things @@ -141,14 +143,14 @@ static int mark_swapfiles(sector_t start) { int error; - bio_read_page(swsusp_resume_block, &swsusp_header, NULL); - if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || - !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { - memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); - memcpy(swsusp_header.sig,SWSUSP_SIG, 10); - swsusp_header.image = start; + bio_read_page(swsusp_resume_block, swsusp_header, NULL); + if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || + !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { + memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); + memcpy(swsusp_header->sig,SWSUSP_SIG, 10); + swsusp_header->image = start; error = bio_write_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); } else { printk(KERN_ERR "swsusp: Swap header not found!\n"); error = -ENODEV; @@ -564,7 +566,7 @@ int swsusp_read(void) if (error < PAGE_SIZE) return error < 0 ? error : -EFAULT; header = (struct swsusp_info *)data_of(snapshot); - error = get_swap_reader(&handle, swsusp_header.image); + error = get_swap_reader(&handle, swsusp_header->image); if (!error) error = swap_read_page(&handle, header, NULL); if (!error) @@ -591,17 +593,17 @@ int swsusp_check(void) resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); if (!IS_ERR(resume_bdev)) { set_blocksize(resume_bdev, PAGE_SIZE); - memset(&swsusp_header, 0, sizeof(swsusp_header)); + memset(swsusp_header, 0, sizeof(PAGE_SIZE)); error = bio_read_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); if (error) return error; - if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { - memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); + if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { + memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ error = bio_write_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); } else { return -EINVAL; } @@ -632,3 +634,13 @@ void swsusp_close(void) blkdev_put(resume_bdev); } + +static int swsusp_header_init(void) +{ + swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL); + if (!swsusp_header) + panic("Could not allocate memory for swsusp_header\n"); + return 0; +} + +core_initcall(swsusp_header_init); -- cgit v1.2.3-70-g09d2 From b00742d399513a4100c24cc2accefdc1bb1e0b15 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] x86-64: Account for module percpu space separately from kernel percpu Rather than using a single constant PERCPU_ENOUGH_ROOM, compute it as the sum of kernel_percpu + PERCPU_MODULE_RESERVE. This is now common to all architectures; if an architecture wants to set PERCPU_ENOUGH_ROOM to something special, then it may do so (ia64 is the only one which does). 
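For the accounting change above: PERCPU_MODULE_RESERVE (8 KiB with CONFIG_MODULES) is the free room that percpu_modalloc() hands out to modules at load time, so every architecture now sizes its per-CPU area as the kernel's own static per-CPU data plus that reserve. A purely illustrative module (all identifiers invented) showing the kind of allocation the reserve exists for:

        #include <linux/module.h>
        #include <linux/percpu.h>

        /* Goes into the module's .data.percpu; the module loader places it
         * in the PERCPU_MODULE_RESERVE part of each CPU's area. */
        static DEFINE_PER_CPU(unsigned long, demo_hits);

        static int __init demo_init(void)
        {
                get_cpu_var(demo_hits)++;       /* touch this CPU's copy */
                put_cpu_var(demo_hits);
                return 0;
        }

        static void __exit demo_exit(void)
        {
        }

        module_init(demo_init);
        module_exit(demo_exit);
        MODULE_LICENSE("GPL");
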
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Eric W. Biederman Cc: Andi Kleen --- include/asm-alpha/percpu.h | 14 -------------- include/asm-sparc64/percpu.h | 10 ---------- include/asm-x86_64/percpu.h | 10 ---------- include/linux/percpu.h | 9 ++++++++- kernel/module.c | 2 +- 5 files changed, 9 insertions(+), 36 deletions(-) (limited to 'kernel') diff --git a/include/asm-alpha/percpu.h b/include/asm-alpha/percpu.h index 651ebb141b2..48348fe34c1 100644 --- a/include/asm-alpha/percpu.h +++ b/include/asm-alpha/percpu.h @@ -1,20 +1,6 @@ #ifndef __ALPHA_PERCPU_H #define __ALPHA_PERCPU_H -/* - * Increase the per cpu area for Alpha so that - * modules using percpu area can load. - */ -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - #include #endif /* __ALPHA_PERCPU_H */ diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h index 0d3df76aa47..ced8cbde046 100644 --- a/include/asm-sparc64/percpu.h +++ b/include/asm-sparc64/percpu.h @@ -5,16 +5,6 @@ #ifdef CONFIG_SMP -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - extern void setup_per_cpu_areas(void); extern unsigned long __per_cpu_base; diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 5ed0ef34084..c6fbb67eac9 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -11,16 +11,6 @@ #include -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) #define __my_cpu_offset() read_pda(data_offset) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 600e3d387ff..b72be2f79e6 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -11,9 +11,16 @@ /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM -#define PERCPU_ENOUGH_ROOM 32768 +#ifdef CONFIG_MODULES +#define PERCPU_MODULE_RESERVE 8192 +#else +#define PERCPU_MODULE_RESERVE 0 #endif +#define PERCPU_ENOUGH_ROOM \ + (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) +#endif /* PERCPU_ENOUGH_ROOM */ + /* * Must be an lvalue. Since @var must be a simple identifier, * we force a syntax error here if it isn't. diff --git a/kernel/module.c b/kernel/module.c index 9da5af668a2..cf49ca25fcc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -430,7 +430,7 @@ static int percpu_modinit(void) pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, GFP_KERNEL); /* Static in-kernel percpu data (used). */ - pcpu_size[0] = -ALIGN(__per_cpu_end-__per_cpu_start, SMP_CACHE_BYTES); + pcpu_size[0] = -(__per_cpu_end-__per_cpu_start); /* Free room. */ pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; if (pcpu_size[1] < 0) { -- cgit v1.2.3-70-g09d2 From b6e3590f8145c77b8fcef3247e2412335221412f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] x86: Allow percpu variables to be page-aligned Let's allow page-alignment in general for per-cpu data (wanted by Xen, and Ingo suggested KVM as well). 
Because larger alignments can use more room, we increase the max per-cpu memory to 64k rather than 32k: it's getting a little tight. Signed-off-by: Rusty Russell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: Ingo Molnar Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/alpha/kernel/vmlinux.lds.S | 2 +- arch/arm/kernel/vmlinux.lds.S | 2 +- arch/cris/arch-v32/vmlinux.lds.S | 1 + arch/frv/kernel/vmlinux.lds.S | 1 + arch/i386/kernel/vmlinux.lds.S | 2 +- arch/m32r/kernel/vmlinux.lds.S | 2 +- arch/mips/kernel/vmlinux.lds.S | 2 +- arch/parisc/kernel/vmlinux.lds.S | 2 +- arch/powerpc/kernel/setup_64.c | 4 ++-- arch/powerpc/kernel/vmlinux.lds.S | 6 +----- arch/ppc/kernel/vmlinux.lds.S | 2 +- arch/s390/kernel/vmlinux.lds.S | 2 +- arch/sh/kernel/vmlinux.lds.S | 2 +- arch/sh64/kernel/vmlinux.lds.S | 2 +- arch/sparc/kernel/vmlinux.lds.S | 2 +- arch/sparc64/kernel/smp.c | 6 +++--- arch/x86_64/kernel/setup64.c | 4 ++-- arch/x86_64/kernel/vmlinux.lds.S | 2 +- arch/xtensa/kernel/vmlinux.lds.S | 2 +- init/main.c | 8 ++------ kernel/module.c | 8 ++++---- 21 files changed, 29 insertions(+), 35 deletions(-) (limited to 'kernel') diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 4cc44bd33d3..cf1e6fc6c68 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -69,7 +69,7 @@ SECTIONS . = ALIGN(8); SECURITY_INIT - . = ALIGN(64); + . = ALIGN(8192); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index ddbdad48f5b..d1a6a597ed9 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -59,7 +59,7 @@ SECTIONS usr/built-in.o(.init.ramfs) __initramfs_end = .; #endif - . = ALIGN(64); + . = ALIGN(4096); __per_cpu_start = .; *(.data.percpu) __per_cpu_end = .; diff --git a/arch/cris/arch-v32/vmlinux.lds.S b/arch/cris/arch-v32/vmlinux.lds.S index e124fcd766d..dfa25e1542b 100644 --- a/arch/cris/arch-v32/vmlinux.lds.S +++ b/arch/cris/arch-v32/vmlinux.lds.S @@ -91,6 +91,7 @@ SECTIONS } SECURITY_INIT + . = ALIGN (8192); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 97910e01682..28eae9735ad 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -57,6 +57,7 @@ SECTIONS __alt_instructions_end = .; .altinstr_replacement : { *(.altinstr_replacement) } + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index f4ec7223183..97fe6eac47c 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -194,7 +194,7 @@ SECTIONS __initramfs_end = .; } #endif - . = ALIGN(L1_CACHE_BYTES); + . = ALIGN(4096); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; *(.data.percpu) diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 439cc257cd1..6c73bca3f47 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -110,7 +110,7 @@ SECTIONS __initramfs_end = .; #endif - . = ALIGN(32); + . 
= ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index c76b793310c..043f637e3d1 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -119,7 +119,7 @@ SECTIONS .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; #endif - . = ALIGN(32); + . = ALIGN(_PAGE_SIZE); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 2a8253358c6..c7458599059 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -181,7 +181,7 @@ SECTIONS .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; #endif - . = ALIGN(32); + . = ALIGN(ASM_PAGE_SIZE); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 22083ce3cc3..6018178708a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -582,14 +582,14 @@ void __init setup_per_cpu_areas(void) char *ptr; /* Copy section for each CPU (we discard the original) */ - size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); + size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); #ifdef CONFIG_MODULES if (size < PERCPU_ENOUGH_ROOM) size = PERCPU_ENOUGH_ROOM; #endif for_each_possible_cpu(i) { - ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); + ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); if (!ptr) panic("Cannot allocate cpu data for CPU %d\n", i); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7eefeb4a30e..13206731314 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -139,11 +139,7 @@ SECTIONS __initramfs_end = .; } #endif -#ifdef CONFIG_PPC32 - . = ALIGN(32); -#else - . = ALIGN(128); -#endif + . = ALIGN(PAGE_SIZE); .data.percpu : { __per_cpu_start = .; *(.data.percpu) diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S index a0625562a44..44cd128fb71 100644 --- a/arch/ppc/kernel/vmlinux.lds.S +++ b/arch/ppc/kernel/vmlinux.lds.S @@ -130,7 +130,7 @@ SECTIONS __ftr_fixup : { *(__ftr_fixup) } __stop___ftr_fixup = .; - . = ALIGN(32); + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 418f6426a94..e9d3432aba6 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -107,7 +107,7 @@ SECTIONS . = ALIGN(2); __initramfs_end = .; #endif - . = ALIGN(256); + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 78a6c09875b..2f606d0ce1f 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -54,7 +54,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.page_aligned : { *(.data.page_aligned) } - . = ALIGN(L1_CACHE_BYTES); + . = ALIGN(PAGE_SIZE); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/sh64/kernel/vmlinux.lds.S b/arch/sh64/kernel/vmlinux.lds.S index a59c5e99813..4f9616f3983 100644 --- a/arch/sh64/kernel/vmlinux.lds.S +++ b/arch/sh64/kernel/vmlinux.lds.S @@ -85,7 +85,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.page_aligned : C_PHYS(.data.page_aligned) { *(.data.page_aligned) } - . = ALIGN(L1_CACHE_BYTES); + . 
= ALIGN(PAGE_SIZE); __per_cpu_start = .; .data.percpu : C_PHYS(.data.percpu) { *(.data.percpu) } __per_cpu_end = . ; diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index e5c24e0521d..f0bb6e60e62 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -65,7 +65,7 @@ SECTIONS __initramfs_end = .; #endif - . = ALIGN(32); + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index d4f0a70f484..1fac215252e 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -1343,11 +1343,11 @@ void __init setup_per_cpu_areas(void) /* Copy section for each CPU (we discard the original) */ goal = PERCPU_ENOUGH_ROOM; - __per_cpu_shift = 0; - for (size = 1UL; size < goal; size <<= 1UL) + __per_cpu_shift = PAGE_SHIFT; + for (size = PAGE_SIZE; size < goal; size <<= 1UL) __per_cpu_shift++; - ptr = alloc_bootmem(size * NR_CPUS); + ptr = alloc_bootmem_pages(size * NR_CPUS); __per_cpu_base = ptr - __per_cpu_start; diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 53064a9a365..64379a80d76 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -103,9 +103,9 @@ void __init setup_per_cpu_areas(void) if (!NODE_DATA(cpu_to_node(i))) { printk("cpu with no node %d, num_online_nodes %d\n", i, num_online_nodes()); - ptr = alloc_bootmem(size); + ptr = alloc_bootmem_pages(size); } else { - ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); + ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); } if (!ptr) panic("Cannot allocate cpu data for CPU %d\n", i); diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 3bdeb88d28f..7ef0b8820cd 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -195,7 +195,7 @@ SECTIONS __initramfs_end = .; #endif - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index ab6370054ce..4fbd66a52a8 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -198,7 +198,7 @@ SECTIONS __ftr_fixup : { *(__ftr_fixup) } __stop___ftr_fixup = .; - . = ALIGN(32); + . 
= ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/init/main.c b/init/main.c index a92989e7836..80f09f31bfa 100644 --- a/init/main.c +++ b/init/main.c @@ -369,12 +369,8 @@ static void __init setup_per_cpu_areas(void) unsigned long nr_possible_cpus = num_possible_cpus(); /* Copy section for each CPU (we discard the original) */ - size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); -#ifdef CONFIG_MODULES - if (size < PERCPU_ENOUGH_ROOM) - size = PERCPU_ENOUGH_ROOM; -#endif - ptr = alloc_bootmem(size * nr_possible_cpus); + size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); + ptr = alloc_bootmem_pages(size * nr_possible_cpus); for_each_possible_cpu(i) { __per_cpu_offset[i] = ptr - __per_cpu_start; diff --git a/kernel/module.c b/kernel/module.c index cf49ca25fcc..4dc4a257545 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -346,10 +346,10 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, unsigned int i; void *ptr; - if (align > SMP_CACHE_BYTES) { - printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n", - name, align, SMP_CACHE_BYTES); - align = SMP_CACHE_BYTES; + if (align > PAGE_SIZE) { + printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", + name, align, PAGE_SIZE); + align = PAGE_SIZE; } ptr = __per_cpu_start; -- cgit v1.2.3-70-g09d2 From d6dd61c831226f9cd7750885da04d360d6455101 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] x86: PARAVIRT: add hooks to intercept mm creation and destruction Add hooks to allow a paravirt implementation to track the lifetime of an mm. Paravirtualization requires three hooks, but only two are needed in common code. They are: arch_dup_mmap, which is called when a new mmap is created at fork arch_exit_mmap, which is called when the last process reference to an mm is dropped, which typically happens on exit and exec. The third hook is activate_mm, which is called from the arch-specific activate_mm() macro/function, and so doesn't need stub versions for other architectures. It's called when an mm is first used. 
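The patch below only installs no-op implementations on native hardware; a hedged sketch (all identifiers invented) of how a paravirt backend could put the three hooks to use:

        #include <linux/sched.h>

        /* Hypothetical hypervisor backend; the real patch wires these slots
         * to paravirt_nop. */
        static void demo_hv_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
        {
                /* fork(): make the new mm's pagetables known to the hypervisor */
        }

        static void demo_hv_exit_mmap(struct mm_struct *mm)
        {
                /* last user gone: release hypervisor state for this mm,
                 * e.g. shadow pagetables */
        }

        static void demo_hv_activate_mm(struct mm_struct *prev, struct mm_struct *next)
        {
                /* mm first used: point the hypervisor at next's pagetable base */
        }

        /* ...and in the backend's struct paravirt_ops initializer:
         *      .dup_mmap    = demo_hv_dup_mmap,
         *      .exit_mmap   = demo_hv_exit_mmap,
         *      .activate_mm = demo_hv_activate_mm,
         */
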
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: linux-arch@vger.kernel.org Cc: James Bottomley Acked-by: Ingo Molnar --- arch/i386/kernel/paravirt.c | 4 ++++ include/asm-alpha/mmu_context.h | 1 + include/asm-arm/mmu_context.h | 1 + include/asm-arm26/mmu_context.h | 2 ++ include/asm-avr32/mmu_context.h | 1 + include/asm-cris/mmu_context.h | 2 ++ include/asm-frv/mmu_context.h | 1 + include/asm-generic/mm_hooks.h | 18 ++++++++++++++++++ include/asm-h8300/mmu_context.h | 1 + include/asm-i386/mmu_context.h | 17 +++++++++++++++-- include/asm-i386/paravirt.h | 23 +++++++++++++++++++++++ include/asm-ia64/mmu_context.h | 1 + include/asm-m32r/mmu_context.h | 1 + include/asm-m68k/mmu_context.h | 1 + include/asm-m68knommu/mmu_context.h | 1 + include/asm-mips/mmu_context.h | 1 + include/asm-parisc/mmu_context.h | 1 + include/asm-powerpc/mmu_context.h | 1 + include/asm-ppc/mmu_context.h | 1 + include/asm-s390/mmu_context.h | 2 ++ include/asm-sh/mmu_context.h | 1 + include/asm-sh64/mmu_context.h | 2 +- include/asm-sparc/mmu_context.h | 2 ++ include/asm-sparc64/mmu_context.h | 1 + include/asm-um/mmu_context.h | 2 ++ include/asm-v850/mmu_context.h | 2 ++ include/asm-x86_64/mmu_context.h | 1 + include/asm-xtensa/mmu_context.h | 1 + kernel/fork.c | 2 ++ mm/mmap.c | 4 ++++ 30 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 include/asm-generic/mm_hooks.h (limited to 'kernel') diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 2040a831d5b..54cc14d9974 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -237,6 +237,10 @@ struct paravirt_ops paravirt_ops = { .irq_enable_sysexit = native_irq_enable_sysexit, .iret = native_iret, + .dup_mmap = paravirt_nop, + .exit_mmap = paravirt_nop, + .activate_mm = paravirt_nop, + .startup_ipi_hook = paravirt_nop, }; diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h index fe249e9d336..0bd7bd2ccb9 100644 --- a/include/asm-alpha/mmu_context.h +++ b/include/asm-alpha/mmu_context.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * Force a context reload. 
This is needed when we change the page diff --git a/include/asm-arm/mmu_context.h b/include/asm-arm/mmu_context.h index d1a65b1edca..f8755c818b5 100644 --- a/include/asm-arm/mmu_context.h +++ b/include/asm-arm/mmu_context.h @@ -16,6 +16,7 @@ #include #include #include +#include void __check_kvm_seq(struct mm_struct *mm); diff --git a/include/asm-arm26/mmu_context.h b/include/asm-arm26/mmu_context.h index 1a929bfe5c3..16c821f81b8 100644 --- a/include/asm-arm26/mmu_context.h +++ b/include/asm-arm26/mmu_context.h @@ -13,6 +13,8 @@ #ifndef __ASM_ARM_MMU_CONTEXT_H #define __ASM_ARM_MMU_CONTEXT_H +#include + #define init_new_context(tsk,mm) 0 #define destroy_context(mm) do { } while(0) diff --git a/include/asm-avr32/mmu_context.h b/include/asm-avr32/mmu_context.h index 31add1ae808..c37c391faef 100644 --- a/include/asm-avr32/mmu_context.h +++ b/include/asm-avr32/mmu_context.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * The MMU "context" consists of two things: diff --git a/include/asm-cris/mmu_context.h b/include/asm-cris/mmu_context.h index e6e659dc757..72ba08dcfd1 100644 --- a/include/asm-cris/mmu_context.h +++ b/include/asm-cris/mmu_context.h @@ -1,6 +1,8 @@ #ifndef __CRIS_MMU_CONTEXT_H #define __CRIS_MMU_CONTEXT_H +#include + extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); extern void get_mmu_context(struct mm_struct *mm); extern void destroy_context(struct mm_struct *mm); diff --git a/include/asm-frv/mmu_context.h b/include/asm-frv/mmu_context.h index 72edcaaccd5..c7daa395156 100644 --- a/include/asm-frv/mmu_context.h +++ b/include/asm-frv/mmu_context.h @@ -15,6 +15,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h new file mode 100644 index 00000000000..67dea812368 --- /dev/null +++ b/include/asm-generic/mm_hooks.h @@ -0,0 +1,18 @@ +/* + * Define generic no-op hooks for arch_dup_mmap and arch_exit_mmap, to + * be included in asm-FOO/mmu_context.h for any arch FOO which doesn't + * need to hook these. + */ +#ifndef _ASM_GENERIC_MM_HOOKS_H +#define _ASM_GENERIC_MM_HOOKS_H + +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ +} + +#endif /* _ASM_GENERIC_MM_HOOKS_H */ diff --git a/include/asm-h8300/mmu_context.h b/include/asm-h8300/mmu_context.h index 5c165f7bee0..f44b730da54 100644 --- a/include/asm-h8300/mmu_context.h +++ b/include/asm-h8300/mmu_context.h @@ -4,6 +4,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index e6aa30f8de5..8198d1cca1f 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -5,6 +5,16 @@ #include #include #include +#include +#ifndef CONFIG_PARAVIRT +#include + +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ +} +#endif /* !CONFIG_PARAVIRT */ + /* * Used for LDT copy/destruction. 
@@ -65,7 +75,10 @@ static inline void switch_mm(struct mm_struct *prev, #define deactivate_mm(tsk, mm) \ asm("movl %0,%%gs": :"r" (0)); -#define activate_mm(prev, next) \ - switch_mm((prev),(next),NULL) +#define activate_mm(prev, next) \ + do { \ + paravirt_activate_mm(prev, next); \ + switch_mm((prev),(next),NULL); \ + } while(0); #endif diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index f93599dc775..61c03f1e0c2 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -119,6 +119,12 @@ struct paravirt_ops void (*io_delay)(void); + void (*activate_mm)(struct mm_struct *prev, + struct mm_struct *next); + void (*dup_mmap)(struct mm_struct *oldmm, + struct mm_struct *mm); + void (*exit_mmap)(struct mm_struct *mm); + #ifdef CONFIG_X86_LOCAL_APIC void (*apic_write)(unsigned long reg, unsigned long v); void (*apic_write_atomic)(unsigned long reg, unsigned long v); @@ -395,6 +401,23 @@ static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ + paravirt_ops.activate_mm(prev, next); +} + +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + paravirt_ops.dup_mmap(oldmm, mm); +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ + paravirt_ops.exit_mmap(mm); +} + #define __flush_tlb() paravirt_ops.flush_tlb_user() #define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() #define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index b5c65081a3a..cef2400983f 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h @@ -29,6 +29,7 @@ #include #include +#include struct ia64_ctx { spinlock_t lock; diff --git a/include/asm-m32r/mmu_context.h b/include/asm-m32r/mmu_context.h index 1f40d4a0acf..91909e5dd9d 100644 --- a/include/asm-m32r/mmu_context.h +++ b/include/asm-m32r/mmu_context.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * Cache of MMU context last used. 
diff --git a/include/asm-m68k/mmu_context.h b/include/asm-m68k/mmu_context.h index 231d11bd8e3..894dacbcee1 100644 --- a/include/asm-m68k/mmu_context.h +++ b/include/asm-m68k/mmu_context.h @@ -1,6 +1,7 @@ #ifndef __M68K_MMU_CONTEXT_H #define __M68K_MMU_CONTEXT_H +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-m68knommu/mmu_context.h b/include/asm-m68knommu/mmu_context.h index 6c077d3a257..9ccee4278c9 100644 --- a/include/asm-m68knommu/mmu_context.h +++ b/include/asm-m68knommu/mmu_context.h @@ -4,6 +4,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-mips/mmu_context.h b/include/asm-mips/mmu_context.h index fe065d6070c..65024ffd787 100644 --- a/include/asm-mips/mmu_context.h +++ b/include/asm-mips/mmu_context.h @@ -20,6 +20,7 @@ #include #include #endif /* SMTC */ +#include /* * For the fast tlb miss handlers, we keep a per cpu array of pointers diff --git a/include/asm-parisc/mmu_context.h b/include/asm-parisc/mmu_context.h index 9c05836239a..bad690298f0 100644 --- a/include/asm-parisc/mmu_context.h +++ b/include/asm-parisc/mmu_context.h @@ -5,6 +5,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-powerpc/mmu_context.h b/include/asm-powerpc/mmu_context.h index 083ac917bd2..c0d7795e3d2 100644 --- a/include/asm-powerpc/mmu_context.h +++ b/include/asm-powerpc/mmu_context.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * Copyright (C) 2001 PPC 64 Team, IBM Corp diff --git a/include/asm-ppc/mmu_context.h b/include/asm-ppc/mmu_context.h index 2bc8589cc45..a6441a063e5 100644 --- a/include/asm-ppc/mmu_context.h +++ b/include/asm-ppc/mmu_context.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h index 1d21da220d4..501cb9b0631 100644 --- a/include/asm-s390/mmu_context.h +++ b/include/asm-s390/mmu_context.h @@ -10,6 +10,8 @@ #define __S390_MMU_CONTEXT_H #include +#include + /* * get a new mmu context.. S390 don't know about contexts. 
*/ diff --git a/include/asm-sh/mmu_context.h b/include/asm-sh/mmu_context.h index 342024425b7..01acaaae975 100644 --- a/include/asm-sh/mmu_context.h +++ b/include/asm-sh/mmu_context.h @@ -12,6 +12,7 @@ #include #include #include +#include /* * The MMU "context" consists of two things: diff --git a/include/asm-sh64/mmu_context.h b/include/asm-sh64/mmu_context.h index 8c860dab2d0..507bf72bb8e 100644 --- a/include/asm-sh64/mmu_context.h +++ b/include/asm-sh64/mmu_context.h @@ -27,7 +27,7 @@ extern unsigned long mmu_context_cache; #include - +#include /* Current mm's pgd */ extern pgd_t *mmu_pdtp_cache; diff --git a/include/asm-sparc/mmu_context.h b/include/asm-sparc/mmu_context.h index ed1e01d04d2..671a997b9e6 100644 --- a/include/asm-sparc/mmu_context.h +++ b/include/asm-sparc/mmu_context.h @@ -5,6 +5,8 @@ #ifndef __ASSEMBLY__ +#include + static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h index 2337eb48771..8d129032013 100644 --- a/include/asm-sparc64/mmu_context.h +++ b/include/asm-sparc64/mmu_context.h @@ -9,6 +9,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-um/mmu_context.h b/include/asm-um/mmu_context.h index f709c784bf1..9aa4b44e8cc 100644 --- a/include/asm-um/mmu_context.h +++ b/include/asm-um/mmu_context.h @@ -6,6 +6,8 @@ #ifndef __UM_MMU_CONTEXT_H #define __UM_MMU_CONTEXT_H +#include + #include "linux/sched.h" #include "choose-mode.h" #include "um_mmu.h" diff --git a/include/asm-v850/mmu_context.h b/include/asm-v850/mmu_context.h index f521c8050d3..01daacd5474 100644 --- a/include/asm-v850/mmu_context.h +++ b/include/asm-v850/mmu_context.h @@ -1,6 +1,8 @@ #ifndef __V850_MMU_CONTEXT_H__ #define __V850_MMU_CONTEXT_H__ +#include + #define destroy_context(mm) ((void)0) #define init_new_context(tsk,mm) 0 #define switch_mm(prev,next,tsk) ((void)0) diff --git a/include/asm-x86_64/mmu_context.h b/include/asm-x86_64/mmu_context.h index af03b9f852d..0cce83a7837 100644 --- a/include/asm-x86_64/mmu_context.h +++ b/include/asm-x86_64/mmu_context.h @@ -7,6 +7,7 @@ #include #include #include +#include /* * possibly do the LDT unload here? diff --git a/include/asm-xtensa/mmu_context.h b/include/asm-xtensa/mmu_context.h index f14851f086c..92f948392eb 100644 --- a/include/asm-xtensa/mmu_context.h +++ b/include/asm-xtensa/mmu_context.h @@ -18,6 +18,7 @@ #include #include #include +#include #define XCHAL_MMU_ASID_BITS 8 diff --git a/kernel/fork.c b/kernel/fork.c index 6af959c034d..ffccefb28b6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -286,6 +286,8 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; } + /* a new mm has just been created */ + arch_dup_mmap(oldmm, mm); retval = 0; out: up_write(&mm->mmap_sem); diff --git a/mm/mmap.c b/mm/mmap.c index 84f997da78d..88da687bde8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -29,6 +29,7 @@ #include #include #include +#include #ifndef arch_mmap_check #define arch_mmap_check(addr, len, flags) (0) @@ -1979,6 +1980,9 @@ void exit_mmap(struct mm_struct *mm) unsigned long nr_accounted = 0; unsigned long end; + /* mm's last user has gone, and its about to be pulled down */ + arch_exit_mmap(mm); + lru_add_drain(); flush_cache_mm(mm); tlb = tlb_gather_mmu(mm, 1); -- cgit v1.2.3-70-g09d2
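Back to the per-cpu page-alignment patch earlier in the series: padding .data.percpu (and each CPU's copy of it) to PAGE_SIZE matters because a per-cpu variable may now legitimately request page alignment, which percpu_modalloc() also accepts up to PAGE_SIZE. A hedged illustration with a hypothetical variable:

        #include <linux/percpu.h>
        #include <asm/page.h>

        struct demo_percpu_page {
                unsigned long words[PAGE_SIZE / sizeof(unsigned long)];
        };

        /* With the section and the per-CPU copies page aligned, this
         * alignment request no longer spills into a neighbour's slot. */
        static DEFINE_PER_CPU(struct demo_percpu_page, demo_area)
                __attribute__((__aligned__(PAGE_SIZE)));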