From 3eefae994d9224fb7771a3ddb683868363c23510 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:04 +0200 Subject: ftrace: limit trace entries Currently there is no protection against the root user using up all of memory for trace buffers. If the root user allocates too many entries, the OOM killer might start killing off all tasks. This patch adds an algorithm to check the following condition: pages_requested > (freeable_memory + current_trace_buffer_pages) / 4 If the above is met then the allocation fails. The above prevents more than 1/4th of freeable memory from being used by trace buffers. For example, with a hypothetical 800 MB of freeable memory and 40 MB already held in trace buffers, any request above 210 MB (one quarter of 840 MB) fails. To determine the freeable_memory, I made determine_dirtyable_memory in mm/page-writeback.c global. Special thanks go to Peter Zijlstra for suggesting the above calculation. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- mm/page-writeback.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 789b6adbef3..b38f700825f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -126,8 +126,6 @@ static void background_writeout(unsigned long _min_pages); static struct prop_descriptor vm_completions; static struct prop_descriptor vm_dirties; -static unsigned long determine_dirtyable_memory(void); - /* * couple the period to the dirty_ratio: * @@ -347,7 +345,13 @@ static unsigned long highmem_dirtyable_memory(unsigned long total) #endif } -static unsigned long determine_dirtyable_memory(void) +/** + * determine_dirtyable_memory - amount of memory that may be used + * + * Returns the number of pages that can currently be freed and used + * by the kernel for direct mappings. + */ +unsigned long determine_dirtyable_memory(void) { unsigned long x; -- cgit v1.2.3-70-g09d2 From e8c27ac9191ab9e6506ae5cbe70d87ac50f8e960 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 1 Jun 2008 13:15:22 -0700 Subject: x86, numa, 32-bit: print out debug info on all kvas also fix the print out of node_remap_end_vaddr Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/discontig_32.c | 9 +++++++-- mm/page_alloc.c | 5 +++++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index ebbbba33815..3150ad38567 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -168,6 +168,8 @@ static void __init allocate_pgdat(int nid) reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), "NODE_DATA"); } + printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", + nid, (unsigned long)NODE_DATA(nid)); } #ifdef CONFIG_DISCONTIGMEM @@ -208,8 +210,12 @@ void __init remap_numa_kva(void) int node; for_each_online_node(node) { + printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); + printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n", + (unsigned long)vaddr, + node_remap_start_pfn[node] + pfn); set_pmd_pfn((ulong) vaddr, node_remap_start_pfn[node] + pfn, PAGE_KERNEL_LARGE); } } } diff --git a/mm/page_alloc.c b/mm/page_alloc.c --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ ... @@ void __paginginit free_area_init_node(int nid, struct pglist_data *pgdat, alloc_node_mem_map(pgdat); +#ifdef CONFIG_FLAT_NODE_MEM_MAP + printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n", + nid, (unsigned long)pgdat, (unsigned long)pgdat->node_mem_map); +#endif free_area_init_core(pgdat, zones_size, zholes_size); } -- cgit v1.2.3-70-g09d2 From cc1a9d86ce989083703c4bdc11b75a87e1cc404a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Jun 2008 19:39:16 -0700 Subject: mm, x86: shrink_active_range() should check all Now we are using register_e820_active_regions() instead of add_active_range() directly, so the end_pfn recorded in early_node_map can differ from node_end_pfn. We need to make shrink_active_range() smarter; an illustration of the new behaviour follows, and the diff below implements it.
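As a concrete sketch, consider a hypothetical node 0 registered as two ranges, [0..100) and [120..200), shrunk to new_end_pfn = 110 (numbers invented for illustration):

	/* early_node_map for nid 0: [0..100) and [120..200) */
	shrink_active_range(0, 110);
	/*
	 * The old code only trimmed an entry whose end_pfn matched the
	 * caller's old_end_pfn, so a layout like this was left untouched.
	 * The new code keeps [0..100) (it already ends below 110),
	 * truncates any range that crosses 110, clears [120..200)
	 * entirely (it starts at or above 110), and then compacts the
	 * blank slot out of early_node_map.
	 */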
shrink_active_range() is a generic MM function in mm/page_alloc.c but it is only used on 32-bit x86. Should we move it back to some file in arch/x86? Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/discontig_32.c | 2 +- include/linux/mm.h | 3 +-- mm/page_alloc.c | 44 ++++++++++++++++++++++++++++++++++---------- 3 files changed, 36 insertions(+), 13 deletions(-) (limited to 'mm') diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index a89ccf3d4c1..489605bab85 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -282,7 +282,7 @@ static unsigned long calculate_numa_remap_pages(void) node_end_pfn[nid] -= size; node_remap_start_pfn[nid] = node_end_pfn[nid]; - shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); + shrink_active_range(nid, node_end_pfn[nid]); } printk("Reserving total of %ld pages for numa KVA remap\n", reserve_pages); diff --git a/include/linux/mm.h b/include/linux/mm.h index c31a9cd2a30..7cbd949f251 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -997,8 +997,7 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, extern void free_area_init_nodes(unsigned long *max_zone_pfn); extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn, - unsigned long new_end_pfn); +extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn); extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 502223c3c2c..21540868407 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3579,25 +3579,49 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, /** * shrink_active_range - Shrink an existing registered range of PFNs * @nid: The node id the range is on that should be shrunk - * @old_end_pfn: The old end PFN of the range * @new_end_pfn: The new PFN of the range * * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node. - * The map is kept at the end physical page range that has already been - * registered with add_active_range(). This function allows an arch to shrink - * an existing registered range. + * The map is kept near the end physical page range that has already been + * registered. This function allows an arch to shrink an existing registered + * range. 
*/ -void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn, - unsigned long new_end_pfn) +void __init shrink_active_range(unsigned int nid, unsigned long new_end_pfn) { - int i; + int i, j; + int removed = 0; /* Find the old active region end and shrink */ - for_each_active_range_index_in_nid(i, nid) - if (early_node_map[i].end_pfn == old_end_pfn) { + for_each_active_range_index_in_nid(i, nid) { + if (early_node_map[i].start_pfn >= new_end_pfn) { + /* clear it */ + early_node_map[i].end_pfn = 0; + removed = 1; + continue; + } + if (early_node_map[i].end_pfn > new_end_pfn) { early_node_map[i].end_pfn = new_end_pfn; - break; + continue; } + } + + if (!removed) + return; + + /* remove the blank ones */ + for (i = nr_nodemap_entries - 1; i > 0; i--) { + if (early_node_map[i].nid != nid) + continue; + if (early_node_map[i].end_pfn) + continue; + /* we found it, get rid of it */ + for (j = i; j < nr_nodemap_entries - 1; j++) + memcpy(&early_node_map[j], &early_node_map[j+1], + sizeof(early_node_map[j])); + j = nr_nodemap_entries - 1; + memset(&early_node_map[j], 0, sizeof(early_node_map[j])); + nr_nodemap_entries--; + } } /** -- cgit v1.2.3-70-g09d2 From 1ea0704e0da65b2b46f9142ff1391163aac24060 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 16 Jun 2008 04:30:00 -0700 Subject: mm: add a ptep_modify_prot transaction abstraction This patch adds an API for doing read-modify-write updates to a pte's protection bits which may race against hardware updates to the pte. After reading the pte, the hardware may asynchronously set the accessed or dirty bits on a pte, which would be lost when writing back the modified pte value. The existing technique to handle this race is to use ptep_get_and_clear() to atomically fetch the old pte value and clear it in memory. This has the effect of marking the pte as non-present, which will prevent the hardware from updating its state. When the new value is written back, the pte will be present again, and the hardware can resume updating the access/dirty flags. When running in a virtualized environment, pagetable updates are relatively expensive, since they generally involve some trap into the hypervisor. To mitigate the cost of these updates, we tend to batch them. However, because of the atomic nature of ptep_get_and_clear(), it is inherently non-batchable. This new interface allows batching by giving the underlying implementation enough information to open a transaction between the read and write phases: ptep_modify_prot_start() returns the current pte value, and puts the pte entry into a state where either the hardware will not update the pte, or, if it does, the updates will be preserved on commit. ptep_modify_prot_commit() writes back the updated pte, and makes sure that any hardware updates made since ptep_modify_prot_start() are preserved. ptep_modify_prot_start() and _commit() must be exactly paired, and used while holding the appropriate pte lock. They do not protect against other software updates of the pte in any way. The current implementations of ptep_modify_prot_start and _commit are functionally unchanged from before: _start() uses ptep_get_and_clear() to fetch the pte and zero the entry, preventing any hardware updates. _commit() simply writes the new pte value back, knowing that the hardware has not updated the pte in the meantime.
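The intended calling pattern, then, is a three-step read-modify-write transaction under the pte lock. A minimal sketch, distilled from the mprotect conversion below (mm, addr, ptep and newprot stand for the caller's context):

	pte_t ptent;

	ptent = ptep_modify_prot_start(mm, addr, ptep);	/* read pte, open the transaction */
	ptent = pte_modify(ptent, newprot);		/* update the protection bits */
	ptep_modify_prot_commit(mm, addr, ptep, ptent);	/* write back, close the transaction */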
The only current user of this interface is mprotect. Signed-off-by: Jeremy Fitzhardinge Acked-by: Linus Torvalds Acked-by: Hugh Dickins Signed-off-by: Ingo Molnar --- include/asm-generic/pgtable.h | 57 +++++++++++++++++++++++++++++++++++++++++++ mm/mprotect.c | 10 +++----- 2 files changed, 61 insertions(+), 6 deletions(-) (limited to 'mm') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 44ef329531c..4fce3db2cec 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -197,6 +197,63 @@ static inline int pmd_none_or_clear_bad(pmd_t *pmd) } #endif /* CONFIG_MMU */ +static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) +{ + /* + * Get the current pte state, but zero it out to make it + * non-present, preventing the hardware from asynchronously + * updating it. + */ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline void __ptep_modify_prot_commit(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pte_t pte) +{ + /* + * The pte is non-present, so there's no hardware state to + * preserve. + */ + set_pte_at(mm, addr, ptep, pte); +} + +#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +/* + * Start a pte protection read-modify-write transaction, which + * protects against asynchronous hardware modifications to the pte. + * The intention is not to prevent the hardware from making pte + * updates, but to prevent any updates it may make from being lost. + * + * This does not protect against other software modifications of the + * pte; the appropriate pte lock must be held over the transaction. + * + * Note that this interface is intended to be batchable, meaning that + * ptep_modify_prot_commit may not actually update the pte, but merely + * queue the update to be done at some later time. The update must be + * actually committed before the pte lock is released, however. + */ +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) +{ + return __ptep_modify_prot_start(mm, addr, ptep); +} + +/* + * Commit an update to a pte, leaving any hardware-controlled bits in + * the PTE unmodified. + */ +static inline void ptep_modify_prot_commit(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pte_t pte) +{ + __ptep_modify_prot_commit(mm, addr, ptep, pte); +} +#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ + /* * A facility to provide lazy MMU batching. This allows PTE updates and * page invalidations to be delayed until a call to leave lazy MMU mode diff --git a/mm/mprotect.c b/mm/mprotect.c index a5bf31c2737..acfe7c8d72f 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -47,19 +47,17 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, if (pte_present(oldpte)) { pte_t ptent; - /* Avoid an SMP race with hardware updated dirty/clean * bits by wiping the pte and then setting the new pte * into place. */ - ptent = ptep_get_and_clear(mm, addr, pte); + ptent = ptep_modify_prot_start(mm, addr, pte); ptent = pte_modify(ptent, newprot); + /* * Avoid taking write faults for pages we know to be * dirty. 
*/ if (dirty_accountable && pte_dirty(ptent)) ptent = pte_mkwrite(ptent); - set_pte_at(mm, addr, pte, ptent); + + ptep_modify_prot_commit(mm, addr, pte, ptent); #ifdef CONFIG_MIGRATION } else if (!pte_file(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte); -- cgit v1.2.3-70-g09d2 From 15c8b6c1aaaf1c4edd67e2f02e4d8e1bd1a51c0d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 May 2008 09:39:44 +0200 Subject: on_each_cpu(): kill unused 'retry' parameter It's not even passed on to smp_call_function() anymore, since that was removed. So kill it. Acked-by: Jeremy Fitzhardinge Reviewed-by: Paul E. McKenney Signed-off-by: Jens Axboe --- arch/alpha/kernel/process.c | 2 +- arch/alpha/kernel/smp.c | 4 ++-- arch/arm/kernel/smp.c | 6 +++--- arch/ia64/kernel/mca.c | 4 ++-- arch/ia64/kernel/perfmon.c | 4 ++-- arch/ia64/kernel/smp.c | 4 ++-- arch/mips/kernel/irq-rm9000.c | 4 ++-- arch/mips/kernel/smp.c | 4 ++-- arch/mips/oprofile/common.c | 6 +++--- arch/parisc/kernel/cache.c | 6 +++--- arch/parisc/kernel/smp.c | 2 +- arch/parisc/mm/init.c | 2 +- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/kernel/tau_6xx.c | 4 ++-- arch/powerpc/kernel/time.c | 2 +- arch/powerpc/mm/slice.c | 2 +- arch/powerpc/oprofile/common.c | 6 +++--- arch/s390/kernel/smp.c | 6 +++--- arch/s390/kernel/time.c | 2 +- arch/sh/kernel/smp.c | 4 ++-- arch/sparc64/mm/hugetlbpage.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_64.c | 6 +++--- arch/x86/kernel/cpu/mcheck/non-fatal.c | 2 +- arch/x86/kernel/cpu/perfctr-watchdog.c | 4 ++-- arch/x86/kernel/io_apic_32.c | 2 +- arch/x86/kernel/io_apic_64.c | 2 +- arch/x86/kernel/nmi_32.c | 4 ++-- arch/x86/kernel/nmi_64.c | 4 ++-- arch/x86/kernel/tlb_32.c | 2 +- arch/x86/kernel/tlb_64.c | 2 +- arch/x86/kernel/vsyscall_64.c | 2 +- arch/x86/kvm/vmx.c | 2 +- arch/x86/mach-voyager/voyager_smp.c | 2 +- arch/x86/mm/pageattr.c | 4 ++-- arch/x86/oprofile/nmi_int.c | 10 +++++----- drivers/char/agp/generic.c | 2 +- drivers/lguest/x86/core.c | 4 ++-- fs/buffer.c | 2 +- include/linux/smp.h | 4 ++-- kernel/hrtimer.c | 2 +- kernel/profile.c | 6 +++--- kernel/rcupdate.c | 2 +- kernel/softirq.c | 2 +- mm/page_alloc.c | 2 +- mm/slab.c | 4 ++-- mm/slub.c | 2 +- net/iucv/iucv.c | 2 +- virt/kvm/kvm_main.c | 8 ++++---- 48 files changed, 84 insertions(+), 84 deletions(-) (limited to 'mm') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 96ed82fd9ee..351407e07e7 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -160,7 +160,7 @@ common_shutdown(int mode, char *restart_cmd) struct halt_info args; args.mode = mode; args.restart_cmd = restart_cmd; - on_each_cpu(common_shutdown_1, &args, 1, 0); + on_each_cpu(common_shutdown_1, &args, 0); } void diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 44114c8dbb2..83df541650f 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -657,7 +657,7 @@ void smp_imb(void) { /* Must wait other processors to flush their icache before continue. */ - if (on_each_cpu(ipi_imb, NULL, 1, 1)) + if (on_each_cpu(ipi_imb, NULL, 1)) printk(KERN_CRIT "smp_imb: timed out\n"); } EXPORT_SYMBOL(smp_imb); @@ -673,7 +673,7 @@ flush_tlb_all(void) { /* Although we don't have any data to pass, we do want to synchronize with the other processors. 
*/ - if (on_each_cpu(ipi_flush_tlb_all, NULL, 1, 1)) { + if (on_each_cpu(ipi_flush_tlb_all, NULL, 1)) { printk(KERN_CRIT "flush_tlb_all: timed out\n"); } } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 6344466b211..5a7c09564d1 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -604,7 +604,7 @@ static inline void ipi_flush_tlb_kernel_range(void *arg) void flush_tlb_all(void) { - on_each_cpu(ipi_flush_tlb_all, NULL, 1, 1); + on_each_cpu(ipi_flush_tlb_all, NULL, 1); } void flush_tlb_mm(struct mm_struct *mm) @@ -631,7 +631,7 @@ void flush_tlb_kernel_page(unsigned long kaddr) ta.ta_start = kaddr; - on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1, 1); + on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); } void flush_tlb_range(struct vm_area_struct *vma, @@ -654,5 +654,5 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) ta.ta_start = start; ta.ta_end = end; - on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1, 1); + on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 9cd818cc700..7dd96c12717 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -707,7 +707,7 @@ ia64_mca_cmc_vector_enable (void *dummy) static void ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused) { - on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0); + on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 0); } /* @@ -719,7 +719,7 @@ ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused) static void ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused) { - on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0); + on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 0); } /* diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 9baa48255c1..19d4493c619 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -6508,7 +6508,7 @@ pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) } /* save the current system wide pmu states */ - ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1); + ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); if (ret) { DPRINT(("on_each_cpu() failed: %d\n", ret)); goto cleanup_reserve; @@ -6553,7 +6553,7 @@ pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) pfm_alt_intr_handler = NULL; - ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1); + ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); if (ret) { DPRINT(("on_each_cpu() failed: %d\n", ret)); } diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 19152dcbf6e..3676468612b 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -285,7 +285,7 @@ smp_flush_tlb_cpumask(cpumask_t xcpumask) void smp_flush_tlb_all (void) { - on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1); + on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1); } void @@ -308,7 +308,7 @@ smp_flush_tlb_mm (struct mm_struct *mm) * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is * rather trivial. 
*/ - on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1); + on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1); } void arch_send_call_function_single_ipi(int cpu) diff --git a/arch/mips/kernel/irq-rm9000.c b/arch/mips/kernel/irq-rm9000.c index ed9febe63d7..b47e4615ec1 100644 --- a/arch/mips/kernel/irq-rm9000.c +++ b/arch/mips/kernel/irq-rm9000.c @@ -49,7 +49,7 @@ static void local_rm9k_perfcounter_irq_startup(void *args) static unsigned int rm9k_perfcounter_irq_startup(unsigned int irq) { - on_each_cpu(local_rm9k_perfcounter_irq_startup, (void *) irq, 0, 1); + on_each_cpu(local_rm9k_perfcounter_irq_startup, (void *) irq, 1); return 0; } @@ -66,7 +66,7 @@ static void local_rm9k_perfcounter_irq_shutdown(void *args) static void rm9k_perfcounter_irq_shutdown(unsigned int irq) { - on_each_cpu(local_rm9k_perfcounter_irq_shutdown, (void *) irq, 0, 1); + on_each_cpu(local_rm9k_perfcounter_irq_shutdown, (void *) irq, 1); } static struct irq_chip rm9k_irq_controller = { diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 7a9ae830be8..4410f172b8a 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -246,7 +246,7 @@ static void flush_tlb_all_ipi(void *info) void flush_tlb_all(void) { - on_each_cpu(flush_tlb_all_ipi, NULL, 1, 1); + on_each_cpu(flush_tlb_all_ipi, NULL, 1); } static void flush_tlb_mm_ipi(void *mm) @@ -366,7 +366,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) .addr2 = end, }; - on_each_cpu(flush_tlb_kernel_range_ipi, &fd, 1, 1); + on_each_cpu(flush_tlb_kernel_range_ipi, &fd, 1); } static void flush_tlb_page_ipi(void *info) diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index b5f6f71b27b..dd2fbd6645c 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -27,7 +27,7 @@ static int op_mips_setup(void) model->reg_setup(ctr); /* Configure the registers on all cpus. */ - on_each_cpu(model->cpu_setup, NULL, 0, 1); + on_each_cpu(model->cpu_setup, NULL, 1); return 0; } @@ -58,7 +58,7 @@ static int op_mips_create_files(struct super_block * sb, struct dentry * root) static int op_mips_start(void) { - on_each_cpu(model->cpu_start, NULL, 0, 1); + on_each_cpu(model->cpu_start, NULL, 1); return 0; } @@ -66,7 +66,7 @@ static int op_mips_start(void) static void op_mips_stop(void) { /* Disable performance monitoring for all counters. 
*/ - on_each_cpu(model->cpu_stop, NULL, 0, 1); + on_each_cpu(model->cpu_stop, NULL, 1); } int __init oprofile_arch_init(struct oprofile_operations *ops) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index e10d25d2d9c..5259d8c2067 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -51,12 +51,12 @@ static struct pdc_btlb_info btlb_info __read_mostly; void flush_data_cache(void) { - on_each_cpu(flush_data_cache_local, NULL, 1, 1); + on_each_cpu(flush_data_cache_local, NULL, 1); } void flush_instruction_cache(void) { - on_each_cpu(flush_instruction_cache_local, NULL, 1, 1); + on_each_cpu(flush_instruction_cache_local, NULL, 1); } #endif @@ -515,7 +515,7 @@ static void cacheflush_h_tmp_function(void *dummy) void flush_cache_all(void) { - on_each_cpu(cacheflush_h_tmp_function, NULL, 1, 1); + on_each_cpu(cacheflush_h_tmp_function, NULL, 1); } void flush_cache_mm(struct mm_struct *mm) diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 126105c76a4..d47f3975c9c 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -292,7 +292,7 @@ void arch_send_call_function_single_ipi(int cpu) void smp_flush_tlb_all(void) { - on_each_cpu(flush_tlb_all_local, NULL, 1, 1); + on_each_cpu(flush_tlb_all_local, NULL, 1); } /* diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index ce0da689a89..b4d6c8777ed 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -1053,7 +1053,7 @@ void flush_tlb_all(void) do_recycle++; } spin_unlock(&sid_lock); - on_each_cpu(flush_tlb_all_local, NULL, 1, 1); + on_each_cpu(flush_tlb_all_local, NULL, 1); if (do_recycle) { spin_lock(&sid_lock); recycle_sids(recycle_ndirty,recycle_dirty_array); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 34843c31841..647f3e8677d 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -747,7 +747,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args) /* Call function on all CPUs. 
One of us will make the * rtas call */ - if (on_each_cpu(rtas_percpu_suspend_me, &data, 1, 0)) + if (on_each_cpu(rtas_percpu_suspend_me, &data, 0)) data.error = -EINVAL; wait_for_completion(&done); diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 368a4934f7e..c3a56d65c5a 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -192,7 +192,7 @@ static void tau_timeout_smp(unsigned long unused) /* schedule ourselves to be run again */ mod_timer(&tau_timer, jiffies + shrink_timer) ; - on_each_cpu(tau_timeout, NULL, 1, 0); + on_each_cpu(tau_timeout, NULL, 0); } /* @@ -234,7 +234,7 @@ int __init TAU_init(void) tau_timer.expires = jiffies + shrink_timer; add_timer(&tau_timer); - on_each_cpu(TAU_init_smp, NULL, 1, 0); + on_each_cpu(TAU_init_smp, NULL, 0); printk("Thermal assist unit "); #ifdef CONFIG_TAU_INT diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 73401e83739..f1a38a6c1e2 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -322,7 +322,7 @@ void snapshot_timebases(void) { if (!cpu_has_feature(CPU_FTR_PURR)) return; - on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1); + on_each_cpu(snapshot_tb_and_purr, NULL, 1); } /* diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index ad928edafb0..2bd12d965db 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -218,7 +218,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz mb(); /* XXX this is sub-optimal but will do for now */ - on_each_cpu(slice_flush_segments, mm, 0, 1); + on_each_cpu(slice_flush_segments, mm, 1); #ifdef CONFIG_SPU_BASE spu_flush_all_slbs(mm); #endif diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c index 4908dc98f9c..17807acb05d 100644 --- a/arch/powerpc/oprofile/common.c +++ b/arch/powerpc/oprofile/common.c @@ -65,7 +65,7 @@ static int op_powerpc_setup(void) /* Configure the registers on all cpus. If an error occurs on one * of the cpus, op_per_cpu_rc will be set to the error */ - on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1); + on_each_cpu(op_powerpc_cpu_setup, NULL, 1); out: if (op_per_cpu_rc) { /* error on setup release the performance counter hardware */ @@ -100,7 +100,7 @@ static int op_powerpc_start(void) if (model->global_start) return model->global_start(ctr); if (model->start) { - on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1); + on_each_cpu(op_powerpc_cpu_start, NULL, 1); return op_per_cpu_rc; } return -EIO; /* No start function is defined for this @@ -115,7 +115,7 @@ static inline void op_powerpc_cpu_stop(void *dummy) static void op_powerpc_stop(void) { if (model->stop) - on_each_cpu(op_powerpc_cpu_stop, NULL, 0, 1); + on_each_cpu(op_powerpc_cpu_stop, NULL, 1); if (model->global_stop) model->global_stop(); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 276b105fb2a..b6781030cfb 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -299,7 +299,7 @@ static void smp_ptlb_callback(void *info) void smp_ptlb_all(void) { - on_each_cpu(smp_ptlb_callback, NULL, 0, 1); + on_each_cpu(smp_ptlb_callback, NULL, 1); } EXPORT_SYMBOL(smp_ptlb_all); #endif /* ! 
CONFIG_64BIT */ @@ -347,7 +347,7 @@ void smp_ctl_set_bit(int cr, int bit) memset(&parms.orvals, 0, sizeof(parms.orvals)); memset(&parms.andvals, 0xff, sizeof(parms.andvals)); parms.orvals[cr] = 1 << bit; - on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1); + on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_set_bit); @@ -361,7 +361,7 @@ void smp_ctl_clear_bit(int cr, int bit) memset(&parms.orvals, 0, sizeof(parms.orvals)); memset(&parms.andvals, 0xff, sizeof(parms.andvals)); parms.andvals[cr] = ~(1L << bit); - on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1); + on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_clear_bit); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index bf7bf2c2236..6037ed2b747 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -909,7 +909,7 @@ static void etr_work_fn(struct work_struct *work) if (!eacr.ea) { /* Both ports offline. Reset everything. */ eacr.dp = eacr.es = eacr.sl = 0; - on_each_cpu(etr_disable_sync_clock, NULL, 0, 1); + on_each_cpu(etr_disable_sync_clock, NULL, 1); del_timer_sync(&etr_timer); etr_update_eacr(eacr); set_bit(ETR_FLAG_EACCES, &etr_flags); diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 71781ba2675..60c50841143 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -197,7 +197,7 @@ static void flush_tlb_all_ipi(void *info) void flush_tlb_all(void) { - on_each_cpu(flush_tlb_all_ipi, 0, 1, 1); + on_each_cpu(flush_tlb_all_ipi, 0, 1); } static void flush_tlb_mm_ipi(void *mm) @@ -284,7 +284,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) fd.addr1 = start; fd.addr2 = end; - on_each_cpu(flush_tlb_kernel_range_ipi, (void *)&fd, 1, 1); + on_each_cpu(flush_tlb_kernel_range_ipi, (void *)&fd, 1); } static void flush_tlb_page_ipi(void *info) diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index 6cfab2e4d34..ebefd2a1437 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -344,7 +344,7 @@ void hugetlb_prefault_arch_hook(struct mm_struct *mm) * also executing in this address space. */ mm->context.sparc64_ctx_val = ctx; - on_each_cpu(context_reload, mm, 0, 0); + on_each_cpu(context_reload, mm, 0); } spin_unlock(&ctx_alloc_lock); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index e07e8c068ae..43b7cb59491 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -363,7 +363,7 @@ static void mcheck_check_cpu(void *info) static void mcheck_timer(struct work_struct *work) { - on_each_cpu(mcheck_check_cpu, NULL, 1, 1); + on_each_cpu(mcheck_check_cpu, NULL, 1); /* * Alert userspace if needed. If we logged an MCE, reduce the @@ -612,7 +612,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, * Collect entries that were still getting written before the * synchronize. 
*/ - on_each_cpu(collect_tscs, cpu_tsc, 1, 1); + on_each_cpu(collect_tscs, cpu_tsc, 1); for (i = next; i < MCE_LOG_LEN; i++) { if (mcelog.entry[i].finished && mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { @@ -737,7 +737,7 @@ static void mce_restart(void) if (next_interval) cancel_delayed_work(&mcheck_work); /* Timer race is harmless here */ - on_each_cpu(mce_init, NULL, 1, 1); + on_each_cpu(mce_init, NULL, 1); next_interval = check_interval * HZ; if (next_interval) schedule_delayed_work(&mcheck_work, diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index 00ccb6c14ec..cc1fccdd31e 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -59,7 +59,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn); static void mce_work_fn(struct work_struct *work) { - on_each_cpu(mce_checkregs, NULL, 1, 1); + on_each_cpu(mce_checkregs, NULL, 1); schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index f9ae93adffe..58043f06d7e 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -180,7 +180,7 @@ void disable_lapic_nmi_watchdog(void) if (atomic_read(&nmi_active) <= 0) return; - on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); + on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); wd_ops->unreserve(); BUG_ON(atomic_read(&nmi_active) != 0); @@ -202,7 +202,7 @@ void enable_lapic_nmi_watchdog(void) return; } - on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); + on_each_cpu(setup_apic_nmi_watchdog, NULL, 1); touch_nmi_watchdog(); } diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 4dc8600d9d2..720640ff36c 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1565,7 +1565,7 @@ void /*__init*/ print_local_APIC(void * dummy) void print_all_local_APICs (void) { - on_each_cpu(print_local_APIC, NULL, 1, 1); + on_each_cpu(print_local_APIC, NULL, 1); } void /*__init*/ print_PIC(void) diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index ef1a8dfcc52..4504c7f5001 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1146,7 +1146,7 @@ void __apicdebuginit print_local_APIC(void * dummy) void print_all_local_APICs (void) { - on_each_cpu(print_local_APIC, NULL, 1, 1); + on_each_cpu(print_local_APIC, NULL, 1); } void __apicdebuginit print_PIC(void) diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 5562dab0bd2..11008e0857c 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -218,7 +218,7 @@ static void __acpi_nmi_enable(void *__unused) void acpi_nmi_enable(void) { if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); + on_each_cpu(__acpi_nmi_enable, NULL, 1); } static void __acpi_nmi_disable(void *__unused) @@ -232,7 +232,7 @@ static void __acpi_nmi_disable(void *__unused) void acpi_nmi_disable(void) { if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); + on_each_cpu(__acpi_nmi_disable, NULL, 1); } void setup_apic_nmi_watchdog(void *unused) diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 2f1e4f503c9..bbdcb17b3df 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -225,7 +225,7 @@ static void __acpi_nmi_enable(void *__unused) void acpi_nmi_enable(void) { if (atomic_read(&nmi_active) && 
nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); + on_each_cpu(__acpi_nmi_enable, NULL, 1); } static void __acpi_nmi_disable(void *__unused) @@ -239,7 +239,7 @@ static void __acpi_nmi_disable(void *__unused) void acpi_nmi_disable(void) { if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); + on_each_cpu(__acpi_nmi_disable, NULL, 1); } void setup_apic_nmi_watchdog(void *unused) diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index 9bb2363851a..fec1ecedc9b 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -238,6 +238,6 @@ static void do_flush_tlb_all(void *info) void flush_tlb_all(void) { - on_each_cpu(do_flush_tlb_all, NULL, 1, 1); + on_each_cpu(do_flush_tlb_all, NULL, 1); } diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index a1f07d79320..184a367516d 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -270,5 +270,5 @@ static void do_flush_tlb_all(void *info) void flush_tlb_all(void) { - on_each_cpu(do_flush_tlb_all, NULL, 1, 1); + on_each_cpu(do_flush_tlb_all, NULL, 1); } diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 0a03d57f9b3..0dcae19ed62 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -301,7 +301,7 @@ static int __init vsyscall_init(void) #ifdef CONFIG_SYSCTL register_sysctl_table(kernel_root_table2); #endif - on_each_cpu(cpu_vsyscall_init, NULL, 0, 1); + on_each_cpu(cpu_vsyscall_init, NULL, 1); hotcpu_notifier(cpu_vsyscall_notifier, 0); return 0; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5534fe59b5f..10ce6ee4c49 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2968,7 +2968,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); if (vmx->vmcs) { - on_each_cpu(__vcpu_clear, vmx, 0, 1); + on_each_cpu(__vcpu_clear, vmx, 1); free_vmcs(vmx->vmcs); vmx->vmcs = NULL; } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 04f596eab74..abea08459a7 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1072,7 +1072,7 @@ static void do_flush_tlb_all(void *info) /* flush the TLB of every active CPU in the system */ void flush_tlb_all(void) { - on_each_cpu(do_flush_tlb_all, 0, 1, 1); + on_each_cpu(do_flush_tlb_all, 0, 1); } /* used to set up the trampoline for other CPUs when the memory manager diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 60bcb5b6a37..9b836ba9ded 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -106,7 +106,7 @@ static void cpa_flush_all(unsigned long cache) { BUG_ON(irqs_disabled()); - on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1); + on_each_cpu(__cpa_flush_all, (void *) cache, 1); } static void __cpa_flush_range(void *arg) @@ -127,7 +127,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) BUG_ON(irqs_disabled()); WARN_ON(PAGE_ALIGN(start) != start); - on_each_cpu(__cpa_flush_range, NULL, 1, 1); + on_each_cpu(__cpa_flush_range, NULL, 1); if (!cache) return; diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cc48d3fde54..3238ad32ffd 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -218,8 +218,8 @@ static int nmi_setup(void) } } - on_each_cpu(nmi_save_registers, NULL, 0, 1); - on_each_cpu(nmi_cpu_setup, NULL, 0, 1); + on_each_cpu(nmi_save_registers, NULL, 1); + on_each_cpu(nmi_cpu_setup, 
NULL, 1); nmi_enabled = 1; return 0; } @@ -271,7 +271,7 @@ static void nmi_shutdown(void) { struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); nmi_enabled = 0; - on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); + on_each_cpu(nmi_cpu_shutdown, NULL, 1); unregister_die_notifier(&profile_exceptions_nb); model->shutdown(msrs); free_msrs(); @@ -285,7 +285,7 @@ static void nmi_cpu_start(void *dummy) static int nmi_start(void) { - on_each_cpu(nmi_cpu_start, NULL, 0, 1); + on_each_cpu(nmi_cpu_start, NULL, 1); return 0; } @@ -297,7 +297,7 @@ static void nmi_cpu_stop(void *dummy) static void nmi_stop(void) { - on_each_cpu(nmi_cpu_stop, NULL, 0, 1); + on_each_cpu(nmi_cpu_stop, NULL, 1); } struct op_counter_config counter_config[OP_MAX_COUNTER]; diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index 564daaa6c7d..eaa1a355bb3 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -1249,7 +1249,7 @@ static void ipi_handler(void *null) void global_cache_flush(void) { - if (on_each_cpu(ipi_handler, NULL, 1, 1) != 0) + if (on_each_cpu(ipi_handler, NULL, 1) != 0) panic(PFX "timed out waiting for the other CPUs!\n"); } EXPORT_SYMBOL(global_cache_flush); diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 2e554a4ab33..95dfda52b4f 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -478,7 +478,7 @@ void __init lguest_arch_host_init(void) cpu_had_pge = 1; /* adjust_pge is a helper function which sets or unsets the PGE * bit on its CPU, depending on the argument (0 == unset). */ - on_each_cpu(adjust_pge, (void *)0, 0, 1); + on_each_cpu(adjust_pge, (void *)0, 1); /* Turn off the feature in the global feature set. */ clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); } @@ -493,7 +493,7 @@ void __exit lguest_arch_host_fini(void) if (cpu_had_pge) { set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); /* adjust_pge's argument "1" means set PGE. 
*/ - on_each_cpu(adjust_pge, (void *)1, 0, 1); + on_each_cpu(adjust_pge, (void *)1, 1); } put_online_cpus(); } diff --git a/fs/buffer.c b/fs/buffer.c index a073f3f4f01..5c23ef560d0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1464,7 +1464,7 @@ static void invalidate_bh_lru(void *arg) void invalidate_bh_lrus(void) { - on_each_cpu(invalidate_bh_lru, NULL, 1, 1); + on_each_cpu(invalidate_bh_lru, NULL, 1); } EXPORT_SYMBOL_GPL(invalidate_bh_lrus); diff --git a/include/linux/smp.h b/include/linux/smp.h index 338cad1b954..55261101d09 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -89,7 +89,7 @@ static inline void init_call_single_data(void) /* * Call a function on all processors */ -int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait); +int on_each_cpu(void (*func) (void *info), void *info, int wait); #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ #define MSG_ALL 0x8001 @@ -121,7 +121,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) } #define smp_call_function(func, info, wait) \ (up_smp_call_function(func, info)) -#define on_each_cpu(func,info,retry,wait) \ +#define on_each_cpu(func,info,wait) \ ({ \ local_irq_disable(); \ func(info); \ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 421be5fe5cc..50e8616d795 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -623,7 +623,7 @@ static void retrigger_next_event(void *arg) void clock_was_set(void) { /* Retrigger the CPU local events everywhere */ - on_each_cpu(retrigger_next_event, NULL, 0, 1); + on_each_cpu(retrigger_next_event, NULL, 1); } /* diff --git a/kernel/profile.c b/kernel/profile.c index ae7ead82cbc..58926411eb2 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -252,7 +252,7 @@ static void profile_flip_buffers(void) mutex_lock(&profile_flip_mutex); j = per_cpu(cpu_profile_flip, get_cpu()); put_cpu(); - on_each_cpu(__profile_flip_buffers, NULL, 0, 1); + on_each_cpu(__profile_flip_buffers, NULL, 1); for_each_online_cpu(cpu) { struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j]; for (i = 0; i < NR_PROFILE_HIT; ++i) { @@ -275,7 +275,7 @@ static void profile_discard_flip_buffers(void) mutex_lock(&profile_flip_mutex); i = per_cpu(cpu_profile_flip, get_cpu()); put_cpu(); - on_each_cpu(__profile_flip_buffers, NULL, 0, 1); + on_each_cpu(__profile_flip_buffers, NULL, 1); for_each_online_cpu(cpu) { struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i]; memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit)); @@ -558,7 +558,7 @@ static int __init create_hash_tables(void) out_cleanup: prof_on = 0; smp_mb(); - on_each_cpu(profile_nop, NULL, 0, 1); + on_each_cpu(profile_nop, NULL, 1); for_each_online_cpu(cpu) { struct page *page; diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index c09605f8d16..6addab5e6d8 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -127,7 +127,7 @@ void rcu_barrier(void) * until all the callbacks are queued. 
*/ rcu_read_lock(); - on_each_cpu(rcu_barrier_func, NULL, 0, 1); + on_each_cpu(rcu_barrier_func, NULL, 1); rcu_read_unlock(); wait_for_completion(&rcu_barrier_completion); mutex_unlock(&rcu_barrier_mutex); diff --git a/kernel/softirq.c b/kernel/softirq.c index d73afb4764e..c159fd09477 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -674,7 +674,7 @@ __init int spawn_ksoftirqd(void) /* * Call a function on all processors */ -int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait) +int on_each_cpu(void (*func) (void *info), void *info, int wait) { int ret = 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2f552955a02..53242344a77 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -918,7 +918,7 @@ void drain_local_pages(void *arg) */ void drain_all_pages(void) { - on_each_cpu(drain_local_pages, NULL, 0, 1); + on_each_cpu(drain_local_pages, NULL, 1); } #ifdef CONFIG_HIBERNATION diff --git a/mm/slab.c b/mm/slab.c index 046607f05f3..0772abb412b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2454,7 +2454,7 @@ static void drain_cpu_caches(struct kmem_cache *cachep) struct kmem_list3 *l3; int node; - on_each_cpu(do_drain, cachep, 1, 1); + on_each_cpu(do_drain, cachep, 1); check_irq_on(); for_each_online_node(node) { l3 = cachep->nodelists[node]; @@ -3939,7 +3939,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, } new->cachep = cachep; - on_each_cpu(do_ccupdate_local, (void *)new, 1, 1); + on_each_cpu(do_ccupdate_local, (void *)new, 1); check_irq_on(); cachep->batchcount = batchcount; diff --git a/mm/slub.c b/mm/slub.c index 0987d1cd943..44715eb70c0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1497,7 +1497,7 @@ static void flush_cpu_slab(void *d) static void flush_all(struct kmem_cache *s) { #ifdef CONFIG_SMP - on_each_cpu(flush_cpu_slab, s, 1, 1); + on_each_cpu(flush_cpu_slab, s, 1); #else unsigned long flags; diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 94d5a45c3a5..a178e27e7b1 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -545,7 +545,7 @@ out: */ static void iucv_disable(void) { - on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1); + on_each_cpu(iucv_retrieve_cpu, NULL, 1); kfree(iucv_path_table); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ea1f595f8a8..d4eae6af073 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1286,7 +1286,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, * in vmx root mode. 
*/ printk(KERN_INFO "kvm: exiting hardware virtualization\n"); - on_each_cpu(hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 1); } return NOTIFY_OK; } @@ -1479,7 +1479,7 @@ int kvm_init(void *opaque, unsigned int vcpu_size, goto out_free_1; } - on_each_cpu(hardware_enable, NULL, 0, 1); + on_each_cpu(hardware_enable, NULL, 1); r = register_cpu_notifier(&kvm_cpu_notifier); if (r) goto out_free_2; @@ -1525,7 +1525,7 @@ out_free_3: unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); out_free_2: - on_each_cpu(hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 1); out_free_1: kvm_arch_hardware_unsetup(); out_free_0: @@ -1547,7 +1547,7 @@ void kvm_exit(void) sysdev_class_unregister(&kvm_sysdev_class); unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); - on_each_cpu(hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 1); kvm_arch_hardware_unsetup(); kvm_arch_exit(); kvm_exit_debug(); -- cgit v1.2.3-70-g09d2 From 38510754a50192a072210e24fdc4ae65592182f0 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Mon, 14 Jan 2008 23:35:32 +0100 Subject: avr32: Use a quicklist for PTE allocation as well Using a quicklist to allocate PTEs might be slightly faster than using the page allocator directly since we might avoid zeroing the page after each allocation. Signed-off-by: Haavard Skinnemoen --- include/asm-avr32/pgalloc.h | 28 +++++++++++++++------------- mm/Kconfig | 2 +- 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'mm') diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h index a291f59659c..64082132394 100644 --- a/include/asm-avr32/pgalloc.h +++ b/include/asm-avr32/pgalloc.h @@ -13,6 +13,7 @@ #include <linux/quicklist.h> #define QUICK_PGD 0 /* Preserve kernel mappings over free */ +#define QUICK_PT 1 /* Zero on free */ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) @@ -52,34 +53,34 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte; - - pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); - - return pte; + return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); } -static inline struct page *pte_alloc_one(struct mm_struct *mm, +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *pte; + struct page *page; + void *pg; - pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); - if (!pte) + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + if (!pg) return NULL; - pgtable_page_ctor(pte); - return pte; + + page = virt_to_page(pg); + pgtable_page_ctor(page); + + return page; } static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - free_page((unsigned long)pte); + quicklist_free(QUICK_PT, NULL, pte); } static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { pgtable_page_dtor(pte); - __free_page(pte); + quicklist_free_page(QUICK_PT, NULL, pte); } #define __pte_free_tlb(tlb,pte) \ @@ -91,6 +92,7 @@ do { \ static inline void check_pgt_cache(void) { quicklist_trim(QUICK_PGD, NULL, 25, 16); + quicklist_trim(QUICK_PT, NULL, 25, 16); } #endif /* __ASM_AVR32_PGALLOC_H */ diff --git a/mm/Kconfig b/mm/Kconfig index 3aa819d628c..60b26195278 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -199,7 +199,7 @@ config BOUNCE config NR_QUICK int depends on QUICKLIST - default "2" if SUPERH + default "2" if SUPERH 
|| AVR32 default "1" config VIRT_TO_BUS -- cgit v1.2.3-70-g09d2 From cc1050bafebfb1d7935331282e948b5016318192 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 13 Jun 2008 19:08:52 -0700 Subject: x86: replace shrink_active_range() with remove_active_range() In case we have kva before the ramdisk on a node, we still need to use those ranges. v2: reserve_early the kva ram area, in case there are holes in highmem, to avoid those areas being treated as free high pages. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/discontig_32.c | 45 ++++++++++++++++++++++++--------------------- include/linux/mm.h | 3 ++- mm/page_alloc.c | 29 +++++++++++++++++++++++------ 3 files changed, 49 insertions(+), 28 deletions(-) (limited to 'mm') diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index accc7c6c57f..c3f119e99e0 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -230,8 +230,8 @@ static unsigned long calculate_numa_remap_pages(void) unsigned long size, reserve_pages = 0; for_each_online_node(nid) { - u64 node_end_target; - u64 node_end_final; + u64 node_kva_target; + u64 node_kva_final; /* * The acpi/srat node info can show hot-add memory zones @@ -254,42 +254,45 @@ static unsigned long calculate_numa_remap_pages(void) /* now the roundup is correct, convert to PAGE_SIZE pages */ size = size * PTRS_PER_PTE; - node_end_target = round_down(node_end_pfn[nid] - size, + node_kva_target = round_down(node_end_pfn[nid] - size, PTRS_PER_PTE); - node_end_target <<= PAGE_SHIFT; + node_kva_target <<= PAGE_SHIFT; do { - node_end_final = find_e820_area(node_end_target, + node_kva_final = find_e820_area(node_kva_target, ((u64)node_end_pfn[nid])<<PAGE_SHIFT, ((u64)size)<<PAGE_SHIFT, LARGE_PAGE_BYTES); - node_end_target -= LARGE_PAGE_BYTES; + node_kva_target -= LARGE_PAGE_BYTES; - } while (node_end_final == -1ULL && - (node_end_target>>PAGE_SHIFT) > (node_start_pfn[nid])); + } while (node_kva_final == -1ULL && + (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); - if (node_end_final == -1ULL) + if (node_kva_final == -1ULL) panic("Can not get kva ram\n"); - printk("Reserving %ld pages of KVA for lmem_map of node %d\n", - size, nid); node_remap_size[nid] = size; node_remap_offset[nid] = reserve_pages; reserve_pages += size; - printk("Shrinking node %d from %ld pages to %lld pages\n", - nid, node_end_pfn[nid], node_end_final>>PAGE_SHIFT); + printk("Reserving %ld pages of KVA for lmem_map of node %d at %llx\n", + size, nid, node_kva_final>>PAGE_SHIFT); /* * prevent kva address below max_low_pfn want it on system * with less memory later. * layout will be: KVA address , KVA RAM + * + * we are supposed to only record the one less than max_low_pfn + * but we could have some hole in high memory, and it will only + * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide + * to use it as free. 
+ * So reserve_early here, hope we don't run out of that array */ - if ((node_end_final>>PAGE_SHIFT) < max_low_pfn) - reserve_early(node_end_final, - node_end_final+(((u64)size)<<PAGE_SHIFT), - "KVA RAM"); - - node_remap_start_pfn[nid] = node_end_final>>PAGE_SHIFT; - node_remap_start_pfn[nid] = node_end_pfn[nid]; - shrink_active_range(nid, node_end_pfn[nid]); + reserve_early(node_kva_final, + node_kva_final+(((u64)size)<<PAGE_SHIFT), + "KVA RAM"); + + node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; + remove_active_range(nid, node_remap_start_pfn[nid], + node_remap_start_pfn[nid] + size); } printk("Reserving total of %ld pages for numa KVA remap\n", reserve_pages); diff --git a/include/linux/mm.h b/include/linux/mm.h index ce8e397a61f..034a3156d2f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -998,7 +998,8 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, extern void free_area_init_nodes(unsigned long *max_zone_pfn); extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn); +extern void remove_active_range(unsigned int nid, unsigned long start_pfn, + unsigned long end_pfn); extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index eee5ba7509c..d80e1868e57 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3552,30 +3552,47 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, } /** - * shrink_active_range - Shrink an existing registered range of PFNs + * remove_active_range - Shrink an existing registered range of PFNs * @nid: The node id the range is on that should be shrunk - * @new_end_pfn: The new PFN of the range + * @start_pfn: The first PFN of the range to remove + * @end_pfn: The end PFN of the range to remove * * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node. * The map is kept near the end physical page range that has already been * registered. This function allows an arch to shrink an existing registered * range. 
*/ -void __init shrink_active_range(unsigned int nid, unsigned long new_end_pfn) +void __init remove_active_range(unsigned int nid, unsigned long start_pfn, + unsigned long end_pfn) { int i, j; int removed = 0; + printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n", + nid, start_pfn, end_pfn); + /* Find the old active region end and shrink */ for_each_active_range_index_in_nid(i, nid) { - if (early_node_map[i].start_pfn >= new_end_pfn) { + if (early_node_map[i].start_pfn >= start_pfn && + early_node_map[i].end_pfn <= end_pfn) { /* clear it */ + early_node_map[i].start_pfn = 0; early_node_map[i].end_pfn = 0; removed = 1; continue; } - if (early_node_map[i].end_pfn > new_end_pfn) { - early_node_map[i].end_pfn = new_end_pfn; + if (early_node_map[i].start_pfn < start_pfn && + early_node_map[i].end_pfn > start_pfn) { + unsigned long temp_end_pfn = early_node_map[i].end_pfn; + early_node_map[i].end_pfn = start_pfn; + if (temp_end_pfn > end_pfn) + add_active_range(nid, end_pfn, temp_end_pfn); + continue; + } + if (early_node_map[i].start_pfn >= start_pfn && + early_node_map[i].end_pfn > end_pfn && + early_node_map[i].start_pfn < end_pfn) { + early_node_map[i].start_pfn = end_pfn; continue; } } -- cgit v1.2.3-70-g09d2 From b5bc6c0e55000dab86b73f838f5ad02908b23755 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 14 Jun 2008 18:32:52 -0700 Subject: x86, mm: use add_highpages_with_active_regions() for high pages init v2 Use early_node_map to init high pages, so we can remove page_is_ram() and page_is_reserved_early() from the big loop with add_one_highpage. Also remove page_is_reserved_early(); it is not needed anymore. v2: fix the build of other platforms Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 11 -------- arch/x86/mm/discontig_32.c | 19 ++++++-------- arch/x86/mm/init_32.c | 62 ++++++++++++++++++++++++++++++++++++++-------- include/asm-x86/e820.h | 1 - include/asm-x86/highmem.h | 3 +++ include/linux/mm.h | 2 ++ mm/page_alloc.c | 8 ++++++ 7 files changed, 71 insertions(+), 35 deletions(-) (limited to 'mm') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 5051ce744b4..ed46b7a6bc1 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -612,17 +612,6 @@ void __init free_early(u64 start, u64 end) early_res[j - 1].end = 0; } -int __init page_is_reserved_early(unsigned long pagenr) -{ - u64 start = (u64)pagenr << PAGE_SHIFT; - int i; - struct early_res *r; - - i = find_overlapped_early(start, start + PAGE_SIZE); - r = &early_res[i]; - return (i < MAX_EARLY_RES && r->end); -} - void __init early_res_to_bootmem(u64 start, u64 end) { int i; diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index c3f119e99e0..7c4d0255f8d 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -100,7 +100,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, #endif extern unsigned long find_max_low_pfn(void); -extern void add_one_highpage_init(struct page *, int, int); extern unsigned long highend_pfn, highstart_pfn; #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) @@ -432,10 +431,10 @@ void __init set_highmem_pages_init(int bad_ppro) { #ifdef CONFIG_HIGHMEM struct zone *zone; - struct page *page; + int nid; for_each_zone(zone) { - unsigned long node_pfn, zone_start_pfn, zone_end_pfn; + unsigned long zone_start_pfn, zone_end_pfn; if (!is_highmem(zone)) continue; zone_start_pfn = zone->zone_start_pfn; zone_end_pfn = 
zone_start_pfn + zone->spanned_pages; + nid = zone_to_nid(zone); printk("Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, zone_to_nid(zone), - zone_start_pfn, zone_end_pfn); - - for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { - if (!pfn_valid(node_pfn)) - continue; - page = pfn_to_page(node_pfn); - add_one_highpage_init(page, node_pfn, bad_ppro); - } + zone->name, nid, zone_start_pfn, zone_end_pfn); + + add_highpages_with_active_regions(nid, zone_start_pfn, + zone_end_pfn, bad_ppro); } totalram_pages += totalhigh_pages; #endif diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index abadb1da70d..ba07a489230 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -287,10 +287,10 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) +static void __init +add_one_highpage_init(struct page *page, int pfn, int bad_ppro) { - if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn)) && - !page_is_reserved_early(pfn)) { + if (!(bad_ppro && page_kills_ppro(pfn))) { ClearPageReserved(page); init_page_count(page); __free_page(page); @@ -299,18 +299,58 @@ void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) SetPageReserved(page); } +struct add_highpages_data { + unsigned long start_pfn; + unsigned long end_pfn; + int bad_ppro; +}; + +static void __init add_highpages_work_fn(unsigned long start_pfn, + unsigned long end_pfn, void *datax) +{ + int node_pfn; + struct page *page; + unsigned long final_start_pfn, final_end_pfn; + struct add_highpages_data *data; + int bad_ppro; + + data = (struct add_highpages_data *)datax; + bad_ppro = data->bad_ppro; + + final_start_pfn = max(start_pfn, data->start_pfn); + final_end_pfn = min(end_pfn, data->end_pfn); + if (final_start_pfn >= final_end_pfn) + return; + + for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; + node_pfn++) { + if (!pfn_valid(node_pfn)) + continue; + page = pfn_to_page(node_pfn); + add_one_highpage_init(page, node_pfn, bad_ppro); + } + +} + +void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn, + int bad_ppro) +{ + struct add_highpages_data data; + + data.start_pfn = start_pfn; + data.end_pfn = end_pfn; + data.bad_ppro = bad_ppro; + + work_with_active_regions(nid, add_highpages_work_fn, &data); +} + #ifndef CONFIG_NUMA static void __init set_highmem_pages_init(int bad_ppro) { - int pfn; + add_highpages_with_active_regions(0, highstart_pfn, highend_pfn, + bad_ppro); - for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) { - /* - * Holes under sparsemem might not have no mem_map[]: - */ - if (pfn_valid(pfn)) - add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); - } totalram_pages += totalhigh_pages; } #endif /* !CONFIG_NUMA */ diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 6b0ce745a60..55d31059690 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -86,7 +86,6 @@ extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); extern void reserve_early(u64 start, u64 end, char *name); extern void free_early(u64 start, u64 end); extern void early_res_to_bootmem(u64 start, u64 end); -extern int page_is_reserved_early(unsigned long pagenr); extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); extern unsigned long e820_end_of_ram(void); diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h index e153f3b4477..85c4fea41ff 100644 --- 
a/include/asm-x86/highmem.h +++ b/include/asm-x86/highmem.h @@ -74,6 +74,9 @@ struct page *kmap_atomic_to_page(void *ptr); #define flush_cache_kmaps() do { } while (0) +extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn, int bad_ppro); + #endif /* __KERNEL__ */ #endif /* _ASM_HIGHMEM_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 034a3156d2f..e4de460907c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,6 +1011,8 @@ extern unsigned long find_min_pfn_with_active_regions(void); extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); +typedef void (*work_fn_t)(unsigned long, unsigned long, void *); +extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID extern int early_pfn_to_nid(unsigned long pfn); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d80e1868e57..41c6e3aa059 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2929,6 +2929,14 @@ void __init free_bootmem_with_active_regions(int nid, } } +void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) +{ + int i; + + for_each_active_range_index_in_nid(i, nid) + work_fn(early_node_map[i].start_pfn, early_node_map[i].end_pfn, + data); +} /** * sparse_memory_present_with_active_regions - Call memory_present for each active range * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. -- cgit v1.2.3-70-g09d2 From d52d53b8a5b258bfaab9223a5e7284fcfdd48577 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Jun 2008 20:10:55 -0700 Subject: RFC x86: try to remove arch_get_ram_range We want to remove arch_get_ram_range() and use early_node_map instead.
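As an illustration of the callback-driven replacement (a hypothetical caller, sketched against the work_fn_t/work_with_active_regions() interface this series introduces; not part of the patch), a user walks the active PFN ranges of a node like this:

/*
 * Sketch only: count the RAM pages registered in early_node_map[]
 * for one node via the callback API.
 */
static int __init count_ram_pages_fn(unsigned long start_pfn,
				     unsigned long end_pfn, void *data)
{
	unsigned long *total = data;

	*total += end_pfn - start_pfn;
	return 0;	/* a non-zero return stops the iteration */
}

static unsigned long __init count_ram_pages(int nid)
{
	unsigned long total = 0;

	work_with_active_regions(nid, count_ram_pages_fn, &total);
	return total;
}

This is the same pattern iommu_prepare_with_active_regions() follows below: all state in and out of the callback travels through the opaque void *data pointer.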
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 6 ++++-- drivers/pci/intel-iommu.c | 51 ++++++++++++++++++++++++++++++++++------------- include/linux/mm.h | 2 +- mm/page_alloc.c | 10 +++++++--- 4 files changed, 49 insertions(+), 20 deletions(-) (limited to 'mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 65d55056b6e..a0484adbf59 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -298,7 +298,7 @@ struct add_highpages_data { unsigned long end_pfn; }; -static void __init add_highpages_work_fn(unsigned long start_pfn, +static int __init add_highpages_work_fn(unsigned long start_pfn, unsigned long end_pfn, void *datax) { int node_pfn; @@ -311,7 +311,7 @@ static void __init add_highpages_work_fn(unsigned long start_pfn, final_start_pfn = max(start_pfn, data->start_pfn); final_end_pfn = min(end_pfn, data->end_pfn); if (final_start_pfn >= final_end_pfn) - return; + return 0; for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; node_pfn++) { @@ -321,6 +321,8 @@ static void __init add_highpages_work_fn(unsigned long start_pfn, add_one_highpage_init(page, node_pfn); } + return 0; + } void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 66c0fd21894..bb0642318a9 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1637,12 +1637,43 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, } #ifdef CONFIG_DMAR_GFX_WA -extern int arch_get_ram_range(int slot, u64 *addr, u64 *size); +struct iommu_prepare_data { + struct pci_dev *pdev; + int ret; +}; + +static int __init iommu_prepare_work_fn(unsigned long start_pfn, + unsigned long end_pfn, void *datax) +{ + struct iommu_prepare_data *data; + + data = (struct iommu_prepare_data *)datax; + + data->ret = iommu_prepare_identity_map(data->pdev, + start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); + return data->ret; + +} + +static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev) +{ + int nid; + struct iommu_prepare_data data; + + data.pdev = pdev; + data.ret = 0; + + for_each_online_node(nid) { + work_with_active_regions(nid, iommu_prepare_work_fn, &data); + if (data.ret) + return data.ret; + } + return data.ret; +} + static void __init iommu_prepare_gfx_mapping(void) { struct pci_dev *pdev = NULL; - u64 base, size; - int slot; int ret; for_each_pci_dev(pdev) { @@ -1651,17 +1682,9 @@ static void __init iommu_prepare_gfx_mapping(void) continue; printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n", pci_name(pdev)); - slot = arch_get_ram_range(0, &base, &size); - while (slot >= 0) { - ret = iommu_prepare_identity_map(pdev, - base, base + size); - if (ret) - goto error; - slot = arch_get_ram_range(slot, &base, &size); - } - continue; -error: - printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); + ret = iommu_prepare_with_active_regions(pdev); + if (ret) + printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); } } #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d647b24041..cf1cd3a2ed7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,7 +1011,7 @@ extern unsigned long find_min_pfn_with_active_regions(void); extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); -typedef void (*work_fn_t)(unsigned long, unsigned long, void *); +typedef int (*work_fn_t)(unsigned long, unsigned long, void *); extern void work_with_active_regions(int nid, work_fn_t work_fn, void
*data); extern void sparse_memory_present_with_active_regions(int nid); #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 41c6e3aa059..e25b6b24f84 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2932,10 +2932,14 @@ void __init free_bootmem_with_active_regions(int nid, void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) { int i; + int ret; - for_each_active_range_index_in_nid(i, nid) - work_fn(early_node_map[i].start_pfn, early_node_map[i].end_pfn, - data); + for_each_active_range_index_in_nid(i, nid) { + ret = work_fn(early_node_map[i].start_pfn, + early_node_map[i].end_pfn, data); + if (ret) + break; + } } /** * sparse_memory_present_with_active_regions - Call memory_present for each active range -- cgit v1.2.3-70-g09d2 From e2fc252e0ce695b4c4abe27bb073c35bd0d73252 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 22 Jun 2008 07:22:12 -0700 Subject: x86 boot: show pfn addresses in hex not decimal in some kernel info printks Page frame numbers (the portion of physical addresses above the low order page offsets) are displayed in several kernel debug and info prints in decimal, not hex. Decimal addresses are unreadable; use hex. Signed-off-by: Paul Jackson Cc: "Yinghai Lu" Cc: "Jack Steiner" Cc: "Mike Travis" Cc: "Huang Cc: Ying" Cc: "Andi Kleen" Cc: "Andrew Morton" Cc: Paul Jackson Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- mm/page_alloc.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'mm') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 600c9de237a..512f779fc6a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -990,7 +990,7 @@ unsigned long __init e820_end_of_ram(void) if (last_pfn > end_user_pfn) last_pfn = end_user_pfn; - printk(KERN_INFO "last_pfn = %lu max_arch_pfn = %lu\n", + printk(KERN_INFO "last_pfn = 0x%lx max_arch_pfn = 0x%lx\n", last_pfn, max_arch_pfn); return last_pfn; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e25b6b24f84..c09c2c8d2c6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3520,7 +3520,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, { int i; - printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) " + printk(KERN_DEBUG "Entering add_active_range(%d, 0x%lx, 0x%lx) " "%d entries of %d used\n", nid, start_pfn, end_pfn, nr_nodemap_entries, MAX_ACTIVE_REGIONS); @@ -3936,7 +3936,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) for (i = 0; i < MAX_NR_ZONES; i++) { if (i == ZONE_MOVABLE) continue; - printk(" %-8s %8lu -> %8lu\n", + printk(" %-8s 0x%8lx -> 0x%8lx\n", zone_names[i], arch_zone_lowest_possible_pfn[i], arch_zone_highest_possible_pfn[i]); @@ -3952,7 +3952,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) /* Print out the early_node_map[] */ printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); for (i = 0; i < nr_nodemap_entries; i++) - printk(" %3d: %8lu -> %8lu\n", early_node_map[i].nid, + printk(" %3d: 0x%8lx -> 0x%8lx\n", early_node_map[i].nid, early_node_map[i].start_pfn, early_node_map[i].end_pfn); -- cgit v1.2.3-70-g09d2 From 2bc0d2615a15a93d344abbe8cb1b9056122bce9d Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 22 Jun 2008 07:22:17 -0700 Subject: x86 boot: more consistently use type int for node ids Everywhere I look, node ids are of type 'int', except in this one case, which has 'unsigned long'. Change this one to 'int' as well.
There is nothing special about the way this variable 'nid' is used in this routine to justify using an unusual type here. Signed-off-by: Paul Jackson Cc: "Yinghai Lu" Cc: "Jack Steiner" Cc: "Mike Travis" Cc: "Huang Cc: Ying" Cc: "Andi Kleen" Cc: "Andrew Morton" Cc: Paul Jackson Signed-off-by: Ingo Molnar --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c09c2c8d2c6..b604c64a033 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3669,7 +3669,7 @@ static void __init sort_node_map(void) } /* Find the lowest pfn for a node */ -unsigned long __init find_min_pfn_for_node(unsigned long nid) +unsigned long __init find_min_pfn_for_node(int nid) { int i; unsigned long min_pfn = ULONG_MAX; @@ -3680,7 +3680,7 @@ unsigned long __init find_min_pfn_for_node(unsigned long nid) if (min_pfn == ULONG_MAX) { printk(KERN_WARNING - "Could not find start_pfn for node %lu\n", nid); + "Could not find start_pfn for node %d\n", nid); return 0; } -- cgit v1.2.3-70-g09d2 From 5dab8ec139be215fbaba216fb4aea914d0f4dac5 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 25 Jun 2008 05:44:40 -0700 Subject: mm, generic, x86 boot: more tweaks to hex prints of some pfn addresses Fix some problems with (and applies on top of) a previous patch: x86 boot: show pfn addresses in hex not decimal in some kernel info printks Primarily change "0x%8lx" format, which displays with a right aligned space filled hex number (spaces between the "0x" prefix and the number), into "%0#10lx" format, which zero fills instead of space fills, and which uses the printf flag '#' to request the "0x" prefix instead of hard coding it. Also replace some other "0x%lx" formats with "%#lx", making use of the '#' printf flag again. 
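For reference, the three formats differ only in prefix and padding; a small userspace illustration (not part of the patch) shows what each produces for a sample pfn:

#include <stdio.h>

int main(void)
{
	unsigned long pfn = 0x9f000;

	printf("[0x%8lx]\n", pfn);	/* [0x   9f000] - space filled */
	printf("[%0#10lx]\n", pfn);	/* [0x0009f000] - zero filled  */
	printf("[%#lx]\n", pfn);	/* [0x9f000]    - no padding   */
	return 0;
}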
Signed-off-by: Paul Jackson Cc: "Yinghai Lu" Cc: "Jack Steiner" Cc: "Mike Travis" Cc: "Huang Cc: Ying" Cc: "Andi Kleen" Cc: "Andrew Morton" Cc: Paul Jackson Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- mm/page_alloc.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'mm') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 22cfd665224..1dcb66533df 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1014,7 +1014,7 @@ unsigned long __init e820_end_of_ram(void) if (last_pfn > end_user_pfn) last_pfn = end_user_pfn; - printk(KERN_INFO "last_pfn = 0x%lx max_arch_pfn = 0x%lx\n", + printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", last_pfn, max_arch_pfn); return last_pfn; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b604c64a033..f024b9b3a2a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3520,7 +3520,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, { int i; - printk(KERN_DEBUG "Entering add_active_range(%d, 0x%lx, 0x%lx) " + printk(KERN_DEBUG "Entering add_active_range(%d, %#lx, %#lx) " "%d entries of %d used\n", nid, start_pfn, end_pfn, nr_nodemap_entries, MAX_ACTIVE_REGIONS); @@ -3936,7 +3936,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) for (i = 0; i < MAX_NR_ZONES; i++) { if (i == ZONE_MOVABLE) continue; - printk(" %-8s 0x%8lx -> 0x%8lx\n", + printk(" %-8s %0#10lx -> %0#10lx\n", zone_names[i], arch_zone_lowest_possible_pfn[i], arch_zone_highest_possible_pfn[i]); @@ -3952,7 +3952,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) /* Print out the early_node_map[] */ printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); for (i = 0; i < nr_nodemap_entries; i++) - printk(" %3d: 0x%8lx -> 0x%8lx\n", early_node_map[i].nid, + printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid, early_node_map[i].start_pfn, early_node_map[i].end_pfn); -- cgit v1.2.3-70-g09d2 From bdb21928512a860a60e6a24a849dc5b63cbaf96a Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Thu, 10 Jul 2008 22:21:58 +0200 Subject: slub: Fix use-after-preempt of per-CPU data structure Vegard Nossum reported a crash in kmem_cache_alloc(): BUG: unable to handle kernel paging request at da87d000 IP: [] kmem_cache_alloc+0xc7/0xe0 *pde = 28180163 *pte = 1a87d160 Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC Pid: 3850, comm: grep Not tainted (2.6.26-rc9-00059-gb190333 #5) EIP: 0060:[] EFLAGS: 00210203 CPU: 0 EIP is at kmem_cache_alloc+0xc7/0xe0 EAX: 00000000 EBX: da87c100 ECX: 1adad71a EDX: 6b6b6b6b ESI: 00200282 EDI: da87d000 EBP: f60bfe74 ESP: f60bfe54 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 and analyzed it: "The register %ecx looks innocent but is very important here. The disassembly: mov %edx,%ecx shr $0x2,%ecx rep stos %eax,%es:(%edi) <-- the fault So %ecx has been loaded from %edx... which is 0x6b6b6b6b/POISON_FREE. (0x6b6b6b6b >> 2 == 0x1adadada.) %ecx is the counter for the memset, from here: memset(object, 0, c->objsize); i.e. %ecx was loaded from c->objsize, so "c" must have been freed. Where did "c" come from? Uh-oh... c = get_cpu_slab(s, smp_processor_id()); This looks like it has very much to do with CPU hotplug/unplug. Is there a race between SLUB/hotplug since the CPU slab is used after it has been freed?" Good analysis. Yeah, it's possible that a caller of kmem_cache_alloc() -> slab_alloc() can be migrated on another CPU right after local_irq_restore() and before memset(). 
The initial CPU can go offline in the meantime (or the migration may itself be a consequence of the CPU going offline), so its 'kmem_cache_cpu' structure gets freed (in slab_cpuup_callback()). At some point the caller then continues on another CPU holding an obsolete pointer... Signed-off-by: Dmitry Adamushko Reported-by: Vegard Nossum Acked-by: Ingo Molnar Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/slub.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index 1a427c0ae83..315c392253c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1628,9 +1628,11 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, void **object; struct kmem_cache_cpu *c; unsigned long flags; + unsigned int objsize; local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); + objsize = c->objsize; if (unlikely(!c->freelist || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); @@ -1643,7 +1645,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, local_irq_restore(flags); if (unlikely((gfpflags & __GFP_ZERO) && object)) - memset(object, 0, c->objsize); + memset(object, 0, objsize); return object; } -- cgit v1.2.3-70-g09d2 From f4c0a0fdfae708f7aa438c27a380ed4071294e11 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: vfs: export filemap_fdatawrite_range() Make filemap_fdatawrite_range() function public, so that it can later be used in ordered mode rewrite by JBD/JBD2. Signed-off-by: Jan Kara --- include/linux/fs.h | 2 ++ mm/filemap.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/include/linux/fs.h b/include/linux/fs.h index d8e2762ed14..97f992adc62 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1740,6 +1740,8 @@ extern int wait_on_page_writeback_range(struct address_space *mapping, pgoff_t start, pgoff_t end); extern int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); +extern int filemap_fdatawrite_range(struct address_space *mapping, + loff_t start, loff_t end); extern long do_fsync(struct file *file, int datasync); extern void sync_supers(void); diff --git a/mm/filemap.c b/mm/filemap.c index 1e6a7d34874..65d9d9e2b75 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -236,11 +236,12 @@ int filemap_fdatawrite(struct address_space *mapping) } EXPORT_SYMBOL(filemap_fdatawrite); -static int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, +int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end) { return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL); } +EXPORT_SYMBOL(filemap_fdatawrite_range); /** * filemap_flush - mostly a non-blocking flush -- cgit v1.2.3-70-g09d2 From 06d6cf6959d22037fcec598f4f954db5db3d7356 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: mm: Add range_cont mode for writeback Filesystems like ext4 need to start a new transaction in writepages for block allocation. This happens with delayed allocation, and there is a limit to how many credits we can request from the journal layer. So we call write_cache_pages multiple times with wbc->nr_to_write set to the maximum possible value limited by the max journal credits available. Add a new mode to writeback that enables us to handle this behaviour. In the new mode we update the wbc->range_start to point to the new offset to be written.
The next call to write_cache_pages will start writeout from the specified range_start offset. In the new mode we also limit writing to the specified wbc->range_end. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Acked-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- include/linux/writeback.h | 1 + mm/page-writeback.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'mm') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f462439cc28..0d8573e6b9e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -63,6 +63,7 @@ struct writeback_control { unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned more_io:1; /* more io to be dispatched */ + unsigned range_cont:1; }; /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 789b6adbef3..ded57d52806 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -956,6 +956,9 @@ retry: } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; + + if (wbc->range_cont) + wbc->range_start = index << PAGE_CACHE_SHIFT; return ret; } EXPORT_SYMBOL(write_cache_pages); -- cgit v1.2.3-70-g09d2 From 421c175c4d609864350df495b34d3e99f9fb1bdd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 14 Jul 2008 09:59:18 +0200 Subject: [S390] Add support for memory hot-add. Cc: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 4 ++++ arch/s390/mm/init.c | 19 +++++++++++++++ include/asm-s390/sparsemem.h | 4 ++-- mm/Kconfig | 2 +- 4 files changed, 26 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6d0d31651f0..5dc8f8028d5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -313,6 +313,10 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SELECT_MEMORY_MODEL def_bool y +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y + depends on SPARSEMEM + source "mm/Kconfig" comment "I/O subsystem configuration" diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 05598649b32..388cc742005 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -202,3 +202,22 @@ void free_initrd_mem(unsigned long start, unsigned long end) } } #endif + +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_add_memory(int nid, u64 start, u64 size) +{ + struct pglist_data *pgdat; + struct zone *zone; + int rc; + + pgdat = NODE_DATA(nid); + zone = pgdat->node_zones + ZONE_NORMAL; + rc = vmem_add_mapping(start, size); + if (rc) + return rc; + rc = __add_pages(zone, PFN_DOWN(start), PFN_DOWN(size)); + if (rc) + vmem_remove_mapping(start, size); + return rc; +} +#endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/include/asm-s390/sparsemem.h b/include/asm-s390/sparsemem.h index 06dfdab6c0e..545d219e6a2 100644 --- a/include/asm-s390/sparsemem.h +++ b/include/asm-s390/sparsemem.h @@ -1,15 +1,15 @@ #ifndef _ASM_S390_SPARSEMEM_H #define _ASM_S390_SPARSEMEM_H -#define SECTION_SIZE_BITS 25 - #ifdef CONFIG_64BIT +#define SECTION_SIZE_BITS 28 #define MAX_PHYSADDR_BITS 42 #define MAX_PHYSMEM_BITS 42 #else +#define SECTION_SIZE_BITS 25 #define MAX_PHYSADDR_BITS 31 #define MAX_PHYSMEM_BITS 31 diff --git a/mm/Kconfig b/mm/Kconfig index 3aa819d628c..4242743b981 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -129,7 +129,7 @@ config MEMORY_HOTPLUG bool "Allow for memory hot-add" depends on SPARSEMEM || X86_64_ACPI_NUMA depends on HOTPLUG && !HIBERNATION && ARCH_ENABLE_MEMORY_HOTPLUG - depends on (IA64 || X86 || PPC64 || SUPERH) + depends on
(IA64 || X86 || PPC64 || SUPERH || S390) comment "Memory hotplug is currently incompatible with Software Suspend" depends on SPARSEMEM && HOTPLUG && HIBERNATION -- cgit v1.2.3-70-g09d2 From 7daf705f362e349983e92037a198b8821db198af Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 14 Jul 2008 12:12:53 -0700 Subject: Start using the new '%pS' infrastructure to print symbols This simplifies the code significantly, and was the whole point of the exercise. Signed-off-by: Linus Torvalds --- arch/x86/kernel/traps_64.c | 25 +------------------------ mm/slub.c | 5 ++--- 2 files changed, 3 insertions(+), 27 deletions(-) (limited to 'mm') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index adff76ea97c..f1a95d10595 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -104,30 +104,7 @@ int kstack_depth_to_print = 12; void printk_address(unsigned long address, int reliable) { -#ifdef CONFIG_KALLSYMS - unsigned long offset = 0, symsize; - const char *symname; - char *modname; - char *delim = ":"; - char namebuf[KSYM_NAME_LEN]; - char reliab[4] = ""; - - symname = kallsyms_lookup(address, &symsize, &offset, - &modname, namebuf); - if (!symname) { - printk(" [<%016lx>]\n", address); - return; - } - if (!reliable) - strcpy(reliab, "? "); - - if (!modname) - modname = delim = ""; - printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n", - address, reliab, delim, modname, delim, symname, offset, symsize); -#else - printk(" [<%016lx>]\n", address); -#endif + printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, diff --git a/mm/slub.c b/mm/slub.c index 315c392253c..5f6e2c4a2ba 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -431,9 +431,8 @@ static void print_track(const char *s, struct track *t) if (!t->addr) return; - printk(KERN_ERR "INFO: %s in ", s); - __print_symbol("%s", (unsigned long)t->addr); - printk(" age=%lu cpu=%u pid=%d\n", jiffies - t->when, t->cpu, t->pid); + printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n", + s, t->addr, jiffies - t->when, t->cpu, t->pid); } static void print_tracking(struct kmem_cache *s, void *object) -- cgit v1.2.3-70-g09d2 From 0937502af7c9b648ed4e884ccb7f504b01a005a1 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 28 May 2008 10:32:22 -0700 Subject: slub: Add check for kfree() of non slab objects. We can detect kfree()s on non slab objects by checking for PageCompound(). Works in the same way as for ksize. This helped me catch an invalid kfree(). 
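To illustrate the class of bug the new check traps (a hypothetical example, not from the patch): a kfree() of memory that never came from the slab allocator used to fall through to put_page() on an unrelated page; with this change it hits the BUG_ON() instead:

/* Sketch only: an invalid kfree() that the new check would catch. */
static char not_a_slab_object[64];

static void bad_free_example(void)
{
	/*
	 * virt_to_head_page() yields a page that is neither PageSlab
	 * (not a slab page) nor PageCompound (not a large kmalloc
	 * allocation), so BUG_ON(!PageCompound(page)) fires instead
	 * of a bogus put_page().
	 */
	kfree(not_a_slab_object);
}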
Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index 5f6e2c4a2ba..b3f2e713cdf 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2766,6 +2766,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { + BUG_ON(!PageCompound(page)); put_page(page); return; } -- cgit v1.2.3-70-g09d2 From 88e4ccf294ca62c2da998012a83533ce150c8dce Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 23 Jun 2008 02:58:37 +0400 Subject: slub: current is always valid Acked-by: Christoph Lameter Signed-off-by: Alexey Dobriyan Signed-off-by: Pekka Enberg --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index b3f2e713cdf..488400d1070 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -411,7 +411,7 @@ static void set_track(struct kmem_cache *s, void *object, if (addr) { p->addr = addr; p->cpu = smp_processor_id(); - p->pid = current ? current->pid : -1; + p->pid = current->pid; p->when = jiffies; } else memset(p, 0, sizeof(struct track)); -- cgit v1.2.3-70-g09d2 From e79aec291da55aa322ddb5d8f3bb04cdf69470d5 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 4 Jul 2008 00:40:32 +0530 Subject: slab: rename slab_destroy_objs With the removal of destructors, slab_destroy_objs no longer actually destroys any objects, making the kernel doc incorrect and the function name misleading. In keeping with the other debug functions, rename it to slab_destroy_debugcheck and drop the kernel doc. Signed-off-by: Rabin Vincent Signed-off-by: Pekka Enberg --- mm/slab.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'mm') diff --git a/mm/slab.c b/mm/slab.c index 046607f05f3..b4aa4c88250 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1901,15 +1901,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) #endif #if DEBUG -/** - * slab_destroy_objs - destroy a slab and its objects - * @cachep: cache pointer being destroyed - * @slabp: slab pointer being destroyed - * - * Call the registered destructor for each object in a slab that is being - * destroyed. - */ -static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) +static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) { int i; for (i = 0; i < cachep->num; i++) { @@ -1938,7 +1930,7 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) } } #else -static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) +static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) { } #endif @@ -1956,7 +1948,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) { void *addr = slabp->s_mem - slabp->colouroff; - slab_destroy_objs(cachep, slabp); + slab_destroy_debugcheck(cachep, slabp); if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { struct slab_rcu *slab_rcu; -- cgit v1.2.3-70-g09d2