Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- | arch/powerpc/kernel/entry_64.S    |  25
-rw-r--r-- | arch/powerpc/kernel/ftrace.c      |  69
-rw-r--r-- | arch/powerpc/kernel/iommu.c       | 197
-rw-r--r-- | arch/powerpc/kernel/pci-common.c  |   1
-rw-r--r-- | arch/powerpc/kernel/pci_of_scan.c |   1
-rw-r--r-- | arch/powerpc/kernel/smp.c         |   2
-rw-r--r-- | arch/powerpc/kernel/vio.c         |   6
7 files changed, 210 insertions, 91 deletions
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 5971c85df13..cf38a17ab28 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -197,7 +197,16 @@ syscall_exit:
 	wrteei	0
 #else
 	ld	r10,PACAKMSR(r13)
-	mtmsrd	r10,1
+	/*
+	 * For performance reasons we clear RI the same time that we
+	 * clear EE. We only need to clear RI just before we restore r13
+	 * below, but batching it with EE saves us one expensive mtmsrd call.
+	 * We have to be careful to restore RI if we branch anywhere from
+	 * here (eg syscall_exit_work).
+	 */
+	li	r9,MSR_RI
+	andc	r11,r10,r9
+	mtmsrd	r11,1
 #endif /* CONFIG_PPC_BOOK3E */
 
 	ld	r9,TI_FLAGS(r12)
@@ -214,17 +223,6 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	andi.	r6,r8,MSR_PR
 	ld	r4,_LINK(r1)
-	/*
-	 * Clear RI before restoring r13. If we are returning to
-	 * userspace and we take an exception after restoring r13,
-	 * we end up corrupting the userspace r13 value.
-	 */
-#ifdef CONFIG_PPC_BOOK3S
-	/* No MSR:RI on BookE */
-	li	r12,MSR_RI
-	andc	r11,r10,r12
-	mtmsrd	r11,1 /* clear MSR.RI */
-#endif /* CONFIG_PPC_BOOK3S */
 	beq-	1f
 	ACCOUNT_CPU_USER_EXIT(r11, r12)
@@ -271,6 +269,9 @@ syscall_enosys:
 	b	syscall_exit
 
 syscall_exit_work:
+#ifdef CONFIG_PPC_BOOK3S
+	mtmsrd	r10,1		/* Restore RI */
+#endif
 	/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
 	   If TIF_NOERROR is set, just save r3 as it is. */
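The new comment in the hunk above carries the reasoning: RI only has to be clear just before r13 is restored, but folding it into the same andc/mtmsrd that already drops EE avoids a second, expensive mtmsrd. A minimal user-space model of that mask batching follows; the MSR bit values and the stand-in kernel_msr constant are illustrative assumptions, not taken from the patch.

#include <stdio.h>

#define MSR_EE	0x8000UL	/* external interrupt enable (illustrative) */
#define MSR_RI	0x0002UL	/* recoverable interrupt (illustrative) */

int main(void)
{
	/* Stand-in for the PACAKMSR value loaded into r10; EE is already clear. */
	unsigned long kernel_msr = 0x9000000000001032UL & ~MSR_EE;

	/* One combined mask write, i.e. one mtmsrd, instead of clearing EE now
	 * and RI with a second mtmsrd later (the sequence the hunk removes). */
	unsigned long exit_msr = kernel_msr & ~MSR_RI;	/* andc r11,r10,r9 */

	printf("MSR %#lx -> %#lx (EE=%d RI=%d)\n", kernel_msr, exit_msr,
	       !!(exit_msr & MSR_EE), !!(exit_msr & MSR_RI));
	return 0;
}

The only point of the model is that a single combined mask replaces the two-step sequence deleted from the r13-restore path; the cost being saved is the extra mtmsrd, not the mask arithmetic itself.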
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index bf99cfa6bbf..6f33296a057 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -63,11 +63,9 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
 		return -EINVAL;
 
 	/* replace the text with the new text */
-	if (probe_kernel_write((void *)ip, &new, MCOUNT_INSN_SIZE))
+	if (patch_instruction((unsigned int *)ip, new))
 		return -EPERM;
 
-	flush_icache_range(ip, ip + 8);
-
 	return 0;
 }
 
@@ -212,12 +210,9 @@ __ftrace_make_nop(struct module *mod,
 	 */
 	op = 0x48000008;	/* b +8 */
 
-	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+	if (patch_instruction((unsigned int *)ip, op))
 		return -EPERM;
-
-	flush_icache_range(ip, ip + 8);
-
 	return 0;
 }
 
@@ -286,11 +281,9 @@ __ftrace_make_nop(struct module *mod,
 
 	op = PPC_INST_NOP;
 
-	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+	if (patch_instruction((unsigned int *)ip, op))
 		return -EPERM;
 
-	flush_icache_range(ip, ip + 8);
-
 	return 0;
 }
 #endif /* PPC64 */
@@ -426,11 +419,9 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
 	pr_devel("write to %lx\n", rec->ip);
 
-	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+	if (patch_instruction((unsigned int *)ip, op))
 		return -EPERM;
 
-	flush_icache_range(ip, ip + 8);
-
 	return 0;
 }
 #endif /* CONFIG_PPC64 */
@@ -484,6 +475,58 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 	return ret;
 }
 
+static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+	unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR;
+	int ret;
+
+	ret = ftrace_update_record(rec, enable);
+
+	switch (ret) {
+	case FTRACE_UPDATE_IGNORE:
+		return 0;
+	case FTRACE_UPDATE_MAKE_CALL:
+		return ftrace_make_call(rec, ftrace_addr);
+	case FTRACE_UPDATE_MAKE_NOP:
+		return ftrace_make_nop(NULL, rec, ftrace_addr);
+	}
+
+	return 0;
+}
+
+void ftrace_replace_code(int enable)
+{
+	struct ftrace_rec_iter *iter;
+	struct dyn_ftrace *rec;
+	int ret;
+
+	for (iter = ftrace_rec_iter_start(); iter;
+	     iter = ftrace_rec_iter_next(iter)) {
+		rec = ftrace_rec_iter_record(iter);
+		ret = __ftrace_replace_code(rec, enable);
+		if (ret) {
+			ftrace_bug(ret, rec->ip);
+			return;
+		}
+	}
+}
+
+void arch_ftrace_update_code(int command)
+{
+	if (command & FTRACE_UPDATE_CALLS)
+		ftrace_replace_code(1);
+	else if (command & FTRACE_DISABLE_CALLS)
+		ftrace_replace_code(0);
+
+	if (command & FTRACE_UPDATE_TRACE_FUNC)
+		ftrace_update_ftrace_func(ftrace_trace_function);
+
+	if (command & FTRACE_START_FUNC_RET)
+		ftrace_enable_ftrace_graph_caller();
+	else if (command & FTRACE_STOP_FUNC_RET)
+		ftrace_disable_ftrace_graph_caller();
+}
+
 int __init ftrace_dyn_arch_init(void *data)
 {
 	/* caller expects data to be zero */
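Every ftrace.c hunk makes the same substitution: patch_instruction() is assumed to both store the new opcode and keep the instruction cache coherent, which is why the separate probe_kernel_write() + flush_icache_range() pairs disappear. Below is a hedged user-space sketch of that write-then-flush contract; patch_one_insn() is a made-up stand-in, and __builtin___clear_cache stands in for the powerpc dcbst/icbi sequence the kernel uses.

#include <stdint.h>
#include <stdio.h>

/* Model of the single-call contract: write the instruction word, then
 * make the I-cache coherent for exactly that range. */
static int patch_one_insn(uint32_t *ip, uint32_t new_insn)
{
	*ip = new_insn;
	__builtin___clear_cache((char *)ip, (char *)(ip + 1));
	return 0;
}

int main(void)
{
	static uint32_t text[1] = { 0x60000000 };	/* PPC_INST_NOP */

	patch_one_insn(text, 0x48000008);		/* "b +8", as in __ftrace_make_nop() */
	printf("patched opcode: 0x%08x\n", (unsigned int)text[0]);
	return 0;
}

Collapsing the write and the flush into one helper also keeps the flush range tied to the instruction actually written, instead of the fixed ip..ip+8 span the removed calls used.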
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 359f078571c..7bc94da1a83 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -33,6 +33,7 @@
 #include <linux/bitmap.h>
 #include <linux/iommu-helper.h>
 #include <linux/crash_dump.h>
+#include <linux/hash.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/iommu.h>
@@ -58,6 +59,26 @@ static int __init setup_iommu(char *str)
 
 __setup("iommu=", setup_iommu);
 
+static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
+
+/*
+ * We precalculate the hash to avoid doing it on every allocation.
+ *
+ * The hash is important to spread CPUs across all the pools. For example,
+ * on a POWER7 with 4 way SMT we want interrupts on the primary threads and
+ * with 4 pools all primary threads would map to the same pool.
+ */
+static int __init setup_iommu_pool_hash(void)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i)
+		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+
+	return 0;
+}
+subsys_initcall(setup_iommu_pool_hash);
+
 static unsigned long iommu_range_alloc(struct device *dev,
 				       struct iommu_table *tbl,
 				       unsigned long npages,
@@ -71,6 +92,9 @@ static unsigned long iommu_range_alloc(struct device *dev,
 	int pass = 0;
 	unsigned long align_mask;
 	unsigned long boundary_size;
+	unsigned long flags;
+	unsigned int pool_nr;
+	struct iommu_pool *pool;
 
 	align_mask = 0xffffffffffffffffl >> (64 - align_order);
 
@@ -83,36 +107,46 @@ static unsigned long iommu_range_alloc(struct device *dev,
 		return DMA_ERROR_CODE;
 	}
 
-	if (handle && *handle)
-		start = *handle;
+	/*
+	 * We don't need to disable preemption here because any CPU can
+	 * safely use any IOMMU pool.
+	 */
+	pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1);
+
+	if (largealloc)
+		pool = &(tbl->large_pool);
 	else
-		start = largealloc ? tbl->it_largehint : tbl->it_hint;
+		pool = &(tbl->pools[pool_nr]);
+
+	spin_lock_irqsave(&(pool->lock), flags);
 
-	/* Use only half of the table for small allocs (15 pages or less) */
-	limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
+again:
+	if ((pass == 0) && handle && *handle)
+		start = *handle;
+	else
+		start = pool->hint;
 
-	if (largealloc && start < tbl->it_halfpoint)
-		start = tbl->it_halfpoint;
+	limit = pool->end;
 
 	/* The case below can happen if we have a small segment appended
 	 * to a large, or when the previous alloc was at the very end of
 	 * the available space. If so, go back to the initial start.
 	 */
 	if (start >= limit)
-		start = largealloc ? tbl->it_largehint : tbl->it_hint;
-
- again:
+		start = pool->start;
 
 	if (limit + tbl->it_offset > mask) {
 		limit = mask - tbl->it_offset + 1;
 		/* If we're constrained on address range, first try
 		 * at the masked hint to avoid O(n) search complexity,
-		 * but on second pass, start at 0.
+		 * but on second pass, start at 0 in pool 0.
 		 */
-		if ((start & mask) >= limit || pass > 0)
-			start = 0;
-		else
+		if ((start & mask) >= limit || pass > 0) {
+			pool = &(tbl->pools[0]);
+			start = pool->start;
+		} else {
 			start &= mask;
+		}
 	}
 
 	if (dev)
@@ -126,16 +160,25 @@ static unsigned long iommu_range_alloc(struct device *dev,
 			     tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
 			     align_mask);
 	if (n == -1) {
-		if (likely(pass < 2)) {
-			/* First failure, just rescan the half of the table.
-			 * Second failure, rescan the other half of the table.
-			 */
-			start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
-			limit = pass ? tbl->it_size : limit;
+		if (likely(pass == 0)) {
+			/* First try the pool from the start */
+			pool->hint = pool->start;
 			pass++;
 			goto again;
+
+		} else if (pass <= tbl->nr_pools) {
+			/* Now try scanning all the other pools */
+			spin_unlock(&(pool->lock));
+			pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
+			pool = &tbl->pools[pool_nr];
+			spin_lock(&(pool->lock));
+			pool->hint = pool->start;
+			pass++;
+			goto again;
+
 		} else {
-			/* Third failure, give up */
+			/* Give up */
+			spin_unlock_irqrestore(&(pool->lock), flags);
 			return DMA_ERROR_CODE;
 		}
 	}
@@ -145,10 +188,10 @@ static unsigned long iommu_range_alloc(struct device *dev,
 	/* Bump the hint to a new block for small allocs. */
 	if (largealloc) {
 		/* Don't bump to new block to avoid fragmentation */
-		tbl->it_largehint = end;
+		pool->hint = end;
 	} else {
 		/* Overflow will be taken care of at the next allocation */
-		tbl->it_hint = (end + tbl->it_blocksize - 1) &
+		pool->hint = (end + tbl->it_blocksize - 1) &
 				~(tbl->it_blocksize - 1);
 	}
 
@@ -156,6 +199,8 @@ static unsigned long iommu_range_alloc(struct device *dev,
 	if (handle)
 		*handle = end;
 
+	spin_unlock_irqrestore(&(pool->lock), flags);
+
 	return n;
 }
 
@@ -165,18 +210,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
 			      unsigned long mask, unsigned int align_order,
 			      struct dma_attrs *attrs)
 {
-	unsigned long entry, flags;
+	unsigned long entry;
 	dma_addr_t ret = DMA_ERROR_CODE;
 	int build_fail;
 
-	spin_lock_irqsave(&(tbl->it_lock), flags);
-
 	entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
 
-	if (unlikely(entry == DMA_ERROR_CODE)) {
-		spin_unlock_irqrestore(&(tbl->it_lock), flags);
+	if (unlikely(entry == DMA_ERROR_CODE))
 		return DMA_ERROR_CODE;
-	}
 
 	entry += tbl->it_offset;	/* Offset into real TCE table */
 	ret = entry << IOMMU_PAGE_SHIFT;	/* Set the return dma address */
@@ -193,8 +234,6 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
 	 */
 	if (unlikely(build_fail)) {
 		__iommu_free(tbl, ret, npages);
-
-		spin_unlock_irqrestore(&(tbl->it_lock), flags);
 		return DMA_ERROR_CODE;
 	}
 
@@ -202,16 +241,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
 	if (ppc_md.tce_flush)
 		ppc_md.tce_flush(tbl);
 
-	spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
 	/* Make sure updates are seen by hardware */
 	mb();
 
 	return ret;
 }
 
-static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
-			 unsigned int npages)
+static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
+			     unsigned int npages)
 {
 	unsigned long entry, free_entry;
 
@@ -231,20 +268,57 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 			printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index);
 			WARN_ON(1);
 		}
-		return;
+
+		return false;
+	}
+
+	return true;
+}
+
+static struct iommu_pool *get_pool(struct iommu_table *tbl,
+				   unsigned long entry)
+{
+	struct iommu_pool *p;
+	unsigned long largepool_start = tbl->large_pool.start;
+
+	/* The large pool is the last pool at the top of the table */
+	if (entry >= largepool_start) {
+		p = &tbl->large_pool;
+	} else {
+		unsigned int pool_nr = entry / tbl->poolsize;
+
+		BUG_ON(pool_nr > tbl->nr_pools);
+		p = &tbl->pools[pool_nr];
 	}
+	return p;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+			 unsigned int npages)
+{
+	unsigned long entry, free_entry;
+	unsigned long flags;
+	struct iommu_pool *pool;
+
+	entry = dma_addr >> IOMMU_PAGE_SHIFT;
+	free_entry = entry - tbl->it_offset;
+
+	pool = get_pool(tbl, free_entry);
+
+	if (!iommu_free_check(tbl, dma_addr, npages))
+		return;
+
 	ppc_md.tce_free(tbl, entry, npages);
+
+	spin_lock_irqsave(&(pool->lock), flags);
 	bitmap_clear(tbl->it_map, free_entry, npages);
+	spin_unlock_irqrestore(&(pool->lock), flags);
 }
 
 static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 		unsigned int npages)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&(tbl->it_lock), flags);
-
 	__iommu_free(tbl, dma_addr, npages);
 
 	/* Make sure TLB cache is flushed if the HW needs it. We do
@@ -253,8 +327,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 	 */
 	if (ppc_md.tce_flush)
 		ppc_md.tce_flush(tbl);
-
-	spin_unlock_irqrestore(&(tbl->it_lock), flags);
 }
 
 int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
@@ -263,7 +335,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 		 struct dma_attrs *attrs)
 {
 	dma_addr_t dma_next = 0, dma_addr;
-	unsigned long flags;
 	struct scatterlist *s, *outs, *segstart;
 	int outcount, incount, i, build_fail = 0;
 	unsigned int align;
@@ -285,8 +356,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 
 	DBG("sg mapping %d elements:\n", nelems);
 
-	spin_lock_irqsave(&(tbl->it_lock), flags);
-
 	max_seg_size = dma_get_max_seg_size(dev);
 	for_each_sg(sglist, s, nelems, i) {
 		unsigned long vaddr, npages, entry, slen;
@@ -369,8 +438,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 	if (ppc_md.tce_flush)
 		ppc_md.tce_flush(tbl);
 
-	spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
 	DBG("mapped %d elements:\n", outcount);
 
 	/* For the sake of iommu_unmap_sg, we clear out the length in the
@@ -402,7 +469,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 		if (s == outs)
 			break;
 	}
-	spin_unlock_irqrestore(&(tbl->it_lock), flags);
 	return 0;
 }
 
@@ -412,15 +478,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 		struct dma_attrs *attrs)
 {
 	struct scatterlist *sg;
-	unsigned long flags;
 
 	BUG_ON(direction == DMA_NONE);
 
 	if (!tbl)
 		return;
 
-	spin_lock_irqsave(&(tbl->it_lock), flags);
-
 	sg = sglist;
 	while (nelems--) {
 		unsigned int npages;
@@ -440,8 +503,6 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 	 */
 	if (ppc_md.tce_flush)
 		ppc_md.tce_flush(tbl);
-
-	spin_unlock_irqrestore(&(tbl->it_lock), flags);
 }
 
 static void iommu_table_clear(struct iommu_table *tbl)
@@ -494,9 +555,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 	unsigned long sz;
 	static int welcomed = 0;
 	struct page *page;
-
-	/* Set aside 1/4 of the table for large allocations. */
-	tbl->it_halfpoint = tbl->it_size * 3 / 4;
+	unsigned int i;
+	struct iommu_pool *p;
 
 	/* number of bytes needed for the bitmap */
 	sz = (tbl->it_size + 7) >> 3;
@@ -515,9 +575,28 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 	if (tbl->it_offset == 0)
 		set_bit(0, tbl->it_map);
 
-	tbl->it_hint = 0;
-	tbl->it_largehint = tbl->it_halfpoint;
-	spin_lock_init(&tbl->it_lock);
+	/* We only split the IOMMU table if we have 1GB or more of space */
+	if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024))
+		tbl->nr_pools = IOMMU_NR_POOLS;
+	else
+		tbl->nr_pools = 1;
+
+	/* We reserve the top 1/4 of the table for large allocations */
+	tbl->poolsize = (tbl->it_size * 3 / 4) / IOMMU_NR_POOLS;
+
+	for (i = 0; i < IOMMU_NR_POOLS; i++) {
+		p = &tbl->pools[i];
+		spin_lock_init(&(p->lock));
+		p->start = tbl->poolsize * i;
+		p->hint = p->start;
+		p->end = p->start + tbl->poolsize;
+	}
+
+	p = &tbl->large_pool;
+	spin_lock_init(&(p->lock));
+	p->start = tbl->poolsize * i;
+	p->hint = p->start;
+	p->end = tbl->it_size;
 
 	iommu_table_clear(tbl);
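Taken together, the iommu.c hunks replace the single it_lock/it_hint pair with several independently locked pools: the bottom 3/4 of the table is split evenly across IOMMU_NR_POOLS small-allocation pools selected by a per-CPU hash, and the top 1/4 becomes a dedicated large pool. The sketch below is a hedged user-space model of just the pool layout and the hash-based selection (the pool count, the hash constant and hash_32_model() are local stand-ins, not the kernel's definitions); it illustrates the comment above setup_iommu_pool_hash(): hashing the CPU number spreads 4-way-SMT primary threads (0, 4, 8, 12) across pools, where a simple modulo would put them all in pool 0.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define NR_POOLS	4	/* stand-in for IOMMU_NR_POOLS */
#define POOL_HASHBITS	2	/* stand-in for IOMMU_POOL_HASHBITS */

struct pool {
	unsigned long start, end, hint;
};

/* Local stand-in modelled on a 32-bit golden-ratio hash. */
static unsigned int hash_32_model(uint32_t val, unsigned int bits)
{
	return (uint32_t)(val * 0x9e370001UL) >> (32 - bits);
}

/* Large requests use the dedicated top pool; everything else hashes on the CPU. */
static struct pool *pick_pool(struct pool pools[], struct pool *large_pool,
			      unsigned int cpu, bool largealloc)
{
	if (largealloc)
		return large_pool;
	return &pools[hash_32_model(cpu, POOL_HASHBITS) & (NR_POOLS - 1)];
}

int main(void)
{
	unsigned long it_size = 1UL << 20;	/* table entries, illustrative */
	unsigned long poolsize = (it_size * 3 / 4) / NR_POOLS;
	struct pool pools[NR_POOLS], large_pool;
	unsigned int i;

	/* Bottom 3/4 split evenly, top 1/4 reserved for large allocations. */
	for (i = 0; i < NR_POOLS; i++) {
		pools[i].start = poolsize * i;
		pools[i].end = pools[i].start + poolsize;
		pools[i].hint = pools[i].start;
	}
	large_pool.start = poolsize * NR_POOLS;
	large_pool.end = it_size;
	large_pool.hint = large_pool.start;

	/* Primary threads on a 4-way SMT machine land in different pools. */
	for (i = 0; i < 16; i += 4) {
		struct pool *p = pick_pool(pools, &large_pool, i, false);
		printf("cpu %2u -> pool starting at entry %lu\n", i, p->start);
	}
	return 0;
}

On allocation failure the real code additionally walks the remaining pools (the pass <= tbl->nr_pools branch above) before giving up, so one exhausted pool does not fail the request outright.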
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 8e78e93c818..0f75bd50040 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1646,7 +1646,6 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)
 		pci_free_resource_list(&resources);
 		return;
 	}
-	bus->secondary = hose->first_busno;
 	hose->bus = bus;
 
 	/* Get probe mode and perform scan */
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 89dde171a6f..d7dd42bd145 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -198,7 +198,6 @@ EXPORT_SYMBOL(of_create_pci_dev);
 
 /**
  * of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes
- * @node: device tree node of bridge
  * @dev: pci_dev structure for the bridge
  *
  * of_scan_bus() calls this routine for each PCI bridge that it finds, and
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e4cb34322de..e1417c42155 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -571,7 +571,6 @@ void __devinit start_secondary(void *unused)
 	if (system_state == SYSTEM_RUNNING)
 		vdso_data->processorCount++;
 #endif
-	ipi_call_lock();
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
 	/* Update sibling maps */
@@ -601,7 +600,6 @@ void __devinit start_secondary(void *unused)
 		of_node_put(np);
 	}
 	of_node_put(l2_cache);
-	ipi_call_unlock();
 
 	local_irq_enable();
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index cb87301ccd5..06cbc309b81 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -625,7 +625,7 @@ struct dma_map_ops vio_dma_mapping_ops = {
  * vio_cmo_set_dev_desired - Set desired entitlement for a device
  *
  * @viodev: struct vio_dev for device to alter
- * @new_desired: new desired entitlement level in bytes
+ * @desired: new desired entitlement level in bytes
  *
  * For use by devices to request a change to their entitlement at runtime or
  * through sysfs. The desired entitlement level is changed and a balancing
@@ -1262,7 +1262,7 @@ static int vio_bus_remove(struct device *dev)
 
 /**
  * vio_register_driver: - Register a new vio driver
- * @drv: The vio_driver structure to be registered.
+ * @viodrv: The vio_driver structure to be registered.
  */
 int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
 			  const char *mod_name)
@@ -1282,7 +1282,7 @@ EXPORT_SYMBOL(__vio_register_driver);
 
 /**
  * vio_unregister_driver - Remove registration of vio driver.
- * @driver: The vio_driver struct to be removed form registration
+ * @viodrv: The vio_driver struct to be removed form registration
  */
 void vio_unregister_driver(struct vio_driver *viodrv)
 {