diff options
Diffstat (limited to 'arch')
366 files changed, 15306 insertions, 7229 deletions
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 433be2a24f3..3d890a98a08 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -46,6 +46,7 @@ SECTIONS __init_end = .; /* Freed after init ends here */ + _sdata = .; /* Start of rw data section */ _data = .; RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 4ce8d1358fe..c04dd576f33 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -37,6 +37,7 @@ #include <linux/crash_dump.h> #include <linux/iommu-helper.h> #include <linux/dma-mapping.h> +#include <linux/prefetch.h> #include <asm/delay.h> /* ia64_get_itc() */ #include <asm/io.h> diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 787de4a77d8..53c0ba004e9 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -209,6 +209,7 @@ SECTIONS { data : { } :data .data : AT(ADDR(.data) - LOAD_OFFSET) { + _sdata = .; INIT_TASK_DATA(PAGE_SIZE) CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES) READ_MOSTLY_DATA(SMP_CACHE_BYTES) diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h index f6c5617e16a..b214b5b0432 100644 --- a/arch/ia64/kvm/vti.h +++ b/arch/ia64/kvm/vti.h @@ -83,13 +83,13 @@ union vac { unsigned long value; struct { - int a_int:1; - int a_from_int_cr:1; - int a_to_int_cr:1; - int a_from_psr:1; - int a_from_cpuid:1; - int a_cover:1; - int a_bsw:1; + unsigned int a_int:1; + unsigned int a_from_int_cr:1; + unsigned int a_to_int_cr:1; + unsigned int a_from_psr:1; + unsigned int a_from_cpuid:1; + unsigned int a_cover:1; + unsigned int a_bsw:1; long reserved:57; }; }; @@ -97,12 +97,12 @@ union vac { union vdc { unsigned long value; struct { - int d_vmsw:1; - int d_extint:1; - int d_ibr_dbr:1; - int d_pmc:1; - int d_to_pmd:1; - int d_itm:1; + unsigned int d_vmsw:1; + unsigned int d_extint:1; + unsigned int d_ibr_dbr:1; + unsigned int d_pmc:1; + unsigned int d_to_pmd:1; + unsigned int d_itm:1; long reserved:58; }; }; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 0799fea4c58..20b35937612 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -10,6 +10,7 @@ #include <linux/interrupt.h> #include <linux/kprobes.h> #include <linux/kdebug.h> +#include <linux/prefetch.h> #include <asm/pgtable.h> #include <asm/processor.h> diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c index 5cdd7e4a597..f7b798993ce 100644 --- a/arch/ia64/oprofile/backtrace.c +++ b/arch/ia64/oprofile/backtrace.c @@ -29,7 +29,7 @@ typedef struct unsigned int depth; struct pt_regs *regs; struct unw_frame_info frame; - u64 *prev_pfs_loc; /* state for WAR for old spinlock ool code */ + unsigned long *prev_pfs_loc; /* state for WAR for old spinlock ool code */ } ia64_backtrace_t; /* Returns non-zero if the PC is in the Interrupt Vector Table */ diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index c194d64cdbb..cf95aec7746 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -44,6 +44,7 @@ SECTIONS EXCEPTION_TABLE(16) NOTES + _sdata = .; /* Start of data section */ RODATA RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE) _edata = .; /* End of data section */ diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 878be5f38ca..d0993594f55 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -25,6 +25,8 @@ SECTIONS EXCEPTION_TABLE(16) + _sdata = .; /* Start of data section */ + RODATA RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index 1ad6b7ad2c1..8080469ee6c 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -25,6 +25,7 @@ SECTIONS _etext = .; /* End of text section */ EXCEPTION_TABLE(16) :data + _sdata = .; /* Start of rw data section */ RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE) :data /* End of data goes *here* so that freeing init code works properly. */ _edata = .; diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index cd2ca544454..01af3876cf9 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -65,6 +65,7 @@ SECTIONS NOTES :text :note .dummy : { *(.dummy) } :text + _sdata = .; /* Start of data section */ RODATA /* writeable */ diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c index 3dc19f48295..e9f95dcde37 100644 --- a/arch/mips/txx9/generic/setup_tx4939.c +++ b/arch/mips/txx9/generic/setup_tx4939.c @@ -318,19 +318,15 @@ void __init tx4939_sio_init(unsigned int sclk, unsigned int cts_mask) } #if defined(CONFIG_TC35815) || defined(CONFIG_TC35815_MODULE) -static int tx4939_get_eth_speed(struct net_device *dev) +static u32 tx4939_get_eth_speed(struct net_device *dev) { - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - int speed = 100; /* default 100Mbps */ - int err; - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) - return speed; - err = dev->ethtool_ops->get_settings(dev, &cmd); - if (err < 0) - return speed; - speed = cmd.speed == SPEED_100 ? 100 : 10; - return speed; + struct ethtool_cmd cmd; + if (dev_ethtool_get_settings(dev, &cmd)) + return 100; /* default 100Mbps */ + + return ethtool_cmd_speed(&cmd); } + static int tx4939_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -343,8 +339,7 @@ static int tx4939_netdev_event(struct notifier_block *this, else if (dev->irq == TXX9_IRQ_BASE + TX4939_IR_ETH(1)) bit = TX4939_PCFG_SPEED1; if (bit) { - int speed = tx4939_get_eth_speed(dev); - if (speed == 100) + if (tx4939_get_eth_speed(dev) == 100) txx9_set64(&tx4939_ccfgptr->pcfg, bit); else txx9_clear64(&tx4939_ccfgptr->pcfg, bit); diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index d18328b3f93..da601dd34c0 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -3,6 +3,7 @@ #include <linux/mm.h> #include <linux/uaccess.h> +#include <asm/tlbflush.h> /* The usual comment is "Caches aren't brain-dead on the <architecture>". * Unfortunately, that doesn't apply to PA-RISC. */ @@ -112,8 +113,10 @@ void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { - if (PageAnon(page)) + if (PageAnon(page)) { + flush_tlb_page(vma, vmaddr); flush_dcache_page_asm(page_to_phys(page), vmaddr); + } } #ifdef CONFIG_DEBUG_RODATA diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 5d7b8ce9fdf..22dadeb5869 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -177,7 +177,10 @@ struct vm_area_struct; #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _PAGE_KERNEL (_PAGE_PRESENT | _PAGE_EXEC | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) +#define _PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED) +#define _PAGE_KERNEL_EXEC (_PAGE_KERNEL_RO | _PAGE_EXEC) +#define _PAGE_KERNEL_RWX (_PAGE_KERNEL_EXEC | _PAGE_WRITE) +#define _PAGE_KERNEL (_PAGE_KERNEL_RO | _PAGE_WRITE) /* The pgd/pmd contains a ptr (in phys addr space); since all pgds/pmds * are page-aligned, we don't care about the PAGE_OFFSET bits, except @@ -208,7 +211,9 @@ struct vm_area_struct; #define PAGE_COPY PAGE_EXECREAD #define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED) #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) -#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_RWX __pgprot(_PAGE_KERNEL_RWX) +#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO) #define PAGE_KERNEL_UNC __pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE) #define PAGE_GATEWAY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_GATEWAY| _PAGE_READ) diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index 3eb82c2a5ec..9cbc2c3bf63 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -814,8 +814,14 @@ #define __NR_recvmmsg (__NR_Linux + 319) #define __NR_accept4 (__NR_Linux + 320) #define __NR_prlimit64 (__NR_Linux + 321) - -#define __NR_Linux_syscalls (__NR_prlimit64 + 1) +#define __NR_fanotify_init (__NR_Linux + 322) +#define __NR_fanotify_mark (__NR_Linux + 323) +#define __NR_clock_adjtime (__NR_Linux + 324) +#define __NR_name_to_handle_at (__NR_Linux + 325) +#define __NR_open_by_handle_at (__NR_Linux + 326) +#define __NR_syncfs (__NR_Linux + 327) + +#define __NR_Linux_syscalls (__NR_syncfs + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 3f11331c277..83335f3da5f 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -304,10 +304,20 @@ void flush_dcache_page(struct page *page) offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; addr = mpnt->vm_start + offset; + /* The TLB is the engine of coherence on parisc: The + * CPU is entitled to speculate any page with a TLB + * mapping, so here we kill the mapping then flush the + * page along a special flush only alias mapping. + * This guarantees that the page is no-longer in the + * cache for any process and nor may it be + * speculatively read in (until the user or kernel + * specifically accesses it, of course) */ + + flush_tlb_page(mpnt, addr); if (old_addr == 0 || (old_addr & (SHMLBA - 1)) != (addr & (SHMLBA - 1))) { __flush_cache_page(mpnt, addr, page_to_phys(page)); if (old_addr) - printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %s\n", old_addr, addr, mpnt->vm_file ? mpnt->vm_file->f_path.dentry->d_name.name : "(null)"); + printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %s\n", old_addr, addr, mpnt->vm_file ? (char *)mpnt->vm_file->f_path.dentry->d_name.name : "(null)"); old_addr = addr; } } @@ -499,6 +509,7 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long { BUG_ON(!vma->vm_mm->context); + flush_tlb_page(vma, vmaddr); __flush_cache_page(vma, vmaddr, page_to_phys(pfn_to_page(pfn))); } diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index ead8d2a1034..6f059443914 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -692,6 +692,9 @@ ENTRY(fault_vector_11) END(fault_vector_11) #endif + /* Fault vector is separately protected and *must* be on its own page */ + .align PAGE_SIZE +ENTRY(end_fault_vector) .import handle_interruption,code .import do_cpu_irq_mask,code diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index 145c5e4caaa..37aabd772fb 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -106,8 +106,9 @@ $bss_loop: #endif - /* Now initialize the PTEs themselves */ - ldo 0+_PAGE_KERNEL(%r0),%r3 /* Hardwired 0 phys addr start */ + /* Now initialize the PTEs themselves. We use RWX for + * everything ... it will get remapped correctly later */ + ldo 0+_PAGE_KERNEL_RWX(%r0),%r3 /* Hardwired 0 phys addr start */ ldi (1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */ load32 PA(pg0),%r1 diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 6e81bb596e5..cedbbb8b18d 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -61,8 +61,10 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/bug.h> +#include <linux/mm.h> #include <linux/slab.h> +#include <asm/pgtable.h> #include <asm/unwind.h> #if 0 @@ -214,7 +216,13 @@ void *module_alloc(unsigned long size) { if (size == 0) return NULL; - return vmalloc(size); + /* using RWX means less protection for modules, but it's + * easier than trying to map the text, data, init_text and + * init_data correctly */ + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_HIGHMEM, + PAGE_KERNEL_RWX, -1, + __builtin_return_address(0)); } #ifndef CONFIG_64BIT diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index a85823668cb..93ff3d90edd 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -817,10 +817,7 @@ ENTRY(purge_kernel_dcache_page) .procend ENDPROC(purge_kernel_dcache_page) - - .export flush_user_dcache_range_asm - -flush_user_dcache_range_asm: +ENTRY(flush_user_dcache_range_asm) .proc .callinfo NO_CALLS .entry @@ -839,6 +836,7 @@ flush_user_dcache_range_asm: .exit .procend +ENDPROC(flush_user_dcache_range_asm) ENTRY(flush_kernel_dcache_range_asm) .proc diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 88a0ad14a9c..dc9a6246232 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -228,3 +228,11 @@ asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, ((loff_t)lenhi << 32) | lenlo); } + +asmlinkage long compat_sys_fanotify_mark(int fan_fd, int flags, u32 mask_hi, + u32 mask_lo, int fd, + const char __user *pathname) +{ + return sys_fanotify_mark(fan_fd, flags, ((u64)mask_hi << 32) | mask_lo, + fd, pathname); +} diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 4be85ee10b8..a5b02ce4d41 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -420,6 +420,12 @@ ENTRY_COMP(recvmmsg) ENTRY_SAME(accept4) /* 320 */ ENTRY_SAME(prlimit64) + ENTRY_SAME(fanotify_init) + ENTRY_COMP(fanotify_mark) + ENTRY_COMP(clock_adjtime) + ENTRY_SAME(name_to_handle_at) /* 325 */ + ENTRY_COMP(open_by_handle_at) + ENTRY_SAME(syncfs) /* Nothing yet */ diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 8f1e4efd143..e1a55849bfa 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -69,6 +69,9 @@ SECTIONS /* End of text section */ _etext = .; + /* Start of data section */ + _sdata = .; + RODATA /* writeable */ @@ -134,6 +137,7 @@ SECTIONS . = ALIGN(16384); __init_begin = .; INIT_TEXT_SECTION(16384) + . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(16) /* we have to discard exit text and such at runtime, not link time */ .exit.text : diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index b1d126258de..5fa1e273006 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -371,24 +371,158 @@ static void __init setup_bootmem(void) request_resource(&sysram_resources[0], &pdcdata_resource); } +static void __init map_pages(unsigned long start_vaddr, + unsigned long start_paddr, unsigned long size, + pgprot_t pgprot, int force) +{ + pgd_t *pg_dir; + pmd_t *pmd; + pte_t *pg_table; + unsigned long end_paddr; + unsigned long start_pmd; + unsigned long start_pte; + unsigned long tmp1; + unsigned long tmp2; + unsigned long address; + unsigned long vaddr; + unsigned long ro_start; + unsigned long ro_end; + unsigned long fv_addr; + unsigned long gw_addr; + extern const unsigned long fault_vector_20; + extern void * const linux_gateway_page; + + ro_start = __pa((unsigned long)_text); + ro_end = __pa((unsigned long)&data_start); + fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK; + gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK; + + end_paddr = start_paddr + size; + + pg_dir = pgd_offset_k(start_vaddr); + +#if PTRS_PER_PMD == 1 + start_pmd = 0; +#else + start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); +#endif + start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)); + + address = start_paddr; + vaddr = start_vaddr; + while (address < end_paddr) { +#if PTRS_PER_PMD == 1 + pmd = (pmd_t *)__pa(pg_dir); +#else + pmd = (pmd_t *)pgd_address(*pg_dir); + + /* + * pmd is physical at this point + */ + + if (!pmd) { + pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE << PMD_ORDER); + pmd = (pmd_t *) __pa(pmd); + } + + pgd_populate(NULL, pg_dir, __va(pmd)); +#endif + pg_dir++; + + /* now change pmd to kernel virtual addresses */ + + pmd = (pmd_t *)__va(pmd) + start_pmd; + for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++, pmd++) { + + /* + * pg_table is physical at this point + */ + + pg_table = (pte_t *)pmd_address(*pmd); + if (!pg_table) { + pg_table = (pte_t *) + alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE); + pg_table = (pte_t *) __pa(pg_table); + } + + pmd_populate_kernel(NULL, pmd, __va(pg_table)); + + /* now change pg_table to kernel virtual addresses */ + + pg_table = (pte_t *) __va(pg_table) + start_pte; + for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++, pg_table++) { + pte_t pte; + + /* + * Map the fault vector writable so we can + * write the HPMC checksum. + */ + if (force) + pte = __mk_pte(address, pgprot); + else if (core_kernel_text(vaddr) && + address != fv_addr) + pte = __mk_pte(address, PAGE_KERNEL_EXEC); + else +#if defined(CONFIG_PARISC_PAGE_SIZE_4KB) + if (address >= ro_start && address < ro_end + && address != fv_addr + && address != gw_addr) + pte = __mk_pte(address, PAGE_KERNEL_RO); + else +#endif + pte = __mk_pte(address, pgprot); + + if (address >= end_paddr) { + if (force) + break; + else + pte_val(pte) = 0; + } + + set_pte(pg_table, pte); + + address += PAGE_SIZE; + vaddr += PAGE_SIZE; + } + start_pte = 0; + + if (address >= end_paddr) + break; + } + start_pmd = 0; + } +} + void free_initmem(void) { unsigned long addr; unsigned long init_begin = (unsigned long)__init_begin; unsigned long init_end = (unsigned long)__init_end; -#ifdef CONFIG_DEBUG_KERNEL + /* The init text pages are marked R-X. We have to + * flush the icache and mark them RW- + * + * This is tricky, because map_pages is in the init section. + * Do a dummy remap of the data section first (the data + * section is already PAGE_KERNEL) to pull in the TLB entries + * for map_kernel */ + map_pages(init_begin, __pa(init_begin), init_end - init_begin, + PAGE_KERNEL_RWX, 1); + /* now remap at PAGE_KERNEL since the TLB is pre-primed to execute + * map_pages */ + map_pages(init_begin, __pa(init_begin), init_end - init_begin, + PAGE_KERNEL, 1); + + /* force the kernel to see the new TLB entries */ + __flush_tlb_range(0, init_begin, init_end); /* Attempt to catch anyone trying to execute code here * by filling the page with BRK insns. */ memset((void *)init_begin, 0x00, init_end - init_begin); + /* finally dump all the instructions which were cached, since the + * pages are no-longer executable */ flush_icache_range(init_begin, init_end); -#endif - /* align __init_begin and __init_end to page size, - ignoring linker script where we might have tried to save RAM */ - init_begin = PAGE_ALIGN(init_begin); - init_end = PAGE_ALIGN(init_end); for (addr = init_begin; addr < init_end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); @@ -618,114 +752,6 @@ void show_mem(unsigned int filter) #endif } - -static void __init map_pages(unsigned long start_vaddr, unsigned long start_paddr, unsigned long size, pgprot_t pgprot) -{ - pgd_t *pg_dir; - pmd_t *pmd; - pte_t *pg_table; - unsigned long end_paddr; - unsigned long start_pmd; - unsigned long start_pte; - unsigned long tmp1; - unsigned long tmp2; - unsigned long address; - unsigned long ro_start; - unsigned long ro_end; - unsigned long fv_addr; - unsigned long gw_addr; - extern const unsigned long fault_vector_20; - extern void * const linux_gateway_page; - - ro_start = __pa((unsigned long)_text); - ro_end = __pa((unsigned long)&data_start); - fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK; - gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK; - - end_paddr = start_paddr + size; - - pg_dir = pgd_offset_k(start_vaddr); - -#if PTRS_PER_PMD == 1 - start_pmd = 0; -#else - start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); -#endif - start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)); - - address = start_paddr; - while (address < end_paddr) { -#if PTRS_PER_PMD == 1 - pmd = (pmd_t *)__pa(pg_dir); -#else - pmd = (pmd_t *)pgd_address(*pg_dir); - - /* - * pmd is physical at this point - */ - - if (!pmd) { - pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE << PMD_ORDER); - pmd = (pmd_t *) __pa(pmd); - } - - pgd_populate(NULL, pg_dir, __va(pmd)); -#endif - pg_dir++; - - /* now change pmd to kernel virtual addresses */ - - pmd = (pmd_t *)__va(pmd) + start_pmd; - for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++,pmd++) { - - /* - * pg_table is physical at this point - */ - - pg_table = (pte_t *)pmd_address(*pmd); - if (!pg_table) { - pg_table = (pte_t *) - alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE); - pg_table = (pte_t *) __pa(pg_table); - } - - pmd_populate_kernel(NULL, pmd, __va(pg_table)); - - /* now change pg_table to kernel virtual addresses */ - - pg_table = (pte_t *) __va(pg_table) + start_pte; - for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++,pg_table++) { - pte_t pte; - - /* - * Map the fault vector writable so we can - * write the HPMC checksum. - */ -#if defined(CONFIG_PARISC_PAGE_SIZE_4KB) - if (address >= ro_start && address < ro_end - && address != fv_addr - && address != gw_addr) - pte = __mk_pte(address, PAGE_KERNEL_RO); - else -#endif - pte = __mk_pte(address, pgprot); - - if (address >= end_paddr) - pte_val(pte) = 0; - - set_pte(pg_table, pte); - - address += PAGE_SIZE; - } - start_pte = 0; - - if (address >= end_paddr) - break; - } - start_pmd = 0; - } -} - /* * pagetable_init() sets up the page tables * @@ -750,14 +776,14 @@ static void __init pagetable_init(void) size = pmem_ranges[range].pages << PAGE_SHIFT; map_pages((unsigned long)__va(start_paddr), start_paddr, - size, PAGE_KERNEL); + size, PAGE_KERNEL, 0); } #ifdef CONFIG_BLK_DEV_INITRD if (initrd_end && initrd_end > mem_limit) { printk(KERN_INFO "initrd: mapping %08lx-%08lx\n", initrd_start, initrd_end); map_pages(initrd_start, __pa(initrd_start), - initrd_end - initrd_start, PAGE_KERNEL); + initrd_end - initrd_start, PAGE_KERNEL, 0); } #endif @@ -782,7 +808,7 @@ static void __init gateway_init(void) */ map_pages(linux_gateway_page_addr, __pa(&linux_gateway_page), - PAGE_SIZE, PAGE_GATEWAY); + PAGE_SIZE, PAGE_GATEWAY, 1); } #ifdef CONFIG_HPUX diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8f4d50b0adf..a3128ca0fe1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -193,6 +193,12 @@ config SYS_SUPPORTS_APM_EMULATION default y if PMAC_APM_EMU bool +config EPAPR_BOOT + bool + help + Used to allow a board to specify it wants an ePAPR compliant wrapper. + default n + config DEFAULT_UIMAGE bool help diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 2d38a50e66b..a597dd77b90 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -267,6 +267,11 @@ config PPC_EARLY_DEBUG_USBGECKO Select this to enable early debugging for Nintendo GameCube/Wii consoles via an external USB Gecko adapter. +config PPC_EARLY_DEBUG_WSP + bool "Early debugging via WSP's internal UART" + depends on PPC_WSP + select PPC_UDBG_16550 + endchoice config PPC_EARLY_DEBUG_44x_PHYSLOW diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 89178164af5..c26200b40a4 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -69,7 +69,8 @@ src-wlib := string.S crt0.S crtsavres.S stdio.c main.c \ cpm-serial.c stdlib.c mpc52xx-psc.c planetcore.c uartlite.c \ fsl-soc.c mpc8xx.c pq2.c ugecon.c src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c \ - cuboot-ebony.c cuboot-hotfoot.c treeboot-ebony.c prpmc2800.c \ + cuboot-ebony.c cuboot-hotfoot.c epapr.c treeboot-ebony.c \ + prpmc2800.c \ ps3-head.S ps3-hvcall.S ps3.c treeboot-bamboo.c cuboot-8xx.c \ cuboot-pq2.c cuboot-sequoia.c treeboot-walnut.c \ cuboot-bamboo.c cuboot-mpc7448hpc2.c cuboot-taishan.c \ @@ -127,7 +128,7 @@ quiet_cmd_bootas = BOOTAS $@ cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< quiet_cmd_bootar = BOOTAR $@ - cmd_bootar = $(CROSS32AR) -cr $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@ + cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@ $(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE $(call if_changed_dep,bootcc) @@ -182,6 +183,7 @@ image-$(CONFIG_PPC_HOLLY) += dtbImage.holly image-$(CONFIG_PPC_PRPMC2800) += dtbImage.prpmc2800 image-$(CONFIG_PPC_ISERIES) += zImage.iseries image-$(CONFIG_DEFAULT_UIMAGE) += uImage +image-$(CONFIG_EPAPR_BOOT) += zImage.epapr # # Targets which embed a device tree blob diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index f1c4dfc635b..0f7428a37ef 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -6,16 +6,28 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * NOTE: this code runs in 32 bit mode and is packaged as ELF32. + * NOTE: this code runs in 32 bit mode, is position-independent, + * and is packaged as ELF32. */ #include "ppc_asm.h" .text - /* a procedure descriptor used when booting this as a COFF file */ + /* A procedure descriptor used when booting this as a COFF file. + * When making COFF, this comes first in the link and we're + * linked at 0x500000. + */ .globl _zimage_start_opd _zimage_start_opd: - .long _zimage_start, 0, 0, 0 + .long 0x500000, 0, 0, 0 + +p_start: .long _start +p_etext: .long _etext +p_bss_start: .long __bss_start +p_end: .long _end + + .weak _platform_stack_top +p_pstack: .long _platform_stack_top .weak _zimage_start .globl _zimage_start @@ -24,37 +36,65 @@ _zimage_start: _zimage_start_lib: /* Work out the offset between the address we were linked at and the address where we're running. */ - bl 1f -1: mflr r0 - lis r9,1b@ha - addi r9,r9,1b@l - subf. r0,r9,r0 - beq 3f /* if running at same address as linked */ + bl .+4 +p_base: mflr r10 /* r10 now points to runtime addr of p_base */ + /* grab the link address of the dynamic section in r11 */ + addis r11,r10,(_GLOBAL_OFFSET_TABLE_-p_base)@ha + lwz r11,(_GLOBAL_OFFSET_TABLE_-p_base)@l(r11) + cmpwi r11,0 + beq 3f /* if not linked -pie */ + /* get the runtime address of the dynamic section in r12 */ + .weak __dynamic_start + addis r12,r10,(__dynamic_start-p_base)@ha + addi r12,r12,(__dynamic_start-p_base)@l + subf r11,r11,r12 /* runtime - linktime offset */ + + /* The dynamic section contains a series of tagged entries. + * We need the RELA and RELACOUNT entries. */ +RELA = 7 +RELACOUNT = 0x6ffffff9 + li r9,0 + li r0,0 +9: lwz r8,0(r12) /* get tag */ + cmpwi r8,0 + beq 10f /* end of list */ + cmpwi r8,RELA + bne 11f + lwz r9,4(r12) /* get RELA pointer in r9 */ + b 12f +11: addis r8,r8,(-RELACOUNT)@ha + cmpwi r8,RELACOUNT@l + bne 12f + lwz r0,4(r12) /* get RELACOUNT value in r0 */ +12: addi r12,r12,8 + b 9b - /* The .got2 section contains a list of addresses, so add - the address offset onto each entry. */ - lis r9,__got2_start@ha - addi r9,r9,__got2_start@l - lis r8,__got2_end@ha - addi r8,r8,__got2_end@l - subf. r8,r9,r8 + /* The relocation section contains a list of relocations. + * We now do the R_PPC_RELATIVE ones, which point to words + * which need to be initialized with addend + offset. + * The R_PPC_RELATIVE ones come first and there are RELACOUNT + * of them. */ +10: /* skip relocation if we don't have both */ + cmpwi r0,0 beq 3f - srwi. r8,r8,2 - mtctr r8 - add r9,r0,r9 -2: lwz r8,0(r9) - add r8,r8,r0 - stw r8,0(r9) - addi r9,r9,4 + cmpwi r9,0 + beq 3f + + add r9,r9,r11 /* Relocate RELA pointer */ + mtctr r0 +2: lbz r0,4+3(r9) /* ELF32_R_INFO(reloc->r_info) */ + cmpwi r0,22 /* R_PPC_RELATIVE */ + bne 3f + lwz r12,0(r9) /* reloc->r_offset */ + lwz r0,8(r9) /* reloc->r_addend */ + add r0,r0,r11 + stwx r0,r11,r12 + addi r9,r9,12 bdnz 2b /* Do a cache flush for our text, in case the loader didn't */ -3: lis r9,_start@ha - addi r9,r9,_start@l - add r9,r0,r9 - lis r8,_etext@ha - addi r8,r8,_etext@l - add r8,r0,r8 +3: lwz r9,p_start-p_base(r10) /* note: these are relocated now */ + lwz r8,p_etext-p_base(r10) 4: dcbf r0,r9 icbi r0,r9 addi r9,r9,0x20 @@ -64,27 +104,19 @@ _zimage_start_lib: isync /* Clear the BSS */ - lis r9,__bss_start@ha - addi r9,r9,__bss_start@l - add r9,r0,r9 - lis r8,_end@ha - addi r8,r8,_end@l - add r8,r0,r8 - li r10,0 -5: stw r10,0(r9) + lwz r9,p_bss_start-p_base(r10) + lwz r8,p_end-p_base(r10) + li r0,0 +5: stw r0,0(r9) addi r9,r9,4 cmplw cr0,r9,r8 blt 5b /* Possibly set up a custom stack */ -.weak _platform_stack_top - lis r8,_platform_stack_top@ha - addi r8,r8,_platform_stack_top@l + lwz r8,p_pstack-p_base(r10) cmpwi r8,0 beq 6f - add r8,r0,r8 lwz r1,0(r8) - add r1,r0,r1 li r0,0 stwu r0,-16(r1) /* establish a stack frame */ 6: diff --git a/arch/powerpc/boot/dts/p1020rdb.dts b/arch/powerpc/boot/dts/p1020rdb.dts index e0668f87779..d6a8ae45813 100644 --- a/arch/powerpc/boot/dts/p1020rdb.dts +++ b/arch/powerpc/boot/dts/p1020rdb.dts @@ -9,12 +9,11 @@ * option) any later version. */ -/dts-v1/; +/include/ "p1020si.dtsi" + / { - model = "fsl,P1020"; + model = "fsl,P1020RDB"; compatible = "fsl,P1020RDB"; - #address-cells = <2>; - #size-cells = <2>; aliases { serial0 = &serial0; @@ -26,34 +25,11 @@ pci1 = &pci1; }; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,P1020@0 { - device_type = "cpu"; - reg = <0x0>; - next-level-cache = <&L2>; - }; - - PowerPC,P1020@1 { - device_type = "cpu"; - reg = <0x1>; - next-level-cache = <&L2>; - }; - }; - memory { device_type = "memory"; }; localbus@ffe05000 { - #address-cells = <2>; - #size-cells = <1>; - compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus"; - reg = <0 0xffe05000 0 0x1000>; - interrupts = <19 2>; - interrupt-parent = <&mpic>; /* NOR, NAND Flashes and Vitesse 5 port L2 switch */ ranges = <0x0 0x0 0x0 0xef000000 0x01000000 @@ -165,88 +141,14 @@ }; soc@ffe00000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "fsl,p1020-immr", "simple-bus"; - ranges = <0x0 0x0 0xffe00000 0x100000>; - bus-frequency = <0>; // Filled out by uboot. - - ecm-law@0 { - compatible = "fsl,ecm-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <12>; - }; - - ecm@1000 { - compatible = "fsl,p1020-ecm", "fsl,ecm"; - reg = <0x1000 0x1000>; - interrupts = <16 2>; - interrupt-parent = <&mpic>; - }; - - memory-controller@2000 { - compatible = "fsl,p1020-memory-controller"; - reg = <0x2000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <16 2>; - }; - i2c@3000 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <0>; - compatible = "fsl-i2c"; - reg = <0x3000 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; rtc@68 { compatible = "dallas,ds1339"; reg = <0x68>; }; }; - i2c@3100 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <1>; - compatible = "fsl-i2c"; - reg = <0x3100 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - serial0: serial@4500 { - cell-index = <0>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4500 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; - - serial1: serial@4600 { - cell-index = <1>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4600 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; - spi@7000 { - cell-index = <0>; - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,espi"; - reg = <0x7000 0x1000>; - interrupts = <59 0x2>; - interrupt-parent = <&mpic>; - mode = "cpu"; fsl_m25p80@0 { #address-cells = <1>; @@ -294,66 +196,7 @@ }; }; - gpio: gpio-controller@f000 { - #gpio-cells = <2>; - compatible = "fsl,mpc8572-gpio"; - reg = <0xf000 0x100>; - interrupts = <47 0x2>; - interrupt-parent = <&mpic>; - gpio-controller; - }; - - L2: l2-cache-controller@20000 { - compatible = "fsl,p1020-l2-cache-controller"; - reg = <0x20000 0x1000>; - cache-line-size = <32>; // 32 bytes - cache-size = <0x40000>; // L2,256K - interrupt-parent = <&mpic>; - interrupts = <16 2>; - }; - - dma@21300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0x21300 0x4>; - ranges = <0x0 0x21100 0x200>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; - interrupt-parent = <&mpic>; - interrupts = <20 2>; - }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <21 2>; - }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <22 2>; - }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <23 2>; - }; - }; - mdio@24000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,etsec2-mdio"; - reg = <0x24000 0x1000 0xb0030 0x4>; phy0: ethernet-phy@0 { interrupt-parent = <&mpic>; @@ -369,10 +212,6 @@ }; mdio@25000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,etsec2-tbi"; - reg = <0x25000 0x1000 0xb1030 0x4>; tbi0: tbi-phy@11 { reg = <0x11>; @@ -381,97 +220,25 @@ }; enet0: ethernet@b0000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "fsl,etsec2"; - fsl,num_rx_queues = <0x8>; - fsl,num_tx_queues = <0x8>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupt-parent = <&mpic>; fixed-link = <1 1 1000 0 0>; phy-connection-type = "rgmii-id"; - queue-group@0 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb0000 0x1000>; - interrupts = <29 2 30 2 34 2>; - }; - - queue-group@1 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb4000 0x1000>; - interrupts = <17 2 18 2 24 2>; - }; }; enet1: ethernet@b1000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "fsl,etsec2"; - fsl,num_rx_queues = <0x8>; - fsl,num_tx_queues = <0x8>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupt-parent = <&mpic>; phy-handle = <&phy0>; tbi-handle = <&tbi0>; phy-connection-type = "sgmii"; - queue-group@0 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb1000 0x1000>; - interrupts = <35 2 36 2 40 2>; - }; - - queue-group@1 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb5000 0x1000>; - interrupts = <51 2 52 2 67 2>; - }; }; enet2: ethernet@b2000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "fsl,etsec2"; - fsl,num_rx_queues = <0x8>; - fsl,num_tx_queues = <0x8>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupt-parent = <&mpic>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; - queue-group@0 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb2000 0x1000>; - interrupts = <31 2 32 2 33 2>; - }; - - queue-group@1 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb6000 0x1000>; - interrupts = <25 2 26 2 27 2>; - }; }; usb@22000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl-usb2-dr"; - reg = <0x22000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <28 0x2>; phy_type = "ulpi"; }; @@ -481,82 +248,23 @@ it enables USB2. OTOH, U-Boot does create a new node when there isn't any. So, just comment it out. usb@23000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl-usb2-dr"; - reg = <0x23000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <46 0x2>; phy_type = "ulpi"; }; */ - sdhci@2e000 { - compatible = "fsl,p1020-esdhc", "fsl,esdhc"; - reg = <0x2e000 0x1000>; - interrupts = <72 0x2>; - interrupt-parent = <&mpic>; - /* Filled in by U-Boot */ - clock-frequency = <0>; - }; - - crypto@30000 { - compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", - "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; - reg = <0x30000 0x10000>; - interrupts = <45 2 58 2>; - interrupt-parent = <&mpic>; - fsl,num-channels = <4>; - fsl,channel-fifo-len = <24>; - fsl,exec-units-mask = <0xbfe>; - fsl,descriptor-types-mask = <0x3ab0ebf>; - }; - - mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; - }; - - msi@41600 { - compatible = "fsl,p1020-msi", "fsl,mpic-msi"; - reg = <0x41600 0x80>; - msi-available-ranges = <0 0x100>; - interrupts = < - 0xe0 0 - 0xe1 0 - 0xe2 0 - 0xe3 0 - 0xe4 0 - 0xe5 0 - 0xe6 0 - 0xe7 0>; - interrupt-parent = <&mpic>; - }; - - global-utilities@e0000 { //global utilities block - compatible = "fsl,p1020-guts"; - reg = <0xe0000 0x1000>; - fsl,has-rstcr; - }; }; pci0: pcie@ffe09000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe09000 0 0x1000>; - bus-range = <0 255>; ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <16 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x1 + 0000 0x0 0x0 0x2 &mpic 0x5 0x1 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 + >; pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; @@ -573,18 +281,16 @@ }; pci1: pcie@ffe0a000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe0a000 0 0x1000>; - bus-range = <0 255>; ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <16 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x0 0x1 + 0000 0x0 0x0 0x2 &mpic 0x1 0x1 + 0000 0x0 0x0 0x3 &mpic 0x2 0x1 + 0000 0x0 0x0 0x4 &mpic 0x3 0x1 + >; pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts new file mode 100644 index 00000000000..f0bf7f42f09 --- /dev/null +++ b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts @@ -0,0 +1,213 @@ +/* + * P1020 RDB Core0 Device Tree Source in CAMP mode. + * + * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache + * can be shared, all the other devices must be assigned to one core only. + * This dts file allows core0 to have memory, l2, i2c, spi, gpio, tdm, dma, usb, + * eth1, eth2, sdhc, crypto, global-util, message, pci0, pci1, msi. + * + * Please note to add "-b 0" for core0's dts compiling. + * + * Copyright 2011 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "p1020si.dtsi" + +/ { + model = "fsl,P1020RDB"; + compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP"; + + aliases { + ethernet1 = &enet1; + ethernet2 = &enet2; + serial0 = &serial0; + pci0 = &pci0; + pci1 = &pci1; + }; + + cpus { + PowerPC,P1020@1 { + status = "disabled"; + }; + }; + + memory { + device_type = "memory"; + }; + + localbus@ffe05000 { + status = "disabled"; + }; + + soc@ffe00000 { + i2c@3000 { + rtc@68 { + compatible = "dallas,ds1339"; + reg = <0x68>; + }; + }; + + serial1: serial@4600 { + status = "disabled"; + }; + + spi@7000 { + fsl_m25p80@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,espi-flash"; + reg = <0>; + linux,modalias = "fsl_m25p80"; + spi-max-frequency = <40000000>; + + partition@0 { + /* 512KB for u-boot Bootloader Image */ + reg = <0x0 0x00080000>; + label = "SPI (RO) U-Boot Image"; + read-only; + }; + + partition@80000 { + /* 512KB for DTB Image */ + reg = <0x00080000 0x00080000>; + label = "SPI (RO) DTB Image"; + read-only; + }; + + partition@100000 { + /* 4MB for Linux Kernel Image */ + reg = <0x00100000 0x00400000>; + label = "SPI (RO) Linux Kernel Image"; + read-only; + }; + + partition@500000 { + /* 4MB for Compressed RFS Image */ + reg = <0x00500000 0x00400000>; + label = "SPI (RO) Compressed RFS Image"; + read-only; + }; + + partition@900000 { + /* 7MB for JFFS2 based RFS */ + reg = <0x00900000 0x00700000>; + label = "SPI (RW) JFFS2 RFS"; + }; + }; + }; + + mdio@24000 { + phy0: ethernet-phy@0 { + interrupt-parent = <&mpic>; + interrupts = <3 1>; + reg = <0x0>; + }; + phy1: ethernet-phy@1 { + interrupt-parent = <&mpic>; + interrupts = <2 1>; + reg = <0x1>; + }; + }; + + mdio@25000 { + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + enet0: ethernet@b0000 { + status = "disabled"; + }; + + enet1: ethernet@b1000 { + phy-handle = <&phy0>; + tbi-handle = <&tbi0>; + phy-connection-type = "sgmii"; + }; + + enet2: ethernet@b2000 { + phy-handle = <&phy1>; + phy-connection-type = "rgmii-id"; + }; + + usb@22000 { + phy_type = "ulpi"; + }; + + /* USB2 is shared with localbus, so it must be disabled + by default. We can't put 'status = "disabled";' here + since U-Boot doesn't clear the status property when + it enables USB2. OTOH, U-Boot does create a new node + when there isn't any. So, just comment it out. + usb@23000 { + phy_type = "ulpi"; + }; + */ + + mpic: pic@40000 { + protected-sources = < + 42 29 30 34 /* serial1, enet0-queue-group0 */ + 17 18 24 45 /* enet0-queue-group1, crypto */ + >; + }; + + }; + + pci0: pcie@ffe09000 { + ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x1 + 0000 0x0 0x0 0x2 &mpic 0x5 0x1 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 + >; + pcie@0 { + reg = <0x0 0x0 0x0 0x0 0x0>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + ranges = <0x2000000 0x0 0xa0000000 + 0x2000000 0x0 0xa0000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x100000>; + }; + }; + + pci1: pcie@ffe0a000 { + ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x0 0x1 + 0000 0x0 0x0 0x2 &mpic 0x1 0x1 + 0000 0x0 0x0 0x3 &mpic 0x2 0x1 + 0000 0x0 0x0 0x4 &mpic 0x3 0x1 + >; + pcie@0 { + reg = <0x0 0x0 0x0 0x0 0x0>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + ranges = <0x2000000 0x0 0x80000000 + 0x2000000 0x0 0x80000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x100000>; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts new file mode 100644 index 00000000000..6ec02204a44 --- /dev/null +++ b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts @@ -0,0 +1,148 @@ +/* + * P1020 RDB Core1 Device Tree Source in CAMP mode. + * + * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache + * can be shared, all the other devices must be assigned to one core only. + * This dts allows core1 to have l2, eth0, crypto. + * + * Please note to add "-b 1" for core1's dts compiling. + * + * Copyright 2011 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "p1020si.dtsi" + +/ { + model = "fsl,P1020RDB"; + compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP"; + + aliases { + ethernet0 = &enet0; + serial0 = &serial1; + }; + + cpus { + PowerPC,P1020@0 { + status = "disabled"; + }; + }; + + memory { + device_type = "memory"; + }; + + localbus@ffe05000 { + status = "disabled"; + }; + + soc@ffe00000 { + ecm-law@0 { + status = "disabled"; + }; + + ecm@1000 { + status = "disabled"; + }; + + memory-controller@2000 { + status = "disabled"; + }; + + i2c@3000 { + status = "disabled"; + }; + + i2c@3100 { + status = "disabled"; + }; + + serial0: serial@4500 { + status = "disabled"; + }; + + spi@7000 { + status = "disabled"; + }; + + gpio: gpio-controller@f000 { + status = "disabled"; + }; + + dma@21300 { + status = "disabled"; + }; + + mdio@24000 { + status = "disabled"; + }; + + mdio@25000 { + status = "disabled"; + }; + + enet0: ethernet@b0000 { + fixed-link = <1 1 1000 0 0>; + phy-connection-type = "rgmii-id"; + + }; + + enet1: ethernet@b1000 { + status = "disabled"; + }; + + enet2: ethernet@b2000 { + status = "disabled"; + }; + + usb@22000 { + status = "disabled"; + }; + + sdhci@2e000 { + status = "disabled"; + }; + + mpic: pic@40000 { + protected-sources = < + 16 /* ecm, mem, L2, pci0, pci1 */ + 43 42 59 /* i2c, serial0, spi */ + 47 63 62 /* gpio, tdm */ + 20 21 22 23 /* dma */ + 03 02 /* mdio */ + 35 36 40 /* enet1-queue-group0 */ + 51 52 67 /* enet1-queue-group1 */ + 31 32 33 /* enet2-queue-group0 */ + 25 26 27 /* enet2-queue-group1 */ + 28 72 58 /* usb, sdhci, crypto */ + 0xb0 0xb1 0xb2 /* message */ + 0xb3 0xb4 0xb5 + 0xb6 0xb7 + 0xe0 0xe1 0xe2 /* msi */ + 0xe3 0xe4 0xe5 + 0xe6 0xe7 /* sdhci, crypto , pci */ + >; + }; + + msi@41600 { + status = "disabled"; + }; + + global-utilities@e0000 { //global utilities block + status = "disabled"; + }; + + }; + + pci0: pcie@ffe09000 { + status = "disabled"; + }; + + pci1: pcie@ffe0a000 { + status = "disabled"; + }; +}; diff --git a/arch/powerpc/boot/dts/p1020si.dtsi b/arch/powerpc/boot/dts/p1020si.dtsi new file mode 100644 index 00000000000..5c5acb66c3f --- /dev/null +++ b/arch/powerpc/boot/dts/p1020si.dtsi @@ -0,0 +1,377 @@ +/* + * P1020si Device Tree Source + * + * Copyright 2011 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/dts-v1/; +/ { + compatible = "fsl,P1020"; + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + PowerPC,P1020@0 { + device_type = "cpu"; + reg = <0x0>; + next-level-cache = <&L2>; + }; + + PowerPC,P1020@1 { + device_type = "cpu"; + reg = <0x1>; + next-level-cache = <&L2>; + }; + }; + + localbus@ffe05000 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus"; + reg = <0 0xffe05000 0 0x1000>; + interrupts = <19 2>; + interrupt-parent = <&mpic>; + }; + + soc@ffe00000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + compatible = "fsl,p1020-immr", "simple-bus"; + ranges = <0x0 0x0 0xffe00000 0x100000>; + bus-frequency = <0>; // Filled out by uboot. + + ecm-law@0 { + compatible = "fsl,ecm-law"; + reg = <0x0 0x1000>; + fsl,num-laws = <12>; + }; + + ecm@1000 { + compatible = "fsl,p1020-ecm", "fsl,ecm"; + reg = <0x1000 0x1000>; + interrupts = <16 2>; + interrupt-parent = <&mpic>; + }; + + memory-controller@2000 { + compatible = "fsl,p1020-memory-controller"; + reg = <0x2000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <16 2>; + }; + + i2c@3000 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <0>; + compatible = "fsl-i2c"; + reg = <0x3000 0x100>; + interrupts = <43 2>; + interrupt-parent = <&mpic>; + dfsrr; + }; + + i2c@3100 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <1>; + compatible = "fsl-i2c"; + reg = <0x3100 0x100>; + interrupts = <43 2>; + interrupt-parent = <&mpic>; + dfsrr; + }; + + serial0: serial@4500 { + cell-index = <0>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4500 0x100>; + clock-frequency = <0>; + interrupts = <42 2>; + interrupt-parent = <&mpic>; + }; + + serial1: serial@4600 { + cell-index = <1>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4600 0x100>; + clock-frequency = <0>; + interrupts = <42 2>; + interrupt-parent = <&mpic>; + }; + + spi@7000 { + cell-index = <0>; + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,espi"; + reg = <0x7000 0x1000>; + interrupts = <59 0x2>; + interrupt-parent = <&mpic>; + mode = "cpu"; + }; + + gpio: gpio-controller@f000 { + #gpio-cells = <2>; + compatible = "fsl,mpc8572-gpio"; + reg = <0xf000 0x100>; + interrupts = <47 0x2>; + interrupt-parent = <&mpic>; + gpio-controller; + }; + + L2: l2-cache-controller@20000 { + compatible = "fsl,p1020-l2-cache-controller"; + reg = <0x20000 0x1000>; + cache-line-size = <32>; // 32 bytes + cache-size = <0x40000>; // L2,256K + interrupt-parent = <&mpic>; + interrupts = <16 2>; + }; + + dma@21300 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,eloplus-dma"; + reg = <0x21300 0x4>; + ranges = <0x0 0x21100 0x200>; + cell-index = <0>; + dma-channel@0 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x0 0x80>; + cell-index = <0>; + interrupt-parent = <&mpic>; + interrupts = <20 2>; + }; + dma-channel@80 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x80 0x80>; + cell-index = <1>; + interrupt-parent = <&mpic>; + interrupts = <21 2>; + }; + dma-channel@100 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x100 0x80>; + cell-index = <2>; + interrupt-parent = <&mpic>; + interrupts = <22 2>; + }; + dma-channel@180 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x180 0x80>; + cell-index = <3>; + interrupt-parent = <&mpic>; + interrupts = <23 2>; + }; + }; + + mdio@24000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,etsec2-mdio"; + reg = <0x24000 0x1000 0xb0030 0x4>; + + }; + + mdio@25000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,etsec2-tbi"; + reg = <0x25000 0x1000 0xb1030 0x4>; + + }; + + enet0: ethernet@b0000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "network"; + model = "eTSEC"; + compatible = "fsl,etsec2"; + fsl,num_rx_queues = <0x8>; + fsl,num_tx_queues = <0x8>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupt-parent = <&mpic>; + + queue-group@0 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb0000 0x1000>; + interrupts = <29 2 30 2 34 2>; + }; + + queue-group@1 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb4000 0x1000>; + interrupts = <17 2 18 2 24 2>; + }; + }; + + enet1: ethernet@b1000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "network"; + model = "eTSEC"; + compatible = "fsl,etsec2"; + fsl,num_rx_queues = <0x8>; + fsl,num_tx_queues = <0x8>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupt-parent = <&mpic>; + + queue-group@0 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb1000 0x1000>; + interrupts = <35 2 36 2 40 2>; + }; + + queue-group@1 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb5000 0x1000>; + interrupts = <51 2 52 2 67 2>; + }; + }; + + enet2: ethernet@b2000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "network"; + model = "eTSEC"; + compatible = "fsl,etsec2"; + fsl,num_rx_queues = <0x8>; + fsl,num_tx_queues = <0x8>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupt-parent = <&mpic>; + + queue-group@0 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb2000 0x1000>; + interrupts = <31 2 32 2 33 2>; + }; + + queue-group@1 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb6000 0x1000>; + interrupts = <25 2 26 2 27 2>; + }; + }; + + usb@22000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl-usb2-dr"; + reg = <0x22000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <28 0x2>; + }; + + /* USB2 is shared with localbus, so it must be disabled + by default. We can't put 'status = "disabled";' here + since U-Boot doesn't clear the status property when + it enables USB2. OTOH, U-Boot does create a new node + when there isn't any. So, just comment it out. + usb@23000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl-usb2-dr"; + reg = <0x23000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <46 0x2>; + phy_type = "ulpi"; + }; + */ + + sdhci@2e000 { + compatible = "fsl,p1020-esdhc", "fsl,esdhc"; + reg = <0x2e000 0x1000>; + interrupts = <72 0x2>; + interrupt-parent = <&mpic>; + /* Filled in by U-Boot */ + clock-frequency = <0>; + }; + + crypto@30000 { + compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", + "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; + reg = <0x30000 0x10000>; + interrupts = <45 2 58 2>; + interrupt-parent = <&mpic>; + fsl,num-channels = <4>; + fsl,channel-fifo-len = <24>; + fsl,exec-units-mask = <0xbfe>; + fsl,descriptor-types-mask = <0x3ab0ebf>; + }; + + mpic: pic@40000 { + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <2>; + reg = <0x40000 0x40000>; + compatible = "chrp,open-pic"; + device_type = "open-pic"; + }; + + msi@41600 { + compatible = "fsl,p1020-msi", "fsl,mpic-msi"; + reg = <0x41600 0x80>; + msi-available-ranges = <0 0x100>; + interrupts = < + 0xe0 0 + 0xe1 0 + 0xe2 0 + 0xe3 0 + 0xe4 0 + 0xe5 0 + 0xe6 0 + 0xe7 0>; + interrupt-parent = <&mpic>; + }; + + global-utilities@e0000 { //global utilities block + compatible = "fsl,p1020-guts","fsl,p2020-guts"; + reg = <0xe0000 0x1000>; + fsl,has-rstcr; + }; + }; + + pci0: pcie@ffe09000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe09000 0 0x1000>; + bus-range = <0 255>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <16 2>; + }; + + pci1: pcie@ffe0a000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe0a000 0 0x1000>; + bus-range = <0 255>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <16 2>; + }; +}; diff --git a/arch/powerpc/boot/dts/p1022ds.dts b/arch/powerpc/boot/dts/p1022ds.dts index 59ef405c1c9..4f685a779f4 100644 --- a/arch/powerpc/boot/dts/p1022ds.dts +++ b/arch/powerpc/boot/dts/p1022ds.dts @@ -52,7 +52,7 @@ #size-cells = <1>; compatible = "fsl,p1022-elbc", "fsl,elbc", "simple-bus"; reg = <0 0xffe05000 0 0x1000>; - interrupts = <19 2>; + interrupts = <19 2 0 0>; ranges = <0x0 0x0 0xf 0xe8000000 0x08000000 0x1 0x0 0xf 0xe0000000 0x08000000 @@ -157,7 +157,7 @@ * IRQ8 is generated if the "EVENT" switch is pressed * and PX_CTL[EVESEL] is set to 00. */ - interrupts = <8 8>; + interrupts = <8 8 0 0>; }; }; @@ -178,13 +178,13 @@ ecm@1000 { compatible = "fsl,p1022-ecm", "fsl,ecm"; reg = <0x1000 0x1000>; - interrupts = <16 2>; + interrupts = <16 2 0 0>; }; memory-controller@2000 { compatible = "fsl,p1022-memory-controller"; reg = <0x2000 0x1000>; - interrupts = <16 2>; + interrupts = <16 2 0 0>; }; i2c@3000 { @@ -193,7 +193,7 @@ cell-index = <0>; compatible = "fsl-i2c"; reg = <0x3000 0x100>; - interrupts = <43 2>; + interrupts = <43 2 0 0>; dfsrr; }; @@ -203,7 +203,7 @@ cell-index = <1>; compatible = "fsl-i2c"; reg = <0x3100 0x100>; - interrupts = <43 2>; + interrupts = <43 2 0 0>; dfsrr; wm8776:codec@1a { @@ -220,7 +220,7 @@ compatible = "ns16550"; reg = <0x4500 0x100>; clock-frequency = <0>; - interrupts = <42 2>; + interrupts = <42 2 0 0>; }; serial1: serial@4600 { @@ -229,7 +229,7 @@ compatible = "ns16550"; reg = <0x4600 0x100>; clock-frequency = <0>; - interrupts = <42 2>; + interrupts = <42 2 0 0>; }; spi@7000 { @@ -238,7 +238,7 @@ #size-cells = <0>; compatible = "fsl,espi"; reg = <0x7000 0x1000>; - interrupts = <59 0x2>; + interrupts = <59 0x2 0 0>; espi,num-ss-bits = <4>; mode = "cpu"; @@ -275,7 +275,7 @@ compatible = "fsl,mpc8610-ssi"; cell-index = <0>; reg = <0x15000 0x100>; - interrupts = <75 2>; + interrupts = <75 2 0 0>; fsl,mode = "i2s-slave"; codec-handle = <&wm8776>; fsl,playback-dma = <&dma00>; @@ -294,25 +294,25 @@ compatible = "fsl,ssi-dma-channel"; reg = <0x0 0x80>; cell-index = <0>; - interrupts = <76 2>; + interrupts = <76 2 0 0>; }; dma01: dma-channel@80 { compatible = "fsl,ssi-dma-channel"; reg = <0x80 0x80>; cell-index = <1>; - interrupts = <77 2>; + interrupts = <77 2 0 0>; }; dma-channel@100 { compatible = "fsl,eloplus-dma-channel"; reg = <0x100 0x80>; cell-index = <2>; - interrupts = <78 2>; + interrupts = <78 2 0 0>; }; dma-channel@180 { compatible = "fsl,eloplus-dma-channel"; reg = <0x180 0x80>; cell-index = <3>; - interrupts = <79 2>; + interrupts = <79 2 0 0>; }; }; @@ -320,7 +320,7 @@ #gpio-cells = <2>; compatible = "fsl,mpc8572-gpio"; reg = <0xf000 0x100>; - interrupts = <47 0x2>; + interrupts = <47 0x2 0 0>; gpio-controller; }; @@ -329,7 +329,7 @@ reg = <0x20000 0x1000>; cache-line-size = <32>; // 32 bytes cache-size = <0x40000>; // L2, 256K - interrupts = <16 2>; + interrupts = <16 2 0 0>; }; dma@21300 { @@ -343,25 +343,25 @@ compatible = "fsl,eloplus-dma-channel"; reg = <0x0 0x80>; cell-index = <0>; - interrupts = <20 2>; + interrupts = <20 2 0 0>; }; dma-channel@80 { compatible = "fsl,eloplus-dma-channel"; reg = <0x80 0x80>; cell-index = <1>; - interrupts = <21 2>; + interrupts = <21 2 0 0>; }; dma-channel@100 { compatible = "fsl,eloplus-dma-channel"; reg = <0x100 0x80>; cell-index = <2>; - interrupts = <22 2>; + interrupts = <22 2 0 0>; }; dma-channel@180 { compatible = "fsl,eloplus-dma-channel"; reg = <0x180 0x80>; cell-index = <3>; - interrupts = <23 2>; + interrupts = <23 2 0 0>; }; }; @@ -370,7 +370,7 @@ #size-cells = <0>; compatible = "fsl-usb2-dr"; reg = <0x22000 0x1000>; - interrupts = <28 0x2>; + interrupts = <28 0x2 0 0>; phy_type = "ulpi"; }; @@ -381,11 +381,11 @@ reg = <0x24000 0x1000 0xb0030 0x4>; phy0: ethernet-phy@0 { - interrupts = <3 1>; + interrupts = <3 1 0 0>; reg = <0x1>; }; phy1: ethernet-phy@1 { - interrupts = <9 1>; + interrupts = <9 1 0 0>; reg = <0x2>; }; }; @@ -416,13 +416,13 @@ #address-cells = <1>; #size-cells = <1>; reg = <0xB0000 0x1000>; - interrupts = <29 2 30 2 34 2>; + interrupts = <29 2 0 0 30 2 0 0 34 2 0 0>; }; queue-group@1{ #address-cells = <1>; #size-cells = <1>; reg = <0xB4000 0x1000>; - interrupts = <17 2 18 2 24 2>; + interrupts = <17 2 0 0 18 2 0 0 24 2 0 0>; }; }; @@ -443,20 +443,20 @@ #address-cells = <1>; #size-cells = <1>; reg = <0xB1000 0x1000>; - interrupts = <35 2 36 2 40 2>; + interrupts = <35 2 0 0 36 2 0 0 40 2 0 0>; }; queue-group@1{ #address-cells = <1>; #size-cells = <1>; reg = <0xB5000 0x1000>; - interrupts = <51 2 52 2 67 2>; + interrupts = <51 2 0 0 52 2 0 0 67 2 0 0>; }; }; sdhci@2e000 { compatible = "fsl,p1022-esdhc", "fsl,esdhc"; reg = <0x2e000 0x1000>; - interrupts = <72 0x2>; + interrupts = <72 0x2 0 0>; fsl,sdhci-auto-cmd12; /* Filled in by U-Boot */ clock-frequency = <0>; @@ -467,7 +467,7 @@ "fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; reg = <0x30000 0x10000>; - interrupts = <45 2 58 2>; + interrupts = <45 2 0 0 58 2 0 0>; fsl,num-channels = <4>; fsl,channel-fifo-len = <24>; fsl,exec-units-mask = <0x97c>; @@ -478,14 +478,14 @@ compatible = "fsl,p1022-sata", "fsl,pq-sata-v2"; reg = <0x18000 0x1000>; cell-index = <1>; - interrupts = <74 0x2>; + interrupts = <74 0x2 0 0>; }; sata@19000 { compatible = "fsl,p1022-sata", "fsl,pq-sata-v2"; reg = <0x19000 0x1000>; cell-index = <2>; - interrupts = <41 0x2>; + interrupts = <41 0x2 0 0>; }; power@e0070{ @@ -496,21 +496,33 @@ display@10000 { compatible = "fsl,diu", "fsl,p1022-diu"; reg = <0x10000 1000>; - interrupts = <64 2>; + interrupts = <64 2 0 0>; }; timer@41100 { compatible = "fsl,mpic-global-timer"; - reg = <0x41100 0x204>; - interrupts = <0xf7 0x2>; + reg = <0x41100 0x100 0x41300 4>; + interrupts = <0 0 3 0 + 1 0 3 0 + 2 0 3 0 + 3 0 3 0>; + }; + + timer@42100 { + compatible = "fsl,mpic-global-timer"; + reg = <0x42100 0x100 0x42300 4>; + interrupts = <4 0 3 0 + 5 0 3 0 + 6 0 3 0 + 7 0 3 0>; }; mpic: pic@40000 { interrupt-controller; #address-cells = <0>; - #interrupt-cells = <2>; + #interrupt-cells = <4>; reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; + compatible = "fsl,mpic"; device_type = "open-pic"; }; @@ -519,14 +531,14 @@ reg = <0x41600 0x80>; msi-available-ranges = <0 0x100>; interrupts = < - 0xe0 0 - 0xe1 0 - 0xe2 0 - 0xe3 0 - 0xe4 0 - 0xe5 0 - 0xe6 0 - 0xe7 0>; + 0xe0 0 0 0 + 0xe1 0 0 0 + 0xe2 0 0 0 + 0xe3 0 0 0 + 0xe4 0 0 0 + 0xe5 0 0 0 + 0xe6 0 0 0 + 0xe7 0 0 0>; }; global-utilities@e0000 { //global utilities block @@ -547,7 +559,7 @@ ranges = <0x2000000 0x0 0xa0000000 0xc 0x20000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>; clock-frequency = <33333333>; - interrupts = <16 2>; + interrupts = <16 2 0 0>; interrupt-map-mask = <0xf800 0 0 7>; interrupt-map = < /* IDSEL 0x0 */ @@ -582,7 +594,7 @@ ranges = <0x2000000 0x0 0xc0000000 0xc 0x40000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0xf 0xffc20000 0x0 0x10000>; clock-frequency = <33333333>; - interrupts = <16 2>; + interrupts = <16 2 0 0>; interrupt-map-mask = <0xf800 0 0 7>; interrupt-map = < /* IDSEL 0x0 */ @@ -618,7 +630,7 @@ ranges = <0x2000000 0x0 0x80000000 0xc 0x00000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>; clock-frequency = <33333333>; - interrupts = <16 2>; + interrupts = <16 2 0 0>; interrupt-map-mask = <0xf800 0 0 7>; interrupt-map = < /* IDSEL 0x0 */ diff --git a/arch/powerpc/boot/dts/p2020ds.dts b/arch/powerpc/boot/dts/p2020ds.dts index 11019142813..2bcf3683d22 100644 --- a/arch/powerpc/boot/dts/p2020ds.dts +++ b/arch/powerpc/boot/dts/p2020ds.dts @@ -1,7 +1,7 @@ /* * P2020 DS Device Tree Source * - * Copyright 2009 Freescale Semiconductor Inc. + * Copyright 2009-2011 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -9,12 +9,11 @@ * option) any later version. */ -/dts-v1/; +/include/ "p2020si.dtsi" + / { - model = "fsl,P2020"; + model = "fsl,P2020DS"; compatible = "fsl,P2020DS"; - #address-cells = <2>; - #size-cells = <2>; aliases { ethernet0 = &enet0; @@ -27,35 +26,13 @@ pci2 = &pci2; }; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,P2020@0 { - device_type = "cpu"; - reg = <0x0>; - next-level-cache = <&L2>; - }; - - PowerPC,P2020@1 { - device_type = "cpu"; - reg = <0x1>; - next-level-cache = <&L2>; - }; - }; memory { device_type = "memory"; }; localbus@ffe05000 { - #address-cells = <2>; - #size-cells = <1>; compatible = "fsl,elbc", "simple-bus"; - reg = <0 0xffe05000 0 0x1000>; - interrupts = <19 2>; - interrupt-parent = <&mpic>; - ranges = <0x0 0x0 0x0 0xe8000000 0x08000000 0x1 0x0 0x0 0xe0000000 0x08000000 0x2 0x0 0x0 0xffa00000 0x00040000 @@ -158,352 +135,77 @@ }; soc@ffe00000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "fsl,p2020-immr", "simple-bus"; - ranges = <0x0 0 0xffe00000 0x100000>; - bus-frequency = <0>; // Filled out by uboot. - - ecm-law@0 { - compatible = "fsl,ecm-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <12>; - }; - - ecm@1000 { - compatible = "fsl,p2020-ecm", "fsl,ecm"; - reg = <0x1000 0x1000>; - interrupts = <17 2>; - interrupt-parent = <&mpic>; - }; - - memory-controller@2000 { - compatible = "fsl,p2020-memory-controller"; - reg = <0x2000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <18 2>; - }; - - i2c@3000 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <0>; - compatible = "fsl-i2c"; - reg = <0x3000 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - i2c@3100 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <1>; - compatible = "fsl-i2c"; - reg = <0x3100 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - serial0: serial@4500 { - cell-index = <0>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4500 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; - - serial1: serial@4600 { - cell-index = <1>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4600 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; - - spi@7000 { - compatible = "fsl,espi"; - reg = <0x7000 0x1000>; - interrupts = <59 0x2>; - interrupt-parent = <&mpic>; + usb@22000 { + phy_type = "ulpi"; }; - dma@c300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0xc300 0x4>; - ranges = <0x0 0xc100 0x200>; - cell-index = <1>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; + mdio@24520 { + phy0: ethernet-phy@0 { interrupt-parent = <&mpic>; - interrupts = <76 2>; + interrupts = <3 1>; + reg = <0x0>; }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; + phy1: ethernet-phy@1 { interrupt-parent = <&mpic>; - interrupts = <77 2>; + interrupts = <3 1>; + reg = <0x1>; }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; + phy2: ethernet-phy@2 { interrupt-parent = <&mpic>; - interrupts = <78 2>; + interrupts = <3 1>; + reg = <0x2>; }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <79 2>; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; }; - }; - gpio: gpio-controller@f000 { - #gpio-cells = <2>; - compatible = "fsl,mpc8572-gpio"; - reg = <0xf000 0x100>; - interrupts = <47 0x2>; - interrupt-parent = <&mpic>; - gpio-controller; }; - L2: l2-cache-controller@20000 { - compatible = "fsl,p2020-l2-cache-controller"; - reg = <0x20000 0x1000>; - cache-line-size = <32>; // 32 bytes - cache-size = <0x80000>; // L2, 512k - interrupt-parent = <&mpic>; - interrupts = <16 2>; + mdio@25520 { + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; - dma@21300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0x21300 0x4>; - ranges = <0x0 0x21100 0x200>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; - interrupt-parent = <&mpic>; - interrupts = <20 2>; - }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <21 2>; + mdio@26520 { + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <22 2>; - }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <23 2>; - }; - }; - usb@22000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl-usb2-dr"; - reg = <0x22000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <28 0x2>; - phy_type = "ulpi"; }; enet0: ethernet@24000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <0>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x24000 0x1000>; - ranges = <0x0 0x24000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <29 2 30 2 34 2>; - interrupt-parent = <&mpic>; tbi-handle = <&tbi0>; phy-handle = <&phy0>; phy-connection-type = "rgmii-id"; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-mdio"; - reg = <0x520 0x20>; - - phy0: ethernet-phy@0 { - interrupt-parent = <&mpic>; - interrupts = <3 1>; - reg = <0x0>; - }; - phy1: ethernet-phy@1 { - interrupt-parent = <&mpic>; - interrupts = <3 1>; - reg = <0x1>; - }; - phy2: ethernet-phy@2 { - interrupt-parent = <&mpic>; - interrupts = <3 1>; - reg = <0x2>; - }; - tbi0: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; }; enet1: ethernet@25000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x25000 0x1000>; - ranges = <0x0 0x25000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <35 2 36 2 40 2>; - interrupt-parent = <&mpic>; tbi-handle = <&tbi1>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-tbi"; - reg = <0x520 0x20>; - - tbi1: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; }; enet2: ethernet@26000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <2>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x26000 0x1000>; - ranges = <0x0 0x26000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <31 2 32 2 33 2>; - interrupt-parent = <&mpic>; tbi-handle = <&tbi2>; phy-handle = <&phy2>; phy-connection-type = "rgmii-id"; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-tbi"; - reg = <0x520 0x20>; - - tbi2: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - }; - - sdhci@2e000 { - compatible = "fsl,p2020-esdhc", "fsl,esdhc"; - reg = <0x2e000 0x1000>; - interrupts = <72 0x2>; - interrupt-parent = <&mpic>; - /* Filled in by U-Boot */ - clock-frequency = <0>; - }; - - crypto@30000 { - compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", - "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; - reg = <0x30000 0x10000>; - interrupts = <45 2 58 2>; - interrupt-parent = <&mpic>; - fsl,num-channels = <4>; - fsl,channel-fifo-len = <24>; - fsl,exec-units-mask = <0xbfe>; - fsl,descriptor-types-mask = <0x3ab0ebf>; }; - mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; - }; msi@41600 { compatible = "fsl,mpic-msi"; - reg = <0x41600 0x80>; - msi-available-ranges = <0 0x100>; - interrupts = < - 0xe0 0 - 0xe1 0 - 0xe2 0 - 0xe3 0 - 0xe4 0 - 0xe5 0 - 0xe6 0 - 0xe7 0>; - interrupt-parent = <&mpic>; - }; - - global-utilities@e0000 { //global utilities block - compatible = "fsl,p2020-guts"; - reg = <0xe0000 0x1000>; - fsl,has-rstcr; }; }; pci0: pcie@ffe08000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe08000 0 0x1000>; - bus-range = <0 255>; ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <24 2>; interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0 */ @@ -528,18 +230,8 @@ }; pci1: pcie@ffe09000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe09000 0 0x1000>; - bus-range = <0 255>; ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <25 2>; interrupt-map-mask = <0xff00 0x0 0x0 0x7>; interrupt-map = < @@ -667,18 +359,8 @@ }; pci2: pcie@ffe0a000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe0a000 0 0x1000>; - bus-range = <0 255>; ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <26 2>; interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0 */ diff --git a/arch/powerpc/boot/dts/p2020rdb.dts b/arch/powerpc/boot/dts/p2020rdb.dts index e2d48fd4416..3782a58f13b 100644 --- a/arch/powerpc/boot/dts/p2020rdb.dts +++ b/arch/powerpc/boot/dts/p2020rdb.dts @@ -9,12 +9,11 @@ * option) any later version. */ -/dts-v1/; +/include/ "p2020si.dtsi" + / { - model = "fsl,P2020"; + model = "fsl,P2020RDB"; compatible = "fsl,P2020RDB"; - #address-cells = <2>; - #size-cells = <2>; aliases { ethernet0 = &enet0; @@ -26,34 +25,11 @@ pci1 = &pci1; }; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,P2020@0 { - device_type = "cpu"; - reg = <0x0>; - next-level-cache = <&L2>; - }; - - PowerPC,P2020@1 { - device_type = "cpu"; - reg = <0x1>; - next-level-cache = <&L2>; - }; - }; - memory { device_type = "memory"; }; localbus@ffe05000 { - #address-cells = <2>; - #size-cells = <1>; - compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus"; - reg = <0 0xffe05000 0 0x1000>; - interrupts = <19 2>; - interrupt-parent = <&mpic>; /* NOR and NAND Flashes */ ranges = <0x0 0x0 0x0 0xef000000 0x01000000 @@ -165,90 +141,16 @@ }; soc@ffe00000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "fsl,p2020-immr", "simple-bus"; - ranges = <0x0 0x0 0xffe00000 0x100000>; - bus-frequency = <0>; // Filled out by uboot. - - ecm-law@0 { - compatible = "fsl,ecm-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <12>; - }; - - ecm@1000 { - compatible = "fsl,p2020-ecm", "fsl,ecm"; - reg = <0x1000 0x1000>; - interrupts = <17 2>; - interrupt-parent = <&mpic>; - }; - - memory-controller@2000 { - compatible = "fsl,p2020-memory-controller"; - reg = <0x2000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <18 2>; - }; - i2c@3000 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <0>; - compatible = "fsl-i2c"; - reg = <0x3000 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; rtc@68 { compatible = "dallas,ds1339"; reg = <0x68>; }; }; - i2c@3100 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <1>; - compatible = "fsl-i2c"; - reg = <0x3100 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - serial0: serial@4500 { - cell-index = <0>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4500 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; - - serial1: serial@4600 { - cell-index = <1>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4600 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; + spi@7000 { - spi@7000 { - cell-index = <0>; - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,espi"; - reg = <0x7000 0x1000>; - interrupts = <59 0x2>; - interrupt-parent = <&mpic>; - mode = "cpu"; - - fsl_m25p80@0 { + fsl_m25p80@0 { #address-cells = <1>; #size-cells = <1>; compatible = "fsl,espi-flash"; @@ -294,254 +196,68 @@ }; }; - dma@c300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0xc300 0x4>; - ranges = <0x0 0xc100 0x200>; - cell-index = <1>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; - interrupt-parent = <&mpic>; - interrupts = <76 2>; - }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <77 2>; - }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <78 2>; - }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <79 2>; - }; - }; - - gpio: gpio-controller@f000 { - #gpio-cells = <2>; - compatible = "fsl,mpc8572-gpio"; - reg = <0xf000 0x100>; - interrupts = <47 0x2>; - interrupt-parent = <&mpic>; - gpio-controller; - }; - - L2: l2-cache-controller@20000 { - compatible = "fsl,p2020-l2-cache-controller"; - reg = <0x20000 0x1000>; - cache-line-size = <32>; // 32 bytes - cache-size = <0x80000>; // L2,512K - interrupt-parent = <&mpic>; - interrupts = <16 2>; + usb@22000 { + phy_type = "ulpi"; }; - dma@21300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0x21300 0x4>; - ranges = <0x0 0x21100 0x200>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; + mdio@24520 { + phy0: ethernet-phy@0 { interrupt-parent = <&mpic>; - interrupts = <20 2>; - }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <21 2>; - }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <22 2>; - }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; + interrupts = <3 1>; + reg = <0x0>; + }; + phy1: ethernet-phy@1 { interrupt-parent = <&mpic>; - interrupts = <23 2>; + interrupts = <3 1>; + reg = <0x1>; + }; + }; + + mdio@25520 { + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; }; }; - usb@22000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl-usb2-dr"; - reg = <0x22000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <28 0x2>; - phy_type = "ulpi"; + mdio@26520 { + status = "disabled"; }; enet0: ethernet@24000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <0>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x24000 0x1000>; - ranges = <0x0 0x24000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <29 2 30 2 34 2>; - interrupt-parent = <&mpic>; fixed-link = <1 1 1000 0 0>; phy-connection-type = "rgmii-id"; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-mdio"; - reg = <0x520 0x20>; - - phy0: ethernet-phy@0 { - interrupt-parent = <&mpic>; - interrupts = <3 1>; - reg = <0x0>; - }; - phy1: ethernet-phy@1 { - interrupt-parent = <&mpic>; - interrupts = <3 1>; - reg = <0x1>; - }; - }; }; enet1: ethernet@25000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x25000 0x1000>; - ranges = <0x0 0x25000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <35 2 36 2 40 2>; - interrupt-parent = <&mpic>; tbi-handle = <&tbi0>; phy-handle = <&phy0>; phy-connection-type = "sgmii"; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-tbi"; - reg = <0x520 0x20>; - - tbi0: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; }; enet2: ethernet@26000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <2>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x26000 0x1000>; - ranges = <0x0 0x26000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <31 2 32 2 33 2>; - interrupt-parent = <&mpic>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; }; - sdhci@2e000 { - compatible = "fsl,p2020-esdhc", "fsl,esdhc"; - reg = <0x2e000 0x1000>; - interrupts = <72 0x2>; - interrupt-parent = <&mpic>; - /* Filled in by U-Boot */ - clock-frequency = <0>; - }; - - crypto@30000 { - compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", - "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; - reg = <0x30000 0x10000>; - interrupts = <45 2 58 2>; - interrupt-parent = <&mpic>; - fsl,num-channels = <4>; - fsl,channel-fifo-len = <24>; - fsl,exec-units-mask = <0xbfe>; - fsl,descriptor-types-mask = <0x3ab0ebf>; - }; - - mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; - }; - - msi@41600 { - compatible = "fsl,p2020-msi", "fsl,mpic-msi"; - reg = <0x41600 0x80>; - msi-available-ranges = <0 0x100>; - interrupts = < - 0xe0 0 - 0xe1 0 - 0xe2 0 - 0xe3 0 - 0xe4 0 - 0xe5 0 - 0xe6 0 - 0xe7 0>; - interrupt-parent = <&mpic>; - }; + }; - global-utilities@e0000 { //global utilities block - compatible = "fsl,p2020-guts"; - reg = <0xe0000 0x1000>; - fsl,has-rstcr; - }; + pci0: pcie@ffe08000 { + status = "disabled"; }; - pci0: pcie@ffe09000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe09000 0 0x1000>; - bus-range = <0 255>; + pci1: pcie@ffe09000 { ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <25 2>; - pcie@0 { + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x1 + 0000 0x0 0x0 0x2 &mpic 0x5 0x1 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 + >; + pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; #address-cells = <3>; @@ -556,19 +272,17 @@ }; }; - pci1: pcie@ffe0a000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe0a000 0 0x1000>; - bus-range = <0 255>; + pci2: pcie@ffe0a000 { ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <26 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x0 0x1 + 0000 0x0 0x0 0x2 &mpic 0x1 0x1 + 0000 0x0 0x0 0x3 &mpic 0x2 0x1 + 0000 0x0 0x0 0x4 &mpic 0x3 0x1 + >; pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts index b69c3a5dc85..fc8ddddfccb 100644 --- a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts +++ b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts @@ -14,12 +14,11 @@ * option) any later version. */ -/dts-v1/; +/include/ "p2020si.dtsi" + / { - model = "fsl,P2020"; + model = "fsl,P2020RDB"; compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP"; - #address-cells = <2>; - #size-cells = <2>; aliases { ethernet1 = &enet1; @@ -29,91 +28,33 @@ }; cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,P2020@0 { - device_type = "cpu"; - reg = <0x0>; - next-level-cache = <&L2>; + PowerPC,P2020@1 { + status = "disabled"; }; + }; memory { device_type = "memory"; }; - soc@ffe00000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "fsl,p2020-immr", "simple-bus"; - ranges = <0x0 0x0 0xffe00000 0x100000>; - bus-frequency = <0>; // Filled out by uboot. - - ecm-law@0 { - compatible = "fsl,ecm-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <12>; - }; - - ecm@1000 { - compatible = "fsl,p2020-ecm", "fsl,ecm"; - reg = <0x1000 0x1000>; - interrupts = <17 2>; - interrupt-parent = <&mpic>; - }; - - memory-controller@2000 { - compatible = "fsl,p2020-memory-controller"; - reg = <0x2000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <18 2>; - }; + localbus@ffe05000 { + status = "disabled"; + }; + soc@ffe00000 { i2c@3000 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <0>; - compatible = "fsl-i2c"; - reg = <0x3000 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; rtc@68 { compatible = "dallas,ds1339"; reg = <0x68>; }; }; - i2c@3100 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <1>; - compatible = "fsl-i2c"; - reg = <0x3100 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - serial0: serial@4500 { - cell-index = <0>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4500 0x100>; - clock-frequency = <0>; + serial1: serial@4600 { + status = "disabled"; }; spi@7000 { - cell-index = <0>; - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,espi"; - reg = <0x7000 0x1000>; - interrupts = <59 0x2>; - interrupt-parent = <&mpic>; - mode = "cpu"; fsl_m25p80@0 { #address-cells = <1>; @@ -161,76 +102,15 @@ }; }; - gpio: gpio-controller@f000 { - #gpio-cells = <2>; - compatible = "fsl,mpc8572-gpio"; - reg = <0xf000 0x100>; - interrupts = <47 0x2>; - interrupt-parent = <&mpic>; - gpio-controller; - }; - - L2: l2-cache-controller@20000 { - compatible = "fsl,p2020-l2-cache-controller"; - reg = <0x20000 0x1000>; - cache-line-size = <32>; // 32 bytes - cache-size = <0x80000>; // L2,512K - interrupt-parent = <&mpic>; - interrupts = <16 2>; - }; - - dma@21300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0x21300 0x4>; - ranges = <0x0 0x21100 0x200>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; - interrupt-parent = <&mpic>; - interrupts = <20 2>; - }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <21 2>; - }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <22 2>; - }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <23 2>; - }; + dma@c300 { + status = "disabled"; }; usb@22000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl-usb2-dr"; - reg = <0x22000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <28 0x2>; phy_type = "ulpi"; }; mdio@24520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-mdio"; - reg = <0x24520 0x20>; phy0: ethernet-phy@0 { interrupt-parent = <&mpic>; @@ -245,29 +125,21 @@ }; mdio@25520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-tbi"; - reg = <0x26520 0x20>; - tbi0: tbi-phy@11 { reg = <0x11>; device_type = "tbi-phy"; }; }; + mdio@26520 { + status = "disabled"; + }; + + enet0: ethernet@24000 { + status = "disabled"; + }; + enet1: ethernet@25000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x25000 0x1000>; - ranges = <0x0 0x25000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <35 2 36 2 40 2>; - interrupt-parent = <&mpic>; tbi-handle = <&tbi0>; phy-handle = <&phy0>; phy-connection-type = "sgmii"; @@ -275,49 +147,12 @@ }; enet2: ethernet@26000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <2>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x26000 0x1000>; - ranges = <0x0 0x26000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <31 2 32 2 33 2>; - interrupt-parent = <&mpic>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; }; - sdhci@2e000 { - compatible = "fsl,p2020-esdhc", "fsl,esdhc"; - reg = <0x2e000 0x1000>; - interrupts = <72 0x2>; - interrupt-parent = <&mpic>; - /* Filled in by U-Boot */ - clock-frequency = <0>; - }; - - crypto@30000 { - compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", - "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; - reg = <0x30000 0x10000>; - interrupts = <45 2 58 2>; - interrupt-parent = <&mpic>; - fsl,num-channels = <4>; - fsl,channel-fifo-len = <24>; - fsl,exec-units-mask = <0xbfe>; - fsl,descriptor-types-mask = <0x3ab0ebf>; - }; mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; protected-sources = < 42 76 77 78 79 /* serial1 , dma2 */ 29 30 34 26 /* enet0, pci1 */ @@ -326,26 +161,28 @@ >; }; - global-utilities@e0000 { - compatible = "fsl,p2020-guts"; - reg = <0xe0000 0x1000>; - fsl,has-rstcr; + msi@41600 { + status = "disabled"; }; + + }; - pci0: pcie@ffe09000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe09000 0 0x1000>; - bus-range = <0 255>; + pci0: pcie@ffe08000 { + status = "disabled"; + }; + + pci1: pcie@ffe09000 { ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <25 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x1 + 0000 0x0 0x0 0x2 &mpic 0x5 0x1 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 + >; pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; @@ -360,4 +197,8 @@ 0x0 0x100000>; }; }; + + pci2: pcie@ffe0a000 { + status = "disabled"; + }; }; diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts index 7a31d46c01b..261c34ba45e 100644 --- a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts +++ b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts @@ -15,27 +15,21 @@ * option) any later version. */ -/dts-v1/; +/include/ "p2020si.dtsi" + / { - model = "fsl,P2020"; + model = "fsl,P2020RDB"; compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP"; - #address-cells = <2>; - #size-cells = <2>; aliases { ethernet0 = &enet0; - serial0 = &serial0; + serial0 = &serial1; pci1 = &pci1; }; cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,P2020@1 { - device_type = "cpu"; - reg = <0x1>; - next-level-cache = <&L2>; + PowerPC,P2020@0 { + status = "disabled"; }; }; @@ -43,20 +37,37 @@ device_type = "memory"; }; + localbus@ffe05000 { + status = "disabled"; + }; + soc@ffe00000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "fsl,p2020-immr", "simple-bus"; - ranges = <0x0 0x0 0xffe00000 0x100000>; - bus-frequency = <0>; // Filled out by uboot. - - serial0: serial@4600 { - cell-index = <1>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4600 0x100>; - clock-frequency = <0>; + ecm-law@0 { + status = "disabled"; + }; + + ecm@1000 { + status = "disabled"; + }; + + memory-controller@2000 { + status = "disabled"; + }; + + i2c@3000 { + status = "disabled"; + }; + + i2c@3100 { + status = "disabled"; + }; + + serial0: serial@4500 { + status = "disabled"; + }; + + spi@7000 { + status = "disabled"; }; dma@c300 { @@ -96,6 +107,10 @@ }; }; + gpio: gpio-controller@f000 { + status = "disabled"; + }; + L2: l2-cache-controller@20000 { compatible = "fsl,p2020-l2-cache-controller"; reg = <0x20000 0x1000>; @@ -104,31 +119,49 @@ interrupt-parent = <&mpic>; }; + dma@21300 { + status = "disabled"; + }; + + usb@22000 { + status = "disabled"; + }; + + mdio@24520 { + status = "disabled"; + }; + + mdio@25520 { + status = "disabled"; + }; + + mdio@26520 { + status = "disabled"; + }; enet0: ethernet@24000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <0>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x24000 0x1000>; - ranges = <0x0 0x24000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <29 2 30 2 34 2>; - interrupt-parent = <&mpic>; fixed-link = <1 1 1000 0 0>; phy-connection-type = "rgmii-id"; }; + enet1: ethernet@25000 { + status = "disabled"; + }; + + enet2: ethernet@26000 { + status = "disabled"; + }; + + sdhci@2e000 { + status = "disabled"; + }; + + crypto@30000 { + status = "disabled"; + }; + mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; protected-sources = < 17 18 43 42 59 47 /*ecm, mem, i2c, serial0, spi,gpio */ 16 20 21 22 23 28 /* L2, dma1, USB */ @@ -152,21 +185,32 @@ 0xe7 0>; interrupt-parent = <&mpic>; }; + + global-utilities@e0000 { //global utilities block + status = "disabled"; + }; + }; - pci1: pcie@ffe0a000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe0a000 0 0x1000>; - bus-range = <0 255>; + pci0: pcie@ffe08000 { + status = "disabled"; + }; + + pci1: pcie@ffe09000 { + status = "disabled"; + }; + + pci2: pcie@ffe0a000 { ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <26 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x0 0x1 + 0000 0x0 0x0 0x2 &mpic 0x1 0x1 + 0000 0x0 0x0 0x3 &mpic 0x2 0x1 + 0000 0x0 0x0 0x4 &mpic 0x3 0x1 + >; pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/p2020si.dtsi b/arch/powerpc/boot/dts/p2020si.dtsi new file mode 100644 index 00000000000..6def17f265d --- /dev/null +++ b/arch/powerpc/boot/dts/p2020si.dtsi @@ -0,0 +1,382 @@ +/* + * P2020 Device Tree Source + * + * Copyright 2011 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/dts-v1/; +/ { + compatible = "fsl,P2020"; + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + PowerPC,P2020@0 { + device_type = "cpu"; + reg = <0x0>; + next-level-cache = <&L2>; + }; + + PowerPC,P2020@1 { + device_type = "cpu"; + reg = <0x1>; + next-level-cache = <&L2>; + }; + }; + + localbus@ffe05000 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus"; + reg = <0 0xffe05000 0 0x1000>; + interrupts = <19 2>; + interrupt-parent = <&mpic>; + }; + + soc@ffe00000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + compatible = "fsl,p2020-immr", "simple-bus"; + ranges = <0x0 0x0 0xffe00000 0x100000>; + bus-frequency = <0>; // Filled out by uboot. + + ecm-law@0 { + compatible = "fsl,ecm-law"; + reg = <0x0 0x1000>; + fsl,num-laws = <12>; + }; + + ecm@1000 { + compatible = "fsl,p2020-ecm", "fsl,ecm"; + reg = <0x1000 0x1000>; + interrupts = <17 2>; + interrupt-parent = <&mpic>; + }; + + memory-controller@2000 { + compatible = "fsl,p2020-memory-controller"; + reg = <0x2000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <18 2>; + }; + + i2c@3000 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <0>; + compatible = "fsl-i2c"; + reg = <0x3000 0x100>; + interrupts = <43 2>; + interrupt-parent = <&mpic>; + dfsrr; + }; + + i2c@3100 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <1>; + compatible = "fsl-i2c"; + reg = <0x3100 0x100>; + interrupts = <43 2>; + interrupt-parent = <&mpic>; + dfsrr; + }; + + serial0: serial@4500 { + cell-index = <0>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4500 0x100>; + clock-frequency = <0>; + interrupts = <42 2>; + interrupt-parent = <&mpic>; + }; + + serial1: serial@4600 { + cell-index = <1>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4600 0x100>; + clock-frequency = <0>; + interrupts = <42 2>; + interrupt-parent = <&mpic>; + }; + + spi@7000 { + cell-index = <0>; + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,espi"; + reg = <0x7000 0x1000>; + interrupts = <59 0x2>; + interrupt-parent = <&mpic>; + mode = "cpu"; + }; + + dma@c300 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,eloplus-dma"; + reg = <0xc300 0x4>; + ranges = <0x0 0xc100 0x200>; + cell-index = <1>; + dma-channel@0 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x0 0x80>; + cell-index = <0>; + interrupt-parent = <&mpic>; + interrupts = <76 2>; + }; + dma-channel@80 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x80 0x80>; + cell-index = <1>; + interrupt-parent = <&mpic>; + interrupts = <77 2>; + }; + dma-channel@100 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x100 0x80>; + cell-index = <2>; + interrupt-parent = <&mpic>; + interrupts = <78 2>; + }; + dma-channel@180 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x180 0x80>; + cell-index = <3>; + interrupt-parent = <&mpic>; + interrupts = <79 2>; + }; + }; + + gpio: gpio-controller@f000 { + #gpio-cells = <2>; + compatible = "fsl,mpc8572-gpio"; + reg = <0xf000 0x100>; + interrupts = <47 0x2>; + interrupt-parent = <&mpic>; + gpio-controller; + }; + + L2: l2-cache-controller@20000 { + compatible = "fsl,p2020-l2-cache-controller"; + reg = <0x20000 0x1000>; + cache-line-size = <32>; // 32 bytes + cache-size = <0x80000>; // L2,512K + interrupt-parent = <&mpic>; + interrupts = <16 2>; + }; + + dma@21300 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,eloplus-dma"; + reg = <0x21300 0x4>; + ranges = <0x0 0x21100 0x200>; + cell-index = <0>; + dma-channel@0 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x0 0x80>; + cell-index = <0>; + interrupt-parent = <&mpic>; + interrupts = <20 2>; + }; + dma-channel@80 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x80 0x80>; + cell-index = <1>; + interrupt-parent = <&mpic>; + interrupts = <21 2>; + }; + dma-channel@100 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x100 0x80>; + cell-index = <2>; + interrupt-parent = <&mpic>; + interrupts = <22 2>; + }; + dma-channel@180 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x180 0x80>; + cell-index = <3>; + interrupt-parent = <&mpic>; + interrupts = <23 2>; + }; + }; + + usb@22000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl-usb2-dr"; + reg = <0x22000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <28 0x2>; + }; + + mdio@24520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-mdio"; + reg = <0x24520 0x20>; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + }; + + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x520 0x20>; + }; + + enet0: ethernet@24000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <0>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x24000 0x1000>; + ranges = <0x0 0x24000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <29 2 30 2 34 2>; + interrupt-parent = <&mpic>; + }; + + enet1: ethernet@25000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <1>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x25000 0x1000>; + ranges = <0x0 0x25000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <35 2 36 2 40 2>; + interrupt-parent = <&mpic>; + + }; + + enet2: ethernet@26000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <2>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x26000 0x1000>; + ranges = <0x0 0x26000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <31 2 32 2 33 2>; + interrupt-parent = <&mpic>; + + }; + + sdhci@2e000 { + compatible = "fsl,p2020-esdhc", "fsl,esdhc"; + reg = <0x2e000 0x1000>; + interrupts = <72 0x2>; + interrupt-parent = <&mpic>; + /* Filled in by U-Boot */ + clock-frequency = <0>; + }; + + crypto@30000 { + compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", + "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; + reg = <0x30000 0x10000>; + interrupts = <45 2 58 2>; + interrupt-parent = <&mpic>; + fsl,num-channels = <4>; + fsl,channel-fifo-len = <24>; + fsl,exec-units-mask = <0xbfe>; + fsl,descriptor-types-mask = <0x3ab0ebf>; + }; + + mpic: pic@40000 { + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <2>; + reg = <0x40000 0x40000>; + compatible = "chrp,open-pic"; + device_type = "open-pic"; + }; + + msi@41600 { + compatible = "fsl,p2020-msi", "fsl,mpic-msi"; + reg = <0x41600 0x80>; + msi-available-ranges = <0 0x100>; + interrupts = < + 0xe0 0 + 0xe1 0 + 0xe2 0 + 0xe3 0 + 0xe4 0 + 0xe5 0 + 0xe6 0 + 0xe7 0>; + interrupt-parent = <&mpic>; + }; + + global-utilities@e0000 { //global utilities block + compatible = "fsl,p2020-guts"; + reg = <0xe0000 0x1000>; + fsl,has-rstcr; + }; + }; + + pci0: pcie@ffe08000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe08000 0 0x1000>; + bus-range = <0 255>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <24 2>; + }; + + pci1: pcie@ffe09000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe09000 0 0x1000>; + bus-range = <0 255>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <25 2>; + }; + + pci2: pcie@ffe0a000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe0a000 0 0x1000>; + bus-range = <0 255>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <26 2>; + }; +}; diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts index 5b7fc29dd6c..927f94d16e9 100644 --- a/arch/powerpc/boot/dts/p4080ds.dts +++ b/arch/powerpc/boot/dts/p4080ds.dts @@ -1,7 +1,7 @@ /* * P4080DS Device Tree Source * - * Copyright 2009 Freescale Semiconductor Inc. + * Copyright 2009-2011 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -33,6 +33,17 @@ dma1 = &dma1; sdhc = &sdhc; + crypto = &crypto; + sec_jr0 = &sec_jr0; + sec_jr1 = &sec_jr1; + sec_jr2 = &sec_jr2; + sec_jr3 = &sec_jr3; + rtic_a = &rtic_a; + rtic_b = &rtic_b; + rtic_c = &rtic_c; + rtic_d = &rtic_d; + sec_mon = &sec_mon; + rio0 = &rapidio0; }; @@ -410,6 +421,79 @@ dr_mode = "host"; phy_type = "ulpi"; }; + + crypto: crypto@300000 { + compatible = "fsl,sec-v4.0"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x300000 0x10000>; + ranges = <0 0x300000 0x10000>; + interrupt-parent = <&mpic>; + interrupts = <92 2>; + + sec_jr0: jr@1000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x1000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <88 2>; + }; + + sec_jr1: jr@2000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x2000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <89 2>; + }; + + sec_jr2: jr@3000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x3000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <90 2>; + }; + + sec_jr3: jr@4000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x4000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <91 2>; + }; + + rtic@6000 { + compatible = "fsl,sec-v4.0-rtic"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x6000 0x100>; + ranges = <0x0 0x6100 0xe00>; + + rtic_a: rtic-a@0 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x00 0x20 0x100 0x80>; + }; + + rtic_b: rtic-b@20 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x20 0x20 0x200 0x80>; + }; + + rtic_c: rtic-c@40 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x40 0x20 0x300 0x80>; + }; + + rtic_d: rtic-d@60 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x60 0x20 0x500 0x80>; + }; + }; + }; + + sec_mon: sec_mon@314000 { + compatible = "fsl,sec-v4.0-mon"; + reg = <0x314000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <93 2>; + }; }; rapidio0: rapidio@ffe0c0000 { diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c new file mode 100644 index 00000000000..06c1961bd12 --- /dev/null +++ b/arch/powerpc/boot/epapr.c @@ -0,0 +1,66 @@ +/* + * Bootwrapper for ePAPR compliant firmwares + * + * Copyright 2010 David Gibson <david@gibson.dropbear.id.au>, IBM Corporation. + * + * Based on earlier bootwrappers by: + * (c) Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp,\ + * and + * Scott Wood <scottwood@freescale.com> + * Copyright (c) 2007 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include "ops.h" +#include "stdio.h" +#include "io.h" +#include <libfdt.h> + +BSS_STACK(4096); + +#define EPAPR_SMAGIC 0x65504150 +#define EPAPR_EMAGIC 0x45504150 + +static unsigned epapr_magic; +static unsigned long ima_size; +static unsigned long fdt_addr; + +static void platform_fixups(void) +{ + if ((epapr_magic != EPAPR_EMAGIC) + && (epapr_magic != EPAPR_SMAGIC)) + fatal("r6 contained 0x%08x instead of ePAPR magic number\n", + epapr_magic); + + if (ima_size < (unsigned long)_end) + printf("WARNING: Image loaded outside IMA!" + " (_end=%p, ima_size=0x%lx)\n", _end, ima_size); + if (ima_size < fdt_addr) + printf("WARNING: Device tree address is outside IMA!" + "(fdt_addr=0x%lx, ima_size=0x%lx)\n", fdt_addr, + ima_size); + if (ima_size < fdt_addr + fdt_totalsize((void *)fdt_addr)) + printf("WARNING: Device tree extends outside IMA!" + " (fdt_addr=0x%lx, size=0x%x, ima_size=0x%lx\n", + fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size); +} + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + epapr_magic = r6; + ima_size = r7; + fdt_addr = r3; + + /* FIXME: we should process reserve entries */ + + simple_alloc_init(_end, ima_size - (unsigned long)_end, 32, 64); + + fdt_init((void *)fdt_addr); + + serial_console_init(); + platform_ops.fixups = platform_fixups; +} diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index cb97e7511d7..c74531af72c 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -39,6 +39,7 @@ dts= cacheit= binary= gzip=.gz +pie= # cross-compilation prefix CROSS= @@ -157,9 +158,10 @@ pmac|chrp) platformo=$object/of.o ;; coff) - platformo=$object/of.o + platformo="$object/crt0.o $object/of.o" lds=$object/zImage.coff.lds link_address='0x500000' + pie= ;; miboot|uboot) # miboot and U-boot want just the bare bits, not an ELF binary @@ -208,6 +210,7 @@ ps3) ksection=.kernel:vmlinux.bin isection=.kernel:initrd link_address='' + pie= ;; ep88xc|ep405|ep8248e) platformo="$object/fixed-head.o $object/$platform.o" @@ -244,6 +247,10 @@ gamecube|wii) treeboot-iss4xx-mpic) platformo="$object/treeboot-iss4xx.o" ;; +epapr) + link_address='0x20000000' + pie=-pie + ;; esac vmz="$tmpdir/`basename \"$kernel\"`.$ext" @@ -251,7 +258,7 @@ if [ -z "$cacheit" -o ! -f "$vmz$gzip" -o "$vmz$gzip" -ot "$kernel" ]; then ${CROSS}objcopy $objflags "$kernel" "$vmz.$$" if [ -n "$gzip" ]; then - gzip -f -9 "$vmz.$$" + gzip -n -f -9 "$vmz.$$" fi if [ -n "$cacheit" ]; then @@ -310,9 +317,9 @@ fi if [ "$platform" != "miboot" ]; then if [ -n "$link_address" ] ; then - text_start="-Ttext $link_address --defsym _start=$link_address" + text_start="-Ttext $link_address" fi - ${CROSS}ld -m elf32ppc -T $lds $text_start -o "$ofile" \ + ${CROSS}ld -m elf32ppc -T $lds $text_start $pie -o "$ofile" \ $platformo $tmp $object/wrapper.a rm $tmp fi @@ -336,7 +343,7 @@ coff) $objbin/hack-coff "$ofile" ;; cuboot*) - gzip -f -9 "$ofile" + gzip -n -f -9 "$ofile" ${MKIMAGE} -A ppc -O linux -T kernel -C gzip -a "$base" -e "$entry" \ $uboot_version -d "$ofile".gz "$ofile" ;; @@ -383,6 +390,6 @@ ps3) odir="$(dirname "$ofile.bin")" rm -f "$odir/otheros.bld" - gzip --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld" + gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld" ;; esac diff --git a/arch/powerpc/boot/zImage.coff.lds.S b/arch/powerpc/boot/zImage.coff.lds.S index 856dc78b14e..de4c9e3c934 100644 --- a/arch/powerpc/boot/zImage.coff.lds.S +++ b/arch/powerpc/boot/zImage.coff.lds.S @@ -3,13 +3,13 @@ ENTRY(_zimage_start_opd) EXTERN(_zimage_start_opd) SECTIONS { - _start = .; .text : { + _start = .; *(.text) *(.fixup) + _etext = .; } - _etext = .; . = ALIGN(4096); .data : { @@ -17,9 +17,7 @@ SECTIONS *(.data*) *(__builtin_*) *(.sdata*) - __got2_start = .; *(.got2) - __got2_end = .; _dtb_start = .; *(.kernel:dtb) diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index 0962d62bdb5..2bd8731f136 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -3,49 +3,64 @@ ENTRY(_zimage_start) EXTERN(_zimage_start) SECTIONS { - _start = .; .text : { + _start = .; *(.text) *(.fixup) + _etext = .; } - _etext = .; . = ALIGN(4096); .data : { *(.rodata*) *(.data*) *(.sdata*) - __got2_start = .; *(.got2) - __got2_end = .; } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .dynamic : + { + __dynamic_start = .; + *(.dynamic) + } + .hash : { *(.hash) } + .interp : { *(.interp) } + .rela.dyn : { *(.rela*) } . = ALIGN(8); - _dtb_start = .; - .kernel:dtb : { *(.kernel:dtb) } - _dtb_end = .; - - . = ALIGN(4096); - _vmlinux_start = .; - .kernel:vmlinux.strip : { *(.kernel:vmlinux.strip) } - _vmlinux_end = .; + .kernel:dtb : + { + _dtb_start = .; + *(.kernel:dtb) + _dtb_end = .; + } . = ALIGN(4096); - _initrd_start = .; - .kernel:initrd : { *(.kernel:initrd) } - _initrd_end = .; + .kernel:vmlinux.strip : + { + _vmlinux_start = .; + *(.kernel:vmlinux.strip) + _vmlinux_end = .; + } . = ALIGN(4096); - _edata = .; + .kernel:initrd : + { + _initrd_start = .; + *(.kernel:initrd) + _initrd_end = .; + } . = ALIGN(4096); - __bss_start = .; .bss : { - *(.sbss) - *(.bss) + _edata = .; + __bss_start = .; + *(.sbss) + *(.bss) + *(COMMON) + _end = . ; } - . = ALIGN(4096); - _end = . ; } diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig index c683bce4c26..126ef1b08a0 100644 --- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig @@ -104,7 +104,6 @@ CONFIG_ROOT_NFS=y CONFIG_PARTITION_ADVANCED=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig index a721cd3d793..abcf00ad939 100644 --- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig @@ -101,7 +101,6 @@ CONFIG_ROOT_NFS=y CONFIG_PARTITION_ADVANCED=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig index 55e0725500d..11662c217ac 100644 --- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig @@ -58,7 +58,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig index d724095530a..ebe9b30b072 100644 --- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig @@ -59,7 +59,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig index 4b44beaa21a..eb25229b387 100644 --- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig +++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig @@ -63,7 +63,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig index b614508d6fd..f51c7ebc181 100644 --- a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig +++ b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig @@ -168,7 +168,6 @@ CONFIG_MAC_PARTITION=y CONFIG_CRC_T10DIF=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index f9e6a3ea5a6..2a84fd7f631 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -132,8 +132,8 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_IND=y CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m diff --git a/arch/powerpc/configs/e55xx_smp_defconfig b/arch/powerpc/configs/e55xx_smp_defconfig index 9fa1613e5e2..d32283555b5 100644 --- a/arch/powerpc/configs/e55xx_smp_defconfig +++ b/arch/powerpc/configs/e55xx_smp_defconfig @@ -6,10 +6,10 @@ CONFIG_NR_CPUS=2 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SPARSE_IRQ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y @@ -25,8 +25,32 @@ CONFIG_P5020_DS=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=m -CONFIG_SPARSE_IRQ=y # CONFIG_PCI is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_NET_IPIP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_ARPD=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_IPV6=y +CONFIG_IP_SCTP=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y @@ -34,6 +58,9 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 CONFIG_MISC_DEVICES=y CONFIG_EEPROM_LEGACY=y +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +CONFIG_NET_ETHERNET=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -64,22 +91,14 @@ CONFIG_NLS=y CONFIG_NLS_UTF8=m CONFIG_CRC_T10DIF=y CONFIG_CRC_ITU_T=m -CONFIG_LIBCRC32C=m CONFIG_FRAME_WARN=1024 CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_VIRQ_DEBUG=y -CONFIG_CRYPTO=y -CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_TALITOS=y diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index c06a86c3309..96b89df7752 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -204,7 +204,6 @@ CONFIG_CRC_T10DIF=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 942ced90557..de65841aa04 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -206,7 +206,6 @@ CONFIG_CRC_T10DIF=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig index 038a308cbfc..a1cc8179e9f 100644 --- a/arch/powerpc/configs/mpc86xx_defconfig +++ b/arch/powerpc/configs/mpc86xx_defconfig @@ -171,7 +171,6 @@ CONFIG_MAC_PARTITION=y CONFIG_CRC_T10DIF=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index ac4fc41035f..f8b394a76ac 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -112,8 +112,8 @@ CONFIG_IRDA_CACHE_LAST_LSAP=y CONFIG_IRDA_FAST_RR=y CONFIG_IRTTY_SIR=m CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 0a10fb009ef..214208924a9 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -351,8 +351,8 @@ CONFIG_VLSI_FIR=m CONFIG_VIA_FIR=m CONFIG_MCS_FIR=m CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index caba919f65d..6472322bf13 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -52,8 +52,8 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_DIAG is not set CONFIG_IPV6=y CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 249ddd0a27c..7de13865508 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -146,12 +146,18 @@ CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_SAS_ATTRS=m +CONFIG_SCSI_CXGB3_ISCSI=m +CONFIG_SCSI_CXGB4_ISCSI=m +CONFIG_SCSI_BNX2_ISCSI=m +CONFIG_SCSI_BNX2_ISCSI=m +CONFIG_BE2ISCSI=m CONFIG_SCSI_IBMVSCSI=y CONFIG_SCSI_IBMVFC=m CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 CONFIG_SCSI_IPR=y CONFIG_SCSI_QLA_FC=m +CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_LPFC=m CONFIG_ATA=y # CONFIG_ATA_SFF is not set @@ -197,6 +203,8 @@ CONFIG_S2IO=m CONFIG_MYRI10GE=m CONFIG_NETXEN_NIC=m CONFIG_MLX4_EN=m +CONFIG_QLGE=m +CONFIG_BE2NET=m CONFIG_PPP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 1833d1a07e7..c0d842cfd01 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -157,6 +157,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_476_DD2 ASM_CONST(0x0000000000010000) #define CPU_FTR_NEED_COHERENT ASM_CONST(0x0000000000020000) #define CPU_FTR_NO_BTIC ASM_CONST(0x0000000000040000) +#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x0000000000080000) #define CPU_FTR_NODSISRALIGN ASM_CONST(0x0000000000100000) #define CPU_FTR_PPC_LE ASM_CONST(0x0000000000200000) #define CPU_FTR_REAL_LE ASM_CONST(0x0000000000400000) @@ -178,22 +179,18 @@ extern const char *powerpc_base_platform; #define LONG_ASM_CONST(x) 0 #endif -#define CPU_FTR_SLB LONG_ASM_CONST(0x0000000100000000) -#define CPU_FTR_16M_PAGE LONG_ASM_CONST(0x0000000200000000) -#define CPU_FTR_TLBIEL LONG_ASM_CONST(0x0000000400000000) + +#define CPU_FTR_HVMODE_206 LONG_ASM_CONST(0x0000000800000000) +#define CPU_FTR_CFAR LONG_ASM_CONST(0x0000001000000000) #define CPU_FTR_IABR LONG_ASM_CONST(0x0000002000000000) #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000004000000000) #define CPU_FTR_CTRL LONG_ASM_CONST(0x0000008000000000) #define CPU_FTR_SMT LONG_ASM_CONST(0x0000010000000000) -#define CPU_FTR_LOCKLESS_TLBIE LONG_ASM_CONST(0x0000040000000000) -#define CPU_FTR_CI_LARGE_PAGE LONG_ASM_CONST(0x0000100000000000) #define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000200000000000) #define CPU_FTR_PURR LONG_ASM_CONST(0x0000400000000000) #define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000800000000000) #define CPU_FTR_SPURR LONG_ASM_CONST(0x0001000000000000) #define CPU_FTR_DSCR LONG_ASM_CONST(0x0002000000000000) -#define CPU_FTR_1T_SEGMENT LONG_ASM_CONST(0x0004000000000000) -#define CPU_FTR_NO_SLBIE_B LONG_ASM_CONST(0x0008000000000000) #define CPU_FTR_VSX LONG_ASM_CONST(0x0010000000000000) #define CPU_FTR_SAO LONG_ASM_CONST(0x0020000000000000) #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000) @@ -202,12 +199,14 @@ extern const char *powerpc_base_platform; #define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000) #define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0800000000000000) +#define CPU_FTR_ICSWX LONG_ASM_CONST(0x1000000000000000) #ifndef __ASSEMBLY__ -#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_SLB | \ - CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_16M_PAGE) +#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN) + +#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_SLB | MMU_FTR_TLBIEL | \ + MMU_FTR_16M_PAGE) /* We only set the altivec features if the kernel was compiled with altivec * support @@ -387,7 +386,8 @@ extern const char *powerpc_base_platform; CPU_FTR_DBELL) #define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ - CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD) + CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ + CPU_FTR_DEBUG_LVL_EXC) #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ @@ -407,44 +407,45 @@ extern const char *powerpc_base_platform; #define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ - CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ - CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS | \ - CPU_FTR_POPCNTB) + CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \ + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB) #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ - CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ + CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \ - CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB) + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR) #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_HVMODE_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ - CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ + CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ - CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD) + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ + CPU_FTR_ICSWX | CPU_FTR_CFAR) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ - CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | \ - CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \ + CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \ CPU_FTR_UNALIGNED_LD_STD) #define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ - CPU_FTR_PPCAS_ARCH_V2 | \ - CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \ - CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_NO_SLBIE_B) + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_PURR | CPU_FTR_REAL_LE) #define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2) +#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \ + CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN) + #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_POSSIBLE (CPU_FTRS_E5500) +#define CPU_FTRS_POSSIBLE (CPU_FTRS_E5500 | CPU_FTRS_A2) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \ CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 | \ CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \ - CPU_FTR_1T_SEGMENT | CPU_FTR_VSX) + CPU_FTR_VSX) #endif #else enum { @@ -487,7 +488,7 @@ enum { #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_ALWAYS (CPU_FTRS_E5500) +#define CPU_FTRS_ALWAYS (CPU_FTRS_E5500 & CPU_FTRS_A2) #else #define CPU_FTRS_ALWAYS \ (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 & \ diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index f71bb4c118b..ce516e5eb0d 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -37,16 +37,16 @@ extern cpumask_t threads_core_mask; * This can typically be used for things like IPI for tlb invalidations * since those need to be done only once per core/TLB */ -static inline cpumask_t cpu_thread_mask_to_cores(cpumask_t threads) +static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads) { cpumask_t tmp, res; int i; - res = CPU_MASK_NONE; + cpumask_clear(&res); for (i = 0; i < NR_CPUS; i += threads_per_core) { - cpus_shift_left(tmp, threads_core_mask, i); - if (cpus_intersects(threads, tmp)) - cpu_set(i, res); + cpumask_shift_left(&tmp, &threads_core_mask, i); + if (cpumask_intersects(threads, &tmp)) + cpumask_set_cpu(i, &res); } return res; } @@ -58,7 +58,7 @@ static inline int cpu_nr_cores(void) static inline cpumask_t cpu_online_cores_map(void) { - return cpu_thread_mask_to_cores(cpu_online_map); + return cpu_thread_mask_to_cores(cpu_online_mask); } #ifdef CONFIG_SMP diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index 0893ab9343a..9c70d0ca96d 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -27,9 +27,8 @@ enum ppc_dbell { PPC_G_DBELL_MC = 4, /* guest mcheck doorbell */ }; -extern void doorbell_message_pass(int target, int msg); +extern void doorbell_cause_ipi(int cpu, unsigned long data); extern void doorbell_exception(struct pt_regs *regs); -extern void doorbell_check_self(void); extern void doorbell_setup_this_cpu(void); static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag) diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h index f0fb4fc1f6e..45921672b97 100644 --- a/arch/powerpc/include/asm/emulated_ops.h +++ b/arch/powerpc/include/asm/emulated_ops.h @@ -52,6 +52,10 @@ extern struct ppc_emulated { #ifdef CONFIG_VSX struct ppc_emulated_entry vsx; #endif +#ifdef CONFIG_PPC64 + struct ppc_emulated_entry mfdscr; + struct ppc_emulated_entry mtdscr; +#endif } ppc_emulated; extern u32 ppc_warn_emulated; diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 7778d6f0c87..f5dfe3411f6 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -46,6 +46,7 @@ #define EX_CCR 60 #define EX_R3 64 #define EX_LR 72 +#define EX_CFAR 80 /* * We're short on space and time in the exception prolog, so we can't @@ -56,30 +57,40 @@ #define LOAD_HANDLER(reg, label) \ addi reg,reg,(label)-_stext; /* virt addr of handler ... */ -#define EXCEPTION_PROLOG_1(area) \ - mfspr r13,SPRN_SPRG_PACA; /* get paca address into r13 */ \ +/* Exception register prefixes */ +#define EXC_HV H +#define EXC_STD + +#define EXCEPTION_PROLOG_1(area) \ + GET_PACA(r13); \ std r9,area+EX_R9(r13); /* save r9 - r12 */ \ std r10,area+EX_R10(r13); \ std r11,area+EX_R11(r13); \ std r12,area+EX_R12(r13); \ - mfspr r9,SPRN_SPRG_SCRATCH0; \ + BEGIN_FTR_SECTION_NESTED(66); \ + mfspr r10,SPRN_CFAR; \ + std r10,area+EX_CFAR(r13); \ + END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ + GET_SCRATCH0(r9); \ std r9,area+EX_R13(r13); \ mfcr r9 -#define EXCEPTION_PROLOG_PSERIES_1(label) \ +#define __EXCEPTION_PROLOG_PSERIES_1(label, h) \ ld r12,PACAKBASE(r13); /* get high part of &label */ \ ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ - mfspr r11,SPRN_SRR0; /* save SRR0 */ \ + mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ LOAD_HANDLER(r12,label) \ - mtspr SPRN_SRR0,r12; \ - mfspr r12,SPRN_SRR1; /* and SRR1 */ \ - mtspr SPRN_SRR1,r10; \ - rfid; \ + mtspr SPRN_##h##SRR0,r12; \ + mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ + mtspr SPRN_##h##SRR1,r10; \ + h##rfid; \ b . /* prevent speculative execution */ +#define EXCEPTION_PROLOG_PSERIES_1(label, h) \ + __EXCEPTION_PROLOG_PSERIES_1(label, h) -#define EXCEPTION_PROLOG_PSERIES(area, label) \ +#define EXCEPTION_PROLOG_PSERIES(area, label, h) \ EXCEPTION_PROLOG_1(area); \ - EXCEPTION_PROLOG_PSERIES_1(label); + EXCEPTION_PROLOG_PSERIES_1(label, h); /* * The common exception prolog is used for all except a few exceptions @@ -98,10 +109,11 @@ beq- 1f; \ ld r1,PACAKSAVE(r13); /* kernel stack to use */ \ 1: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \ - bge- cr1,2f; /* abort if it is */ \ - b 3f; \ -2: li r1,(n); /* will be reloaded later */ \ + blt+ cr1,3f; /* abort if it is */ \ + li r1,(n); /* will be reloaded later */ \ sth r1,PACA_TRAP_SAVE(r13); \ + std r3,area+EX_R3(r13); \ + addi r3,r13,area; /* r3 -> where regs are saved*/ \ b bad_stack; \ 3: std r9,_CCR(r1); /* save CR in stackframe */ \ std r11,_NIP(r1); /* save SRR0 in stackframe */ \ @@ -123,6 +135,10 @@ std r9,GPR11(r1); \ std r10,GPR12(r1); \ std r11,GPR13(r1); \ + BEGIN_FTR_SECTION_NESTED(66); \ + ld r10,area+EX_CFAR(r13); \ + std r10,ORIG_GPR3(r1); \ + END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ mflr r9; /* save LR in stackframe */ \ std r9,_LINK(r1); \ @@ -143,57 +159,62 @@ /* * Exception vectors. */ -#define STD_EXCEPTION_PSERIES(n, label) \ - . = n; \ +#define STD_EXCEPTION_PSERIES(loc, vec, label) \ + . = loc; \ .globl label##_pSeries; \ label##_pSeries: \ HMT_MEDIUM; \ - DO_KVM n; \ - mtspr SPRN_SPRG_SCRATCH0,r13; /* save r13 */ \ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) + DO_KVM vec; \ + SET_SCRATCH0(r13); /* save r13 */ \ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_STD) -#define HSTD_EXCEPTION_PSERIES(n, label) \ - . = n; \ - .globl label##_pSeries; \ -label##_pSeries: \ +#define STD_EXCEPTION_HV(loc, vec, label) \ + . = loc; \ + .globl label##_hv; \ +label##_hv: \ HMT_MEDIUM; \ - mtspr SPRN_SPRG_SCRATCH0,r20; /* save r20 */ \ - mfspr r20,SPRN_HSRR0; /* copy HSRR0 to SRR0 */ \ - mtspr SPRN_SRR0,r20; \ - mfspr r20,SPRN_HSRR1; /* copy HSRR0 to SRR0 */ \ - mtspr SPRN_SRR1,r20; \ - mfspr r20,SPRN_SPRG_SCRATCH0; /* restore r20 */ \ - mtspr SPRN_SPRG_SCRATCH0,r13; /* save r13 */ \ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) + DO_KVM vec; \ + SET_SCRATCH0(r13); /* save r13 */ \ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_HV) - -#define MASKABLE_EXCEPTION_PSERIES(n, label) \ - . = n; \ - .globl label##_pSeries; \ -label##_pSeries: \ +#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h) \ HMT_MEDIUM; \ - DO_KVM n; \ - mtspr SPRN_SPRG_SCRATCH0,r13; /* save r13 */ \ - mfspr r13,SPRN_SPRG_PACA; /* get paca address into r13 */ \ + DO_KVM vec; \ + SET_SCRATCH0(r13); /* save r13 */ \ + GET_PACA(r13); \ std r9,PACA_EXGEN+EX_R9(r13); /* save r9, r10 */ \ std r10,PACA_EXGEN+EX_R10(r13); \ lbz r10,PACASOFTIRQEN(r13); \ mfcr r9; \ cmpwi r10,0; \ - beq masked_interrupt; \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ + beq masked_##h##interrupt; \ + GET_SCRATCH0(r10); \ std r10,PACA_EXGEN+EX_R13(r13); \ std r11,PACA_EXGEN+EX_R11(r13); \ std r12,PACA_EXGEN+EX_R12(r13); \ ld r12,PACAKBASE(r13); /* get high part of &label */ \ ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ - mfspr r11,SPRN_SRR0; /* save SRR0 */ \ + mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ LOAD_HANDLER(r12,label##_common) \ - mtspr SPRN_SRR0,r12; \ - mfspr r12,SPRN_SRR1; /* and SRR1 */ \ - mtspr SPRN_SRR1,r10; \ - rfid; \ + mtspr SPRN_##h##SRR0,r12; \ + mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ + mtspr SPRN_##h##SRR1,r10; \ + h##rfid; \ b . /* prevent speculative execution */ +#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h) \ + __MASKABLE_EXCEPTION_PSERIES(vec, label, h) + +#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \ + . = loc; \ + .globl label##_pSeries; \ +label##_pSeries: \ + _MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_STD) + +#define MASKABLE_EXCEPTION_HV(loc, vec, label) \ + . = loc; \ + .globl label##_hv; \ +label##_hv: \ + _MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_HV) #ifdef CONFIG_PPC_ISERIES #define DISABLE_INTS \ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 921a8470e18..9a67a38bf7b 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -49,7 +49,7 @@ label##5: \ FTR_ENTRY_OFFSET label##2b-label##5b; \ FTR_ENTRY_OFFSET label##3b-label##5b; \ FTR_ENTRY_OFFSET label##4b-label##5b; \ - .ifgt (label##4b-label##3b)-(label##2b-label##1b); \ + .ifgt (label##4b- label##3b)-(label##2b- label##1b); \ .error "Feature section else case larger than body"; \ .endif; \ .popsection; @@ -146,6 +146,19 @@ label##5: \ #ifndef __ASSEMBLY__ +#define ASM_FTR_IF(section_if, section_else, msk, val) \ + stringify_in_c(BEGIN_FTR_SECTION) \ + section_if "; " \ + stringify_in_c(FTR_SECTION_ELSE) \ + section_else "; " \ + stringify_in_c(ALT_FTR_SECTION_END((msk), (val))) + +#define ASM_FTR_IFSET(section_if, section_else, msk) \ + ASM_FTR_IF(section_if, section_else, (msk), (msk)) + +#define ASM_FTR_IFCLR(section_if, section_else, msk) \ + ASM_FTR_IF(section_if, section_else, (msk), 0) + #define ASM_MMU_FTR_IF(section_if, section_else, msk, val) \ stringify_in_c(BEGIN_MMU_FTR_SECTION) \ section_if "; " \ diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 4ef662e4a31..3a6c586c4e4 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -47,6 +47,7 @@ #define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000) #define FW_FEATURE_CMO ASM_CONST(0x0000000002000000) #define FW_FEATURE_VPHN ASM_CONST(0x0000000004000000) +#define FW_FEATURE_XCMO ASM_CONST(0x0000000008000000) #ifndef __ASSEMBLY__ @@ -60,7 +61,7 @@ enum { FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | - FW_FEATURE_CMO | FW_FEATURE_VPHN, + FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 8edec710cc6..852b8c1c09d 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -102,6 +102,7 @@ #define H_ANDCOND (1UL<<(63-33)) #define H_ICACHE_INVALIDATE (1UL<<(63-40)) /* icbi, etc. (ignored for IO pages) */ #define H_ICACHE_SYNCHRONIZE (1UL<<(63-41)) /* dcbst, icbi, etc (ignored for IO pages */ +#define H_COALESCE_CAND (1UL<<(63-42)) /* page is a good candidate for coalescing */ #define H_ZERO_PAGE (1UL<<(63-48)) /* zero the page before mapping (ignored for IO pages) */ #define H_COPY_PAGE (1UL<<(63-49)) #define H_N (1UL<<(63-61)) @@ -234,6 +235,7 @@ #define H_GET_MPP 0x2D4 #define H_HOME_NODE_ASSOCIATIVITY 0x2EC #define H_BEST_ENERGY 0x2F4 +#define H_GET_MPP_X 0x314 #define MAX_HCALL_OPCODE H_BEST_ENERGY #ifndef __ASSEMBLY__ @@ -312,6 +314,16 @@ struct hvcall_mpp_data { int h_get_mpp(struct hvcall_mpp_data *); +struct hvcall_mpp_x_data { + unsigned long coalesced_bytes; + unsigned long pool_coalesced_bytes; + unsigned long pool_purr_cycles; + unsigned long pool_spurr_cycles; + unsigned long reserved[3]; +}; + +int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data); + #ifdef CONFIG_PPC_PSERIES extern int CMO_PrPSP; extern int CMO_SecPSP; diff --git a/arch/powerpc/platforms/cell/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h index 6efc7782ebf..fbae4928692 100644 --- a/arch/powerpc/platforms/cell/io-workarounds.h +++ b/arch/powerpc/include/asm/io-workarounds.h @@ -31,7 +31,6 @@ struct iowa_bus { void *private; }; -void __devinit io_workaround_init(void); void __devinit iowa_register_bus(struct pci_controller *, struct ppc_pci_io *, int (*)(struct iowa_bus *, void *), void *); struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR); diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 001f2f11c19..45698d55cd6 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -2,6 +2,8 @@ #define _ASM_POWERPC_IO_H #ifdef __KERNEL__ +#define ARCH_HAS_IOREMAP_WC + /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -481,10 +483,16 @@ __do_out_asm(_rec_outl, "stwbrx") _memcpy_fromio(dst,PCI_FIX_ADDR(src),n) #endif /* !CONFIG_EEH */ -#ifdef CONFIG_PPC_INDIRECT_IO -#define DEF_PCI_HOOK(x) x +#ifdef CONFIG_PPC_INDIRECT_PIO +#define DEF_PCI_HOOK_pio(x) x +#else +#define DEF_PCI_HOOK_pio(x) NULL +#endif + +#ifdef CONFIG_PPC_INDIRECT_MMIO +#define DEF_PCI_HOOK_mem(x) x #else -#define DEF_PCI_HOOK(x) NULL +#define DEF_PCI_HOOK_mem(x) NULL #endif /* Structure containing all the hooks */ @@ -504,7 +512,7 @@ extern struct ppc_pci_io { #define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ static inline ret name at \ { \ - if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL) \ + if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL) \ return ppc_pci_io.name al; \ return __do_##name al; \ } @@ -512,7 +520,7 @@ static inline ret name at \ #define DEF_PCI_AC_NORET(name, at, al, space, aa) \ static inline void name at \ { \ - if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL) \ + if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL) \ ppc_pci_io.name al; \ else \ __do_##name al; \ @@ -616,12 +624,13 @@ static inline void iosync(void) * * ioremap is the standard one and provides non-cacheable guarded mappings * and can be hooked by the platform via ppc_md * - * * ioremap_flags allows to specify the page flags as an argument and can - * also be hooked by the platform via ppc_md. ioremap_prot is the exact - * same thing as ioremap_flags. + * * ioremap_prot allows to specify the page flags as an argument and can + * also be hooked by the platform via ppc_md. * * * ioremap_nocache is identical to ioremap * + * * ioremap_wc enables write combining + * * * iounmap undoes such a mapping and can be hooked * * * __ioremap_at (and the pending __iounmap_at) are low level functions to @@ -629,7 +638,7 @@ static inline void iosync(void) * currently be hooked. Must be page aligned. * * * __ioremap is the low level implementation used by ioremap and - * ioremap_flags and cannot be hooked (but can be used by a hook on one + * ioremap_prot and cannot be hooked (but can be used by a hook on one * of the previous ones) * * * __ioremap_caller is the same as above but takes an explicit caller @@ -640,10 +649,10 @@ static inline void iosync(void) * */ extern void __iomem *ioremap(phys_addr_t address, unsigned long size); -extern void __iomem *ioremap_flags(phys_addr_t address, unsigned long size, - unsigned long flags); +extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size, + unsigned long flags); +extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size); #define ioremap_nocache(addr, size) ioremap((addr), (size)) -#define ioremap_prot(addr, size, prot) ioremap_flags((addr), (size), (prot)) extern void iounmap(volatile void __iomem *addr); diff --git a/arch/powerpc/include/asm/io_event_irq.h b/arch/powerpc/include/asm/io_event_irq.h new file mode 100644 index 00000000000..b1a9a1be3c2 --- /dev/null +++ b/arch/powerpc/include/asm/io_event_irq.h @@ -0,0 +1,54 @@ +/* + * Copyright 2010, 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_IO_EVENT_IRQ_H +#define _ASM_POWERPC_IO_EVENT_IRQ_H + +#include <linux/types.h> +#include <linux/notifier.h> + +#define PSERIES_IOEI_RPC_MAX_LEN 216 + +#define PSERIES_IOEI_TYPE_ERR_DETECTED 0x01 +#define PSERIES_IOEI_TYPE_ERR_RECOVERED 0x02 +#define PSERIES_IOEI_TYPE_EVENT 0x03 +#define PSERIES_IOEI_TYPE_RPC_PASS_THRU 0x04 + +#define PSERIES_IOEI_SUBTYPE_NOT_APP 0x00 +#define PSERIES_IOEI_SUBTYPE_REBALANCE_REQ 0x01 +#define PSERIES_IOEI_SUBTYPE_NODE_ONLINE 0x03 +#define PSERIES_IOEI_SUBTYPE_NODE_OFFLINE 0x04 +#define PSERIES_IOEI_SUBTYPE_DUMP_SIZE_CHANGE 0x05 +#define PSERIES_IOEI_SUBTYPE_TORRENT_IRV_UPDATE 0x06 +#define PSERIES_IOEI_SUBTYPE_TORRENT_HFI_CFGED 0x07 + +#define PSERIES_IOEI_SCOPE_NOT_APP 0x00 +#define PSERIES_IOEI_SCOPE_RIO_HUB 0x36 +#define PSERIES_IOEI_SCOPE_RIO_BRIDGE 0x37 +#define PSERIES_IOEI_SCOPE_PHB 0x38 +#define PSERIES_IOEI_SCOPE_EADS_GLOBAL 0x39 +#define PSERIES_IOEI_SCOPE_EADS_SLOT 0x3A +#define PSERIES_IOEI_SCOPE_TORRENT_HUB 0x3B +#define PSERIES_IOEI_SCOPE_SERVICE_PROC 0x51 + +/* Platform Event Log Format, Version 6, data portition of IO event section */ +struct pseries_io_event { + uint8_t event_type; /* 0x00 IO-Event Type */ + uint8_t rpc_data_len; /* 0x01 RPC data length */ + uint8_t scope; /* 0x02 Error/Event Scope */ + uint8_t event_subtype; /* 0x03 I/O-Event Sub-Type */ + uint32_t drc_index; /* 0x04 DRC Index */ + uint8_t rpc_data[PSERIES_IOEI_RPC_MAX_LEN]; + /* 0x08 RPC Data (0-216 bytes, */ + /* padded to 4 bytes alignment) */ +}; + +extern struct atomic_notifier_head pseries_ioei_notifier_list; + +#endif /* _ASM_POWERPC_IO_EVENT_IRQ_H */ diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 67ab5fb7d15..1bff591f7f7 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -88,9 +88,6 @@ struct irq_host_ops { /* Dispose of such a mapping */ void (*unmap)(struct irq_host *h, unsigned int virq); - /* Update of such a mapping */ - void (*remap)(struct irq_host *h, unsigned int virq, irq_hw_number_t hw); - /* Translate device-tree interrupt specifier from raw format coming * from the firmware to a irq_hw_number_t (interrupt line number) and * type (sense) that can be passed to set_irq_type(). In the absence @@ -128,19 +125,10 @@ struct irq_host { struct device_node *of_node; }; -/* The main irq map itself is an array of NR_IRQ entries containing the - * associate host and irq number. An entry with a host of NULL is free. - * An entry can be allocated if it's free, the allocator always then sets - * hwirq first to the host's invalid irq number and then fills ops. - */ -struct irq_map_entry { - irq_hw_number_t hwirq; - struct irq_host *host; -}; - -extern struct irq_map_entry irq_map[NR_IRQS]; - +struct irq_data; +extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d); extern irq_hw_number_t virq_to_hw(unsigned int virq); +extern bool virq_is_host(unsigned int virq, struct irq_host *host); /** * irq_alloc_host - Allocate a new irq_host data structure diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index f54408d995b..8a33698c61b 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -76,7 +76,7 @@ extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); extern cpumask_t cpus_in_sr; static inline int kexec_sr_activated(int cpu) { - return cpu_isset(cpu,cpus_in_sr); + return cpumask_test_cpu(cpu, &cpus_in_sr); } struct kimage; diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 18ea6963ad7..d2ca5ed3877 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -45,6 +45,114 @@ struct kvm_regs { __u64 gpr[32]; }; +#define KVM_SREGS_E_IMPL_NONE 0 +#define KVM_SREGS_E_IMPL_FSL 1 + +#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ + +/* + * Feature bits indicate which sections of the sregs struct are valid, + * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers + * corresponding to unset feature bits will not be modified. This allows + * restoring a checkpoint made without that feature, while keeping the + * default values of the new registers. + * + * KVM_SREGS_E_BASE contains: + * CSRR0/1 (refers to SRR2/3 on 40x) + * ESR + * DEAR + * MCSR + * TSR + * TCR + * DEC + * TB + * VRSAVE (USPRG0) + */ +#define KVM_SREGS_E_BASE (1 << 0) + +/* + * KVM_SREGS_E_ARCH206 contains: + * + * PIR + * MCSRR0/1 + * DECAR + * IVPR + */ +#define KVM_SREGS_E_ARCH206 (1 << 1) + +/* + * Contains EPCR, plus the upper half of 64-bit registers + * that are 32-bit on 32-bit implementations. + */ +#define KVM_SREGS_E_64 (1 << 2) + +#define KVM_SREGS_E_SPRG8 (1 << 3) +#define KVM_SREGS_E_MCIVPR (1 << 4) + +/* + * IVORs are used -- contains IVOR0-15, plus additional IVORs + * in combination with an appropriate feature bit. + */ +#define KVM_SREGS_E_IVOR (1 << 5) + +/* + * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG. + * Also TLBnPS if MMUCFG[MAVN] = 1. + */ +#define KVM_SREGS_E_ARCH206_MMU (1 << 6) + +/* DBSR, DBCR, IAC, DAC, DVC */ +#define KVM_SREGS_E_DEBUG (1 << 7) + +/* Enhanced debug -- DSRR0/1, SPRG9 */ +#define KVM_SREGS_E_ED (1 << 8) + +/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_SPE (1 << 9) + +/* External Proxy (EXP) -- EPR */ +#define KVM_SREGS_EXP (1 << 10) + +/* External PID (E.PD) -- EPSC/EPLC */ +#define KVM_SREGS_E_PD (1 << 11) + +/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PC (1 << 12) + +/* Page table (E.PT) -- EPTCFG */ +#define KVM_SREGS_E_PT (1 << 13) + +/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PM (1 << 14) + +/* + * Special updates: + * + * Some registers may change even while a vcpu is not running. + * To avoid losing these changes, by default these registers are + * not updated by KVM_SET_SREGS. To force an update, set the bit + * in u.e.update_special corresponding to the register to be updated. + * + * The update_special field is zero on return from KVM_GET_SREGS. + * + * When restoring a checkpoint, the caller can set update_special + * to 0xffffffff to ensure that everything is restored, even new features + * that the caller doesn't know about. + */ +#define KVM_SREGS_E_UPDATE_MCSR (1 << 0) +#define KVM_SREGS_E_UPDATE_TSR (1 << 1) +#define KVM_SREGS_E_UPDATE_DEC (1 << 2) +#define KVM_SREGS_E_UPDATE_DBSR (1 << 3) + +/* + * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a + * previous KVM_GET_REGS. + * + * Unless otherwise indicated, setting any register with KVM_SET_SREGS + * directly sets its value. It does not trigger any special semantics such + * as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct + * just received from KVM_GET_SREGS is always a no-op. + */ struct kvm_sregs { __u32 pvr; union { @@ -62,6 +170,82 @@ struct kvm_sregs { __u64 dbat[8]; } ppc32; } s; + struct { + union { + struct { /* KVM_SREGS_E_IMPL_FSL */ + __u32 features; /* KVM_SREGS_E_FSL_ */ + __u32 svr; + __u64 mcar; + __u32 hid0; + + /* KVM_SREGS_E_FSL_PIDn */ + __u32 pid1, pid2; + } fsl; + __u8 pad[256]; + } impl; + + __u32 features; /* KVM_SREGS_E_ */ + __u32 impl_id; /* KVM_SREGS_E_IMPL_ */ + __u32 update_special; /* KVM_SREGS_E_UPDATE_ */ + __u32 pir; /* read-only */ + __u64 sprg8; + __u64 sprg9; /* E.ED */ + __u64 csrr0; + __u64 dsrr0; /* E.ED */ + __u64 mcsrr0; + __u32 csrr1; + __u32 dsrr1; /* E.ED */ + __u32 mcsrr1; + __u32 esr; + __u64 dear; + __u64 ivpr; + __u64 mcivpr; + __u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */ + + __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */ + __u32 tcr; + __u32 decar; + __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */ + + /* + * Userspace can read TB directly, but the + * value reported here is consistent with "dec". + * + * Read-only. + */ + __u64 tb; + + __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ + __u32 dbcr[3]; + __u32 iac[4]; + __u32 dac[2]; + __u32 dvc[2]; + __u8 num_iac; /* read-only */ + __u8 num_dac; /* read-only */ + __u8 num_dvc; /* read-only */ + __u8 pad; + + __u32 epr; /* EXP */ + __u32 vrsave; /* a.k.a. USPRG0 */ + __u32 epcr; /* KVM_SREGS_E_64 */ + + __u32 mas0; + __u32 mas1; + __u64 mas2; + __u64 mas7_3; + __u32 mas4; + __u32 mas6; + + __u32 ivor_low[16]; /* IVOR0-15 */ + __u32 ivor_high[18]; /* IVOR32+, plus room to expand */ + + __u32 mmucfg; /* read-only */ + __u32 eptcfg; /* E.PT, read-only */ + __u32 tlbcfg[4];/* read-only */ + __u32 tlbps[4]; /* read-only */ + + __u32 eplc, epsc; /* E.PD */ + } e; __u8 pad[1020]; } u; }; diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h index d22d39942a9..a0e57618ff3 100644 --- a/arch/powerpc/include/asm/kvm_44x.h +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); } -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 5b750467439..0951b17f4eb 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -59,6 +59,7 @@ #define BOOK3S_INTERRUPT_INST_SEGMENT 0x480 #define BOOK3S_INTERRUPT_EXTERNAL 0x500 #define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501 +#define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502 #define BOOK3S_INTERRUPT_ALIGNMENT 0x600 #define BOOK3S_INTERRUPT_PROGRAM 0x700 #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 36fdb3aff30..d5a8a386163 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -34,6 +34,7 @@ (\intno == BOOK3S_INTERRUPT_DATA_SEGMENT) || \ (\intno == BOOK3S_INTERRUPT_INST_SEGMENT) || \ (\intno == BOOK3S_INTERRUPT_EXTERNAL) || \ + (\intno == BOOK3S_INTERRUPT_EXTERNAL_HV) || \ (\intno == BOOK3S_INTERRUPT_ALIGNMENT) || \ (\intno == BOOK3S_INTERRUPT_PROGRAM) || \ (\intno == BOOK3S_INTERRUPT_FP_UNAVAIL) || \ diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index 7fea26fffb2..7a2a565f88c 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -43,6 +43,7 @@ struct kvmppc_vcpu_e500 { u32 host_pid[E500_PID_NUM]; u32 pid[E500_PID_NUM]; + u32 svr; u32 mas0; u32 mas1; @@ -58,6 +59,7 @@ struct kvmppc_vcpu_e500 { u32 hid1; u32 tlb0cfg; u32 tlb1cfg; + u64 mcar; struct kvm_vcpu vcpu; }; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bba3b9b72a3..186f150b9b8 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -223,6 +223,7 @@ struct kvm_vcpu_arch { ulong hflags; ulong guest_owned_ext; #endif + u32 vrsave; /* also USPRG0 */ u32 mmucr; ulong sprg4; ulong sprg5; @@ -232,6 +233,9 @@ struct kvm_vcpu_arch { ulong csrr1; ulong dsrr0; ulong dsrr1; + ulong mcsrr0; + ulong mcsrr1; + ulong mcsr; ulong esr; u32 dec; u32 decar; @@ -255,6 +259,7 @@ struct kvm_vcpu_arch { u32 dbsr; #ifdef CONFIG_KVM_EXIT_TIMING + struct mutex exit_timing_lock; struct kvmppc_exit_timing timing_exit; struct kvmppc_exit_timing timing_last_enter; u32 last_exit_type; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ecb3bc74c34..9345238edec 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); +extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); /* Core-specific hooks */ @@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) return r; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index a077adc0b35..e0298d26ce5 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -210,6 +210,8 @@ struct dtl_entry { #define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */ #define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) +extern struct kmem_cache *dtl_cache; + /* * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls * reading from the dispatch trace log. If other code wants to consume diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e4f01915fbb..47cacddb14c 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -29,21 +29,6 @@ struct file; struct pci_controller; struct kimage; -#ifdef CONFIG_SMP -struct smp_ops_t { - void (*message_pass)(int target, int msg); - int (*probe)(void); - void (*kick_cpu)(int nr); - void (*setup_cpu)(int nr); - void (*bringup_done)(void); - void (*take_timebase)(void); - void (*give_timebase)(void); - int (*cpu_disable)(void); - void (*cpu_die)(unsigned int nr); - int (*cpu_bootable)(unsigned int nr); -}; -#endif - struct machdep_calls { char *name; #ifdef CONFIG_PPC64 @@ -267,6 +252,7 @@ struct machdep_calls { extern void e500_idle(void); extern void power4_idle(void); +extern void power7_idle(void); extern void ppc6xx_idle(void); extern void book3e_idle(void); @@ -311,12 +297,6 @@ extern sys_ctrler_t sys_ctrler; #endif /* CONFIG_PPC_PMAC */ -#ifdef CONFIG_SMP -/* Poor default implementations */ -extern void __devinit smp_generic_give_timebase(void); -extern void __devinit smp_generic_take_timebase(void); -#endif /* CONFIG_SMP */ - /* Functions to produce codes on the leds. * The SRC code should be unique for the message category and should diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 17194fcd404..3ea0f9a259d 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -43,6 +43,7 @@ #define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000) #define MAS0_ESEL(x) (((x) << 16) & 0x0FFF0000) #define MAS0_NV(x) ((x) & 0x00000FFF) +#define MAS0_ESEL_MASK 0x0FFF0000 #define MAS0_HES 0x00004000 #define MAS0_WQ_ALLWAYS 0x00000000 #define MAS0_WQ_COND 0x00001000 @@ -137,6 +138,21 @@ #define MMUCSR0_TLB2PS 0x00078000 /* TLB2 Page Size */ #define MMUCSR0_TLB3PS 0x00780000 /* TLB3 Page Size */ +/* MMUCFG bits */ +#define MMUCFG_MAVN_NASK 0x00000003 +#define MMUCFG_MAVN_V1_0 0x00000000 +#define MMUCFG_MAVN_V2_0 0x00000001 +#define MMUCFG_NTLB_MASK 0x0000000c +#define MMUCFG_NTLB_SHIFT 2 +#define MMUCFG_PIDSIZE_MASK 0x000007c0 +#define MMUCFG_PIDSIZE_SHIFT 6 +#define MMUCFG_TWC 0x00008000 +#define MMUCFG_LRAT 0x00010000 +#define MMUCFG_RASIZE_MASK 0x00fe0000 +#define MMUCFG_RASIZE_SHIFT 17 +#define MMUCFG_LPIDSIZE_MASK 0x0f000000 +#define MMUCFG_LPIDSIZE_SHIFT 24 + /* TLBnCFG encoding */ #define TLBnCFG_N_ENTRY 0x00000fff /* number of entries */ #define TLBnCFG_HES 0x00002000 /* HW select supported */ @@ -229,6 +245,10 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; extern int mmu_linear_psize; extern int mmu_vmemmap_psize; +#ifdef CONFIG_PPC64 +extern unsigned long linear_map_top; +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */ diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index ae7b3efec8e..d865bd909c7 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -408,6 +408,7 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { } #endif /* CONFIG_PPC_SUBPAGE_PROT */ typedef unsigned long mm_context_id_t; +struct spinlock; typedef struct { mm_context_id_t id; @@ -423,6 +424,11 @@ typedef struct { #ifdef CONFIG_PPC_SUBPAGE_PROT struct subpage_prot_table spt; #endif /* CONFIG_PPC_SUBPAGE_PROT */ +#ifdef CONFIG_PPC_ICSWX + struct spinlock *cop_lockp; /* guard acop and cop_pid */ + unsigned long acop; /* mask of enabled coprocessor types */ + unsigned int cop_pid; /* pid value used with coprocessors */ +#endif /* CONFIG_PPC_ICSWX */ } mm_context_t; diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index bb40a06d3b7..4138b21ae80 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -56,11 +56,6 @@ */ #define MMU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x00200000) -/* This indicates that the processor uses the ISA 2.06 server tlbie - * mnemonics - */ -#define MMU_FTR_TLBIE_206 ASM_CONST(0x00400000) - /* Enable use of TLB reservation. Processor should support tlbsrx. * instruction and MAS0[WQ]. */ @@ -70,6 +65,53 @@ */ #define MMU_FTR_USE_PAIRED_MAS ASM_CONST(0x01000000) +/* MMU is SLB-based + */ +#define MMU_FTR_SLB ASM_CONST(0x02000000) + +/* Support 16M large pages + */ +#define MMU_FTR_16M_PAGE ASM_CONST(0x04000000) + +/* Supports TLBIEL variant + */ +#define MMU_FTR_TLBIEL ASM_CONST(0x08000000) + +/* Supports tlbies w/o locking + */ +#define MMU_FTR_LOCKLESS_TLBIE ASM_CONST(0x10000000) + +/* Large pages can be marked CI + */ +#define MMU_FTR_CI_LARGE_PAGE ASM_CONST(0x20000000) + +/* 1T segments available + */ +#define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) + +/* Doesn't support the B bit (1T segment) in SLBIE + */ +#define MMU_FTR_NO_SLBIE_B ASM_CONST(0x80000000) + +/* MMU feature bit sets for various CPUs */ +#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ + MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 +#define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2 +#define MMU_FTRS_PPC970 MMU_FTRS_POWER4 +#define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE +#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE +#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE +#define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ + MMU_FTR_CI_LARGE_PAGE +#define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ + MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B +#define MMU_FTRS_A2 MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | \ + MMU_FTR_USE_TLBIVAX_BCAST | \ + MMU_FTR_LOCK_BCAST_INVAL | \ + MMU_FTR_USE_TLBRSRV | \ + MMU_FTR_USE_PAIRED_MAS | \ + MMU_FTR_TLBIEL | \ + MMU_FTR_16M_PAGE #ifndef __ASSEMBLY__ #include <asm/cputable.h> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 81fb41289d6..a73668a5f30 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -32,6 +32,10 @@ extern void __destroy_context(unsigned long context_id); extern void mmu_context_init(void); #endif +extern void switch_cop(struct mm_struct *next); +extern int use_cop(unsigned long acop, struct mm_struct *mm); +extern void drop_cop(unsigned long acop, struct mm_struct *mm); + /* * switch_mm is the entry point called from the architecture independent * code in kernel/sched.c @@ -55,6 +59,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (prev == next) return; +#ifdef CONFIG_PPC_ICSWX + /* Switch coprocessor context only if prev or next uses a coprocessor */ + if (prev->context.acop || next->context.acop) + switch_cop(next); +#endif /* CONFIG_PPC_ICSWX */ + /* We must stop all altivec streams before changing the HW * context */ @@ -67,7 +77,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * sub architectures. */ #ifdef CONFIG_PPC_STD_MMU_64 - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) switch_slb(tsk, next); else switch_stab(tsk, next); diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index 49baddcdd14..df18989e78d 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -262,6 +262,7 @@ struct mpic #ifdef CONFIG_SMP struct irq_chip hc_ipi; #endif + struct irq_chip hc_tm; const char *name; /* Flags */ unsigned int flags; @@ -280,7 +281,7 @@ struct mpic /* vector numbers used for internal sources (ipi/timers) */ unsigned int ipi_vecs[4]; - unsigned int timer_vecs[4]; + unsigned int timer_vecs[8]; /* Spurious vector to program into unused sources */ unsigned int spurious_vec; @@ -368,6 +369,8 @@ struct mpic * NOTE: This flag trumps MPIC_WANTS_RESET. */ #define MPIC_NO_RESET 0x00004000 +/* Freescale MPIC (compatible includes "fsl,mpic") */ +#define MPIC_FSL 0x00008000 /* MPIC HW modification ID */ #define MPIC_REGSET_MASK 0xf0000000 diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h index d4b4bfa26fb..89d2f99c1bf 100644 --- a/arch/powerpc/include/asm/pSeries_reconfig.h +++ b/arch/powerpc/include/asm/pSeries_reconfig.h @@ -18,13 +18,18 @@ extern int pSeries_reconfig_notifier_register(struct notifier_block *); extern void pSeries_reconfig_notifier_unregister(struct notifier_block *); extern struct blocking_notifier_head pSeries_reconfig_chain; +/* Not the best place to put this, will be fixed when we move some + * of the rtas suspend-me stuff to pseries */ +extern void pSeries_coalesce_init(void); #else /* !CONFIG_PPC_PSERIES */ static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb) { return 0; } static inline void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { } +static inline void pSeries_coalesce_init(void) { } #endif /* CONFIG_PPC_PSERIES */ + #endif /* __KERNEL__ */ #endif /* _PPC64_PSERIES_RECONFIG_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index ec57540cd7a..74126765106 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -92,9 +92,9 @@ struct paca_struct { * Now, starting in cacheline 2, the exception save areas */ /* used for most interrupts/exceptions */ - u64 exgen[10] __attribute__((aligned(0x80))); - u64 exmc[10]; /* used for machine checks */ - u64 exslb[10]; /* used for SLB/segment table misses + u64 exgen[11] __attribute__((aligned(0x80))); + u64 exmc[11]; /* used for machine checks */ + u64 exslb[11]; /* used for SLB/segment table misses * on the linear mapping */ /* SLB related definitions */ u16 vmalloc_sllp; @@ -106,7 +106,8 @@ struct paca_struct { pgd_t *pgd; /* Current PGD */ pgd_t *kernel_pgd; /* Kernel PGD */ u64 exgen[8] __attribute__((aligned(0x80))); - u64 extlb[EX_TLB_SIZE*3] __attribute__((aligned(0x80))); + /* We can have up to 3 levels of reentrancy in the TLB miss handler */ + u64 extlb[3][EX_TLB_SIZE / sizeof(u64)] __attribute__((aligned(0x80))); u64 exmc[8]; /* used for machine checks */ u64 excrit[8]; /* used for crit interrupts */ u64 exdbg[8]; /* used for debug interrupts */ @@ -125,7 +126,7 @@ struct paca_struct { struct task_struct *__current; /* Pointer to current */ u64 kstack; /* Saved Kernel stack addr */ u64 stab_rr; /* stab/slb round-robin counter */ - u64 saved_r1; /* r1 save for RTAS calls */ + u64 saved_r1; /* r1 save for RTAS calls or PM */ u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ u8 soft_enabled; /* irq soft-enable flag */ diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 812b2cd80ae..9356262fd3c 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -59,24 +59,7 @@ static __inline__ void clear_page(void *addr) : "ctr", "memory"); } -extern void copy_4K_page(void *to, void *from); - -#ifdef CONFIG_PPC_64K_PAGES -static inline void copy_page(void *to, void *from) -{ - unsigned int i; - for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) { - copy_4K_page(to, from); - to += 4096; - from += 4096; - } -} -#else /* CONFIG_PPC_64K_PAGES */ -static inline void copy_page(void *to, void *from) -{ - copy_4K_page(to, from); -} -#endif /* CONFIG_PPC_64K_PAGES */ +extern void copy_page(void *to, void *from); /* Log 2 of page table size */ extern u64 ppc64_pft_size; @@ -130,7 +113,7 @@ extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize); -#define slice_mm_new_context(mm) ((mm)->context.id == 0) +#define slice_mm_new_context(mm) ((mm)->context.id == MMU_NO_CONTEXT) #endif /* __ASSEMBLY__ */ #else diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 2b09cd522d3..81576ee0cfb 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -257,21 +257,20 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long old; - if ((pte_val(*ptep) & _PAGE_RW) == 0) - return; - old = pte_update(mm, addr, ptep, _PAGE_RW, 0); + if ((pte_val(*ptep) & _PAGE_RW) == 0) + return; + + pte_update(mm, addr, ptep, _PAGE_RW, 0); } static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long old; - if ((pte_val(*ptep) & _PAGE_RW) == 0) return; - old = pte_update(mm, addr, ptep, _PAGE_RW, 1); + + pte_update(mm, addr, ptep, _PAGE_RW, 1); } /* diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 1255569387b..e472659d906 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -41,6 +41,10 @@ #define PPC_INST_RFCI 0x4c000066 #define PPC_INST_RFDI 0x4c00004e #define PPC_INST_RFMCI 0x4c00004c +#define PPC_INST_MFSPR_DSCR 0x7c1102a6 +#define PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff +#define PPC_INST_MTSPR_DSCR 0x7c1103a6 +#define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe @@ -56,6 +60,17 @@ #define PPC_INST_TLBSRX_DOT 0x7c0006a5 #define PPC_INST_XXLOR 0xf0000510 +#define PPC_INST_NAP 0x4c000364 +#define PPC_INST_SLEEP 0x4c0003a4 + +/* A2 specific instructions */ +#define PPC_INST_ERATWE 0x7c0001a6 +#define PPC_INST_ERATRE 0x7c000166 +#define PPC_INST_ERATILX 0x7c000066 +#define PPC_INST_ERATIVAX 0x7c000666 +#define PPC_INST_ERATSX 0x7c000126 +#define PPC_INST_ERATSX_DOT 0x7c000127 + /* macros to insert fields into opcodes */ #define __PPC_RA(a) (((a) & 0x1f) << 16) #define __PPC_RB(b) (((b) & 0x1f) << 11) @@ -67,6 +82,8 @@ #define __PPC_XT(s) __PPC_XS(s) #define __PPC_T_TLB(t) (((t) & 0x3) << 21) #define __PPC_WC(w) (((w) & 0x3) << 21) +#define __PPC_WS(w) (((w) & 0x1f) << 11) + /* * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a * larx with EH set as an illegal instruction. @@ -113,6 +130,21 @@ #define PPC_TLBIVAX(a,b) stringify_in_c(.long PPC_INST_TLBIVAX | \ __PPC_RA(a) | __PPC_RB(b)) +#define PPC_ERATWE(s, a, w) stringify_in_c(.long PPC_INST_ERATWE | \ + __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) +#define PPC_ERATRE(s, a, w) stringify_in_c(.long PPC_INST_ERATRE | \ + __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) +#define PPC_ERATILX(t, a, b) stringify_in_c(.long PPC_INST_ERATILX | \ + __PPC_T_TLB(t) | __PPC_RA(a) | \ + __PPC_RB(b)) +#define PPC_ERATIVAX(s, a, b) stringify_in_c(.long PPC_INST_ERATIVAX | \ + __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_ERATSX(t, a, w) stringify_in_c(.long PPC_INST_ERATSX | \ + __PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_INST_ERATSX_DOT | \ + __PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b)) + + /* * Define what the VSX XX1 form instructions will look like, then add * the 128 bit load store instructions based on that. @@ -126,4 +158,7 @@ #define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \ VSX_XX3((t), (a), (b))) +#define PPC_NAP stringify_in_c(.long PPC_INST_NAP) +#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) + #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 98210067c1c..1b422381fc1 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -170,6 +170,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #define HMT_MEDIUM or 2,2,2 #define HMT_MEDIUM_HIGH or 5,5,5 # medium high priority #define HMT_HIGH or 3,3,3 +#define HMT_EXTRA_HIGH or 7,7,7 # power7 only #ifdef __KERNEL__ #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index de1967a1ff5..d50c2b6d9bc 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -238,6 +238,10 @@ struct thread_struct { #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ +#ifdef CONFIG_PPC64 + unsigned long dscr; + int dscr_inherit; +#endif }; #define ARCH_MIN_TASKALIGN 16 diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7e4abebe76c..c5cae0dd176 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -99,17 +99,23 @@ #define MSR_LE __MASK(MSR_LE_LG) /* Little Endian */ #if defined(CONFIG_PPC_BOOK3S_64) +#define MSR_64BIT MSR_SF + /* Server variant */ #define MSR_ MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV -#define MSR_KERNEL MSR_ | MSR_SF +#define MSR_KERNEL MSR_ | MSR_64BIT #define MSR_USER32 MSR_ | MSR_PR | MSR_EE -#define MSR_USER64 MSR_USER32 | MSR_SF +#define MSR_USER64 MSR_USER32 | MSR_64BIT #elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx) /* Default MSR for kernel mode. */ #define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR) #define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE) #endif +#ifndef MSR_64BIT +#define MSR_64BIT 0 +#endif + /* Floating Point Status and Control Register (FPSCR) Fields */ #define FPSCR_FX 0x80000000 /* FPU exception summary */ #define FPSCR_FEX 0x40000000 /* FPU enabled exception summary */ @@ -182,6 +188,8 @@ #define SPRN_CTR 0x009 /* Count Register */ #define SPRN_DSCR 0x11 +#define SPRN_CFAR 0x1c /* Come From Address Register */ +#define SPRN_ACOP 0x1F /* Available Coprocessor Register */ #define SPRN_CTRLF 0x088 #define SPRN_CTRLT 0x098 #define CTRL_CT 0xc0000000 /* current thread */ @@ -210,8 +218,43 @@ #define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ #define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */ #define SPRN_SPURR 0x134 /* Scaled PURR */ +#define SPRN_HSPRG0 0x130 /* Hypervisor Scratch 0 */ +#define SPRN_HSPRG1 0x131 /* Hypervisor Scratch 1 */ +#define SPRN_HDSISR 0x132 +#define SPRN_HDAR 0x133 +#define SPRN_HDEC 0x136 /* Hypervisor Decrementer */ #define SPRN_HIOR 0x137 /* 970 Hypervisor interrupt offset */ +#define SPRN_RMOR 0x138 /* Real mode offset register */ +#define SPRN_HRMOR 0x139 /* Real mode offset register */ +#define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ +#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ #define SPRN_LPCR 0x13E /* LPAR Control Register */ +#define LPCR_VPM0 (1ul << (63-0)) +#define LPCR_VPM1 (1ul << (63-1)) +#define LPCR_ISL (1ul << (63-2)) +#define LPCR_DPFD_SH (63-11) +#define LPCR_VRMA_L (1ul << (63-12)) +#define LPCR_VRMA_LP0 (1ul << (63-15)) +#define LPCR_VRMA_LP1 (1ul << (63-16)) +#define LPCR_RMLS 0x1C000000 /* impl dependent rmo limit sel */ +#define LPCR_ILE 0x02000000 /* !HV irqs set MSR:LE */ +#define LPCR_PECE 0x00007000 /* powersave exit cause enable */ +#define LPCR_PECE0 0x00004000 /* ext. exceptions can cause exit */ +#define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ +#define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ +#define LPCR_MER 0x00000800 /* Mediated External Exception */ +#define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ +#define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ +#define LPCR_RMI 0x00000002 /* real mode is cache inhibit */ +#define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */ +#define SPRN_LPID 0x13F /* Logical Partition Identifier */ +#define SPRN_HMER 0x150 /* Hardware m? error recovery */ +#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ +#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ +#define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */ +#define SPRN_TLBVPNR 0x155 /* P7 TLB control register */ +#define SPRN_TLBRPNR 0x156 /* P7 TLB control register */ +#define SPRN_TLBLPIDR 0x157 /* P7 TLB control register */ #define SPRN_DBAT0L 0x219 /* Data BAT 0 Lower Register */ #define SPRN_DBAT0U 0x218 /* Data BAT 0 Upper Register */ #define SPRN_DBAT1L 0x21B /* Data BAT 1 Lower Register */ @@ -434,16 +477,23 @@ #define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ #define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ -#define SRR1_WAKERESET 0x00380000 /* System reset */ #define SRR1_WAKESYSERR 0x00300000 /* System error */ #define SRR1_WAKEEE 0x00200000 /* External interrupt */ #define SRR1_WAKEMT 0x00280000 /* mtctrl */ +#define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */ #define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ #define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */ +#define SRR1_WAKERESET 0x00100000 /* System reset */ +#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */ +#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained, + * may not be recoverable */ +#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */ +#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */ #define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */ #define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */ #define SRR1_PROGTRAP 0x00020000 /* Trap */ #define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */ + #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ @@ -673,12 +723,15 @@ * SPRG usage: * * All 64-bit: - * - SPRG1 stores PACA pointer + * - SPRG1 stores PACA pointer except 64-bit server in + * HV mode in which case it is HSPRG0 * * 64-bit server: * - SPRG0 unused (reserved for HV on Power4) * - SPRG2 scratch for exception vectors * - SPRG3 unused (user visible) + * - HSPRG0 stores PACA in HV mode + * - HSPRG1 scratch for "HV" exceptions * * 64-bit embedded * - SPRG0 generic exception scratch @@ -741,6 +794,41 @@ #ifdef CONFIG_PPC_BOOK3S_64 #define SPRN_SPRG_SCRATCH0 SPRN_SPRG2 +#define SPRN_SPRG_HPACA SPRN_HSPRG0 +#define SPRN_SPRG_HSCRATCH0 SPRN_HSPRG1 + +#define GET_PACA(rX) \ + BEGIN_FTR_SECTION_NESTED(66); \ + mfspr rX,SPRN_SPRG_PACA; \ + FTR_SECTION_ELSE_NESTED(66); \ + mfspr rX,SPRN_SPRG_HPACA; \ + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + +#define SET_PACA(rX) \ + BEGIN_FTR_SECTION_NESTED(66); \ + mtspr SPRN_SPRG_PACA,rX; \ + FTR_SECTION_ELSE_NESTED(66); \ + mtspr SPRN_SPRG_HPACA,rX; \ + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + +#define GET_SCRATCH0(rX) \ + BEGIN_FTR_SECTION_NESTED(66); \ + mfspr rX,SPRN_SPRG_SCRATCH0; \ + FTR_SECTION_ELSE_NESTED(66); \ + mfspr rX,SPRN_SPRG_HSCRATCH0; \ + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + +#define SET_SCRATCH0(rX) \ + BEGIN_FTR_SECTION_NESTED(66); \ + mtspr SPRN_SPRG_SCRATCH0,rX; \ + FTR_SECTION_ELSE_NESTED(66); \ + mtspr SPRN_SPRG_HSCRATCH0,rX; \ + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + +#else /* CONFIG_PPC_BOOK3S_64 */ +#define GET_SCRATCH0(rX) mfspr rX,SPRN_SPRG_SCRATCH0 +#define SET_SCRATCH0(rX) mtspr SPRN_SPRG_SCRATCH0,rX + #endif #ifdef CONFIG_PPC_BOOK3E_64 @@ -750,6 +838,10 @@ #define SPRN_SPRG_TLB_EXFRAME SPRN_SPRG2 #define SPRN_SPRG_TLB_SCRATCH SPRN_SPRG6 #define SPRN_SPRG_GEN_SCRATCH SPRN_SPRG0 + +#define SET_PACA(rX) mtspr SPRN_SPRG_PACA,rX +#define GET_PACA(rX) mfspr rX,SPRN_SPRG_PACA + #endif #ifdef CONFIG_PPC_BOOK3S_32 @@ -800,6 +892,8 @@ #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 #endif + + /* * An mtfsf instruction with the L bit set. On CPUs that support this a * full 64bits of FPSCR is restored and on other CPUs the L bit is ignored. @@ -894,6 +988,8 @@ #define PV_POWER5p 0x003B #define PV_POWER7 0x003F #define PV_970FX 0x003C +#define PV_POWER6 0x003E +#define PV_POWER7 0x003F #define PV_630 0x0040 #define PV_630p 0x0041 #define PV_970MP 0x0044 diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h new file mode 100644 index 00000000000..3d52a1132f3 --- /dev/null +++ b/arch/powerpc/include/asm/reg_a2.h @@ -0,0 +1,165 @@ +/* + * Register definitions specific to the A2 core + * + * Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ASM_POWERPC_REG_A2_H__ +#define __ASM_POWERPC_REG_A2_H__ + +#define SPRN_TENSR 0x1b5 +#define SPRN_TENS 0x1b6 /* Thread ENable Set */ +#define SPRN_TENC 0x1b7 /* Thread ENable Clear */ + +#define SPRN_A2_CCR0 0x3f0 /* Core Configuration Register 0 */ +#define SPRN_A2_CCR1 0x3f1 /* Core Configuration Register 1 */ +#define SPRN_A2_CCR2 0x3f2 /* Core Configuration Register 2 */ +#define SPRN_MMUCR0 0x3fc /* MMU Control Register 0 */ +#define SPRN_MMUCR1 0x3fd /* MMU Control Register 1 */ +#define SPRN_MMUCR2 0x3fe /* MMU Control Register 2 */ +#define SPRN_MMUCR3 0x3ff /* MMU Control Register 3 */ + +#define SPRN_IAR 0x372 + +#define SPRN_IUCR0 0x3f3 +#define IUCR0_ICBI_ACK 0x1000 + +#define SPRN_XUCR0 0x3f6 /* Execution Unit Config Register 0 */ + +#define A2_IERAT_SIZE 16 +#define A2_DERAT_SIZE 32 + +/* A2 MMUCR0 bits */ +#define MMUCR0_ECL 0x80000000 /* Extended Class for TLB fills */ +#define MMUCR0_TID_NZ 0x40000000 /* TID is non-zero */ +#define MMUCR0_TS 0x10000000 /* Translation space for TLB fills */ +#define MMUCR0_TGS 0x20000000 /* Guest space for TLB fills */ +#define MMUCR0_TLBSEL 0x0c000000 /* TLB or ERAT target for TLB fills */ +#define MMUCR0_TLBSEL_U 0x00000000 /* TLBSEL = UTLB */ +#define MMUCR0_TLBSEL_I 0x08000000 /* TLBSEL = I-ERAT */ +#define MMUCR0_TLBSEL_D 0x0c000000 /* TLBSEL = D-ERAT */ +#define MMUCR0_LOCKSRSH 0x02000000 /* Use TLB lock on tlbsx. */ +#define MMUCR0_TID_MASK 0x000000ff /* TID field */ + +/* A2 MMUCR1 bits */ +#define MMUCR1_IRRE 0x80000000 /* I-ERAT round robin enable */ +#define MMUCR1_DRRE 0x40000000 /* D-ERAT round robin enable */ +#define MMUCR1_REE 0x20000000 /* Reference Exception Enable*/ +#define MMUCR1_CEE 0x10000000 /* Change exception enable */ +#define MMUCR1_CSINV_ALL 0x00000000 /* Inval ERAT on all CS evts */ +#define MMUCR1_CSINV_NISYNC 0x04000000 /* Inval ERAT on all ex isync*/ +#define MMUCR1_CSINV_NEVER 0x0c000000 /* Don't inval ERAT on CS */ +#define MMUCR1_ICTID 0x00080000 /* IERAT class field as TID */ +#define MMUCR1_ITTID 0x00040000 /* IERAT thdid field as TID */ +#define MMUCR1_DCTID 0x00020000 /* DERAT class field as TID */ +#define MMUCR1_DTTID 0x00010000 /* DERAT thdid field as TID */ +#define MMUCR1_DCCD 0x00008000 /* DERAT class ignore */ +#define MMUCR1_TLBWE_BINV 0x00004000 /* back invalidate on tlbwe */ + +/* A2 MMUCR2 bits */ +#define MMUCR2_PSSEL_SHIFT 4 + +/* A2 MMUCR3 bits */ +#define MMUCR3_THID 0x0000000f /* Thread ID */ + +/* *** ERAT TLB bits definitions */ +#define TLB0_EPN_MASK ASM_CONST(0xfffffffffffff000) +#define TLB0_CLASS_MASK ASM_CONST(0x0000000000000c00) +#define TLB0_CLASS_00 ASM_CONST(0x0000000000000000) +#define TLB0_CLASS_01 ASM_CONST(0x0000000000000400) +#define TLB0_CLASS_10 ASM_CONST(0x0000000000000800) +#define TLB0_CLASS_11 ASM_CONST(0x0000000000000c00) +#define TLB0_V ASM_CONST(0x0000000000000200) +#define TLB0_X ASM_CONST(0x0000000000000100) +#define TLB0_SIZE_MASK ASM_CONST(0x00000000000000f0) +#define TLB0_SIZE_4K ASM_CONST(0x0000000000000010) +#define TLB0_SIZE_64K ASM_CONST(0x0000000000000030) +#define TLB0_SIZE_1M ASM_CONST(0x0000000000000050) +#define TLB0_SIZE_16M ASM_CONST(0x0000000000000070) +#define TLB0_SIZE_1G ASM_CONST(0x00000000000000a0) +#define TLB0_THDID_MASK ASM_CONST(0x000000000000000f) +#define TLB0_THDID_0 ASM_CONST(0x0000000000000001) +#define TLB0_THDID_1 ASM_CONST(0x0000000000000002) +#define TLB0_THDID_2 ASM_CONST(0x0000000000000004) +#define TLB0_THDID_3 ASM_CONST(0x0000000000000008) +#define TLB0_THDID_ALL ASM_CONST(0x000000000000000f) + +#define TLB1_RESVATTR ASM_CONST(0x00f0000000000000) +#define TLB1_U0 ASM_CONST(0x0008000000000000) +#define TLB1_U1 ASM_CONST(0x0004000000000000) +#define TLB1_U2 ASM_CONST(0x0002000000000000) +#define TLB1_U3 ASM_CONST(0x0001000000000000) +#define TLB1_R ASM_CONST(0x0000800000000000) +#define TLB1_C ASM_CONST(0x0000400000000000) +#define TLB1_RPN_MASK ASM_CONST(0x000003fffffff000) +#define TLB1_W ASM_CONST(0x0000000000000800) +#define TLB1_I ASM_CONST(0x0000000000000400) +#define TLB1_M ASM_CONST(0x0000000000000200) +#define TLB1_G ASM_CONST(0x0000000000000100) +#define TLB1_E ASM_CONST(0x0000000000000080) +#define TLB1_VF ASM_CONST(0x0000000000000040) +#define TLB1_UX ASM_CONST(0x0000000000000020) +#define TLB1_SX ASM_CONST(0x0000000000000010) +#define TLB1_UW ASM_CONST(0x0000000000000008) +#define TLB1_SW ASM_CONST(0x0000000000000004) +#define TLB1_UR ASM_CONST(0x0000000000000002) +#define TLB1_SR ASM_CONST(0x0000000000000001) + +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP +#define WSP_UART_PHYS 0xffc000c000 +/* This needs to be careful chosen to hit a !0 congruence class + * in the TLB since we bolt it in way 3, which is already occupied + * by our linear mapping primary bolted entry in CC 0. + */ +#define WSP_UART_VIRT 0xf000000000001000 +#endif + +/* A2 erativax attributes definitions */ +#define ERATIVAX_RS_IS_ALL 0x000 +#define ERATIVAX_RS_IS_TID 0x040 +#define ERATIVAX_RS_IS_CLASS 0x080 +#define ERATIVAX_RS_IS_FULLMATCH 0x0c0 +#define ERATIVAX_CLASS_00 0x000 +#define ERATIVAX_CLASS_01 0x010 +#define ERATIVAX_CLASS_10 0x020 +#define ERATIVAX_CLASS_11 0x030 +#define ERATIVAX_PSIZE_4K (TLB_PSIZE_4K >> 1) +#define ERATIVAX_PSIZE_64K (TLB_PSIZE_64K >> 1) +#define ERATIVAX_PSIZE_1M (TLB_PSIZE_1M >> 1) +#define ERATIVAX_PSIZE_16M (TLB_PSIZE_16M >> 1) +#define ERATIVAX_PSIZE_1G (TLB_PSIZE_1G >> 1) + +/* A2 eratilx attributes definitions */ +#define ERATILX_T_ALL 0 +#define ERATILX_T_TID 1 +#define ERATILX_T_TGS 2 +#define ERATILX_T_FULLMATCH 3 +#define ERATILX_T_CLASS0 4 +#define ERATILX_T_CLASS1 5 +#define ERATILX_T_CLASS2 6 +#define ERATILX_T_CLASS3 7 + +/* XUCR0 bits */ +#define XUCR0_TRACE_UM_T0 0x40000000 /* Thread 0 */ +#define XUCR0_TRACE_UM_T1 0x20000000 /* Thread 1 */ +#define XUCR0_TRACE_UM_T2 0x10000000 /* Thread 2 */ +#define XUCR0_TRACE_UM_T3 0x08000000 /* Thread 3 */ + +/* A2 CCR0 register */ +#define A2_CCR0_PME_DISABLED 0x00000000 +#define A2_CCR0_PME_SLEEP 0x40000000 +#define A2_CCR0_PME_RVW 0x80000000 +#define A2_CCR0_PME_DISABLED2 0xc0000000 + +/* A2 CCR2 register */ +#define A2_CCR2_ERAT_ONLY_MODE 0x00000001 +#define A2_CCR2_ENABLE_ICSWX 0x00000002 +#define A2_CCR2_ENABLE_PC 0x20000000 +#define A2_CCR2_ENABLE_TRACE 0x40000000 + +#endif /* __ASM_POWERPC_REG_A2_H__ */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index b316794aa2b..0f0ad9fa01c 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -27,10 +27,12 @@ #define MSR_CM (1<<31) /* Computation Mode (0=32-bit, 1=64-bit) */ #if defined(CONFIG_PPC_BOOK3E_64) +#define MSR_64BIT MSR_CM + #define MSR_ MSR_ME | MSR_CE -#define MSR_KERNEL MSR_ | MSR_CM +#define MSR_KERNEL MSR_ | MSR_64BIT #define MSR_USER32 MSR_ | MSR_PR | MSR_EE | MSR_DE -#define MSR_USER64 MSR_USER32 | MSR_CM | MSR_DE +#define MSR_USER64 MSR_USER32 | MSR_64BIT #elif defined (CONFIG_40x) #define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE) #define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE) @@ -81,6 +83,10 @@ #define SPRN_IVOR13 0x19D /* Interrupt Vector Offset Register 13 */ #define SPRN_IVOR14 0x19E /* Interrupt Vector Offset Register 14 */ #define SPRN_IVOR15 0x19F /* Interrupt Vector Offset Register 15 */ +#define SPRN_IVOR38 0x1B0 /* Interrupt Vector Offset Register 38 */ +#define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */ +#define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */ +#define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */ #define SPRN_SPEFSCR 0x200 /* SPE & Embedded FP Status & Control */ #define SPRN_BBEAR 0x201 /* Branch Buffer Entry Address Register */ #define SPRN_BBTAR 0x202 /* Branch Buffer Target Address Register */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9a1193e30f2..58625d1e780 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -158,7 +158,50 @@ struct rtas_error_log { unsigned long target:4; /* Target of failed operation */ unsigned long type:8; /* General event or error*/ unsigned long extended_log_length:32; /* length in bytes */ - unsigned char buffer[1]; + unsigned char buffer[1]; /* Start of extended log */ + /* Variable length. */ +}; + +#define RTAS_V6EXT_LOG_FORMAT_EVENT_LOG 14 + +#define RTAS_V6EXT_COMPANY_ID_IBM (('I' << 24) | ('B' << 16) | ('M' << 8)) + +/* RTAS general extended event log, Version 6. The extended log starts + * from "buffer" field of struct rtas_error_log defined above. + */ +struct rtas_ext_event_log_v6 { + /* Byte 0 */ + uint32_t log_valid:1; /* 1:Log valid */ + uint32_t unrecoverable_error:1; /* 1:Unrecoverable error */ + uint32_t recoverable_error:1; /* 1:recoverable (correctable */ + /* or successfully retried) */ + uint32_t degraded_operation:1; /* 1:Unrecoverable err, bypassed*/ + /* - degraded operation (e.g. */ + /* CPU or mem taken off-line) */ + uint32_t predictive_error:1; + uint32_t new_log:1; /* 1:"New" log (Always 1 for */ + /* data returned from RTAS */ + uint32_t big_endian:1; /* 1: Big endian */ + uint32_t :1; /* reserved */ + /* Byte 1 */ + uint32_t :8; /* reserved */ + /* Byte 2 */ + uint32_t powerpc_format:1; /* Set to 1 (indicating log is */ + /* in PowerPC format */ + uint32_t :3; /* reserved */ + uint32_t log_format:4; /* Log format indicator. Define */ + /* format used for byte 12-2047 */ + /* Byte 3 */ + uint32_t :8; /* reserved */ + /* Byte 4-11 */ + uint8_t reserved[8]; /* reserved */ + /* Byte 12-15 */ + uint32_t company_id; /* Company ID of the company */ + /* that defines the format for */ + /* the vendor specific log type */ + /* Byte 16-end of log */ + uint8_t vendor_log[1]; /* Start of vendor specific log */ + /* Variable length. */ }; /* diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h new file mode 100644 index 00000000000..0cabfd7bc2d --- /dev/null +++ b/arch/powerpc/include/asm/scom.h @@ -0,0 +1,156 @@ +/* + * Copyright 2010 Benjamin Herrenschmidt, IBM Corp + * <benh@kernel.crashing.org> + * and David Gibson, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_POWERPC_SCOM_H +#define _ASM_POWERPC_SCOM_H + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +#ifdef CONFIG_PPC_SCOM + +/* + * The SCOM bus is a sideband bus used for accessing various internal + * registers of the processor or the chipset. The implementation details + * differ between processors and platforms, and the access method as + * well. + * + * This API allows to "map" ranges of SCOM register numbers associated + * with a given SCOM controller. The later must be represented by a + * device node, though some implementations might support NULL if there + * is no possible ambiguity + * + * Then, scom_read/scom_write can be used to accesses registers inside + * that range. The argument passed is a register number relative to + * the beginning of the range mapped. + */ + +typedef void *scom_map_t; + +/* Value for an invalid SCOM map */ +#define SCOM_MAP_INVALID (NULL) + +/* The scom_controller data structure is what the platform passes + * to the core code in scom_init, it provides the actual implementation + * of all the SCOM functions + */ +struct scom_controller { + scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count); + void (*unmap)(scom_map_t map); + + u64 (*read)(scom_map_t map, u32 reg); + void (*write)(scom_map_t map, u32 reg, u64 value); +}; + +extern const struct scom_controller *scom_controller; + +/** + * scom_init - Initialize the SCOM backend, called by the platform + * @controller: The platform SCOM controller + */ +static inline void scom_init(const struct scom_controller *controller) +{ + scom_controller = controller; +} + +/** + * scom_map_ok - Test is a SCOM mapping is successful + * @map: The result of scom_map to test + */ +static inline int scom_map_ok(scom_map_t map) +{ + return map != SCOM_MAP_INVALID; +} + +/** + * scom_map - Map a block of SCOM registers + * @ctrl_dev: Device node of the SCOM controller + * some implementations allow NULL here + * @reg: first SCOM register to map + * @count: Number of SCOM registers to map + */ + +static inline scom_map_t scom_map(struct device_node *ctrl_dev, + u64 reg, u64 count) +{ + return scom_controller->map(ctrl_dev, reg, count); +} + +/** + * scom_find_parent - Find the SCOM controller for a device + * @dev: OF node of the device + * + * This is not meant for general usage, but in combination with + * scom_map() allows to map registers not represented by the + * device own scom-reg property. Useful for applying HW workarounds + * on things not properly represented in the device-tree for example. + */ +struct device_node *scom_find_parent(struct device_node *dev); + + +/** + * scom_map_device - Map a device's block of SCOM registers + * @dev: OF node of the device + * @index: Register bank index (index in "scom-reg" property) + * + * This function will use the device-tree binding for SCOM which + * is to follow "scom-parent" properties until it finds a node with + * a "scom-controller" property to find the controller. It will then + * use the "scom-reg" property which is made of reg/count pairs, + * each of them having a size defined by the controller's #scom-cells + * property + */ +extern scom_map_t scom_map_device(struct device_node *dev, int index); + + +/** + * scom_unmap - Unmap a block of SCOM registers + * @map: Result of scom_map is to be unmapped + */ +static inline void scom_unmap(scom_map_t map) +{ + if (scom_map_ok(map)) + scom_controller->unmap(map); +} + +/** + * scom_read - Read a SCOM register + * @map: Result of scom_map + * @reg: Register index within that map + */ +static inline u64 scom_read(scom_map_t map, u32 reg) +{ + return scom_controller->read(map, reg); +} + +/** + * scom_write - Write to a SCOM register + * @map: Result of scom_map + * @reg: Register index within that map + * @value: Value to write + */ +static inline void scom_write(scom_map_t map, u32 reg, u64 value) +{ + scom_controller->write(map, reg, value); +} + +#endif /* CONFIG_PPC_SCOM */ +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_SCOM_H */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index a902a0d3ae0..880b8c1e6e5 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -20,6 +20,7 @@ #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/kernel.h> +#include <linux/irqreturn.h> #ifndef __ASSEMBLY__ @@ -29,14 +30,32 @@ #include <asm/percpu.h> extern int boot_cpuid; +extern int boot_cpu_count; extern void cpu_die(void); #ifdef CONFIG_SMP -extern void smp_send_debugger_break(int cpu); -extern void smp_message_recv(int); +struct smp_ops_t { + void (*message_pass)(int cpu, int msg); +#ifdef CONFIG_PPC_SMP_MUXED_IPI + void (*cause_ipi)(int cpu, unsigned long data); +#endif + int (*probe)(void); + int (*kick_cpu)(int nr); + void (*setup_cpu)(int nr); + void (*bringup_done)(void); + void (*take_timebase)(void); + void (*give_timebase)(void); + int (*cpu_disable)(void); + void (*cpu_die)(unsigned int nr); + int (*cpu_bootable)(unsigned int nr); +}; + +extern void smp_send_debugger_break(void); extern void start_secondary_resume(void); +extern void __devinit smp_generic_give_timebase(void); +extern void __devinit smp_generic_take_timebase(void); DECLARE_PER_CPU(unsigned int, cpu_pvr); @@ -93,13 +112,16 @@ extern int cpu_to_core_id(int cpu); #define PPC_MSG_CALL_FUNC_SINGLE 2 #define PPC_MSG_DEBUGGER_BREAK 3 -/* - * irq controllers that have dedicated ipis per message and don't - * need additional code in the action handler may use this - */ +/* for irq controllers that have dedicated ipis per message (4) */ extern int smp_request_message_ipi(int virq, int message); extern const char *smp_ipi_name[]; +/* for irq controllers with only a single ipi */ +extern void smp_muxed_ipi_set_data(int cpu, unsigned long data); +extern void smp_muxed_ipi_message_pass(int cpu, int msg); +extern void smp_muxed_ipi_resend(void); +extern irqreturn_t smp_ipi_demux(void); + void smp_init_iSeries(void); void smp_init_pSeries(void); void smp_init_cell(void); @@ -149,7 +171,7 @@ extern int smt_enabled_at_boot; extern int smp_mpic_probe(void); extern void smp_mpic_setup_cpu(int cpu); -extern void smp_generic_kick_cpu(int nr); +extern int smp_generic_kick_cpu(int nr); extern void smp_generic_give_timebase(void); extern void smp_generic_take_timebase(void); @@ -169,6 +191,8 @@ extern unsigned long __secondary_hold_spinloop; extern unsigned long __secondary_hold_acknowledge; extern char __secondary_hold; +extern irqreturn_t debug_ipi_action(int irq, void *data); + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 60f64b132bd..8489d372077 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at) COMPAT_SYS_SPU(open_by_handle_at) COMPAT_SYS_SPU(clock_adjtime) SYSCALL_SPU(syncfs) +COMPAT_SYS_SPU(sendmmsg) diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index 5e474ddd227..2dc595dda03 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -219,8 +219,6 @@ extern int mem_init_done; /* set on boot once kmalloc can be called */ extern int init_bootmem_done; /* set once bootmem is available */ extern phys_addr_t memory_limit; extern unsigned long klimit; - -extern void *alloc_maybe_bootmem(size_t size, gfp_t mask); extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); extern int powersave_nap; /* set if nap mode can be used in idle loop */ diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index d50a380b2b6..81143fcbd11 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -79,6 +79,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) #elif defined(CONFIG_PPC_STD_MMU_64) +#define MMU_NO_CONTEXT 0 + /* * TLB flushing for 64-bit hash-MMU CPUs */ diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 11ae699135b..58580e94a2b 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -52,6 +52,7 @@ extern void __init udbg_init_44x_as1(void); extern void __init udbg_init_40x_realmode(void); extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); +extern void __init udbg_init_wsp(void); #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_UDBG_H */ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 3c215648ce6..6d23c8193ca 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -371,10 +371,11 @@ #define __NR_open_by_handle_at 346 #define __NR_clock_adjtime 347 #define __NR_syncfs 348 +#define __NR_sendmmsg 349 #ifdef __KERNEL__ -#define __NR_syscalls 349 +#define __NR_syscalls 350 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/asm/wsp.h b/arch/powerpc/include/asm/wsp.h new file mode 100644 index 00000000000..c7dc83088a3 --- /dev/null +++ b/arch/powerpc/include/asm/wsp.h @@ -0,0 +1,14 @@ +/* + * Copyright 2011 Michael Ellerman, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef __ASM_POWERPC_WSP_H +#define __ASM_POWERPC_WSP_H + +extern int wsp_get_chip_id(struct device_node *dn); + +#endif /* __ASM_POWERPC_WSP_H */ diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h new file mode 100644 index 00000000000..b183a406201 --- /dev/null +++ b/arch/powerpc/include/asm/xics.h @@ -0,0 +1,142 @@ +/* + * Common definitions accross all variants of ICP and ICS interrupt + * controllers. + */ + +#ifndef _XICS_H +#define _XICS_H + +#include <linux/interrupt.h> + +#define XICS_IPI 2 +#define XICS_IRQ_SPURIOUS 0 + +/* Want a priority other than 0. Various HW issues require this. */ +#define DEFAULT_PRIORITY 5 + +/* + * Mark IPIs as higher priority so we can take them inside interrupts that + * arent marked IRQF_DISABLED + */ +#define IPI_PRIORITY 4 + +/* The least favored priority */ +#define LOWEST_PRIORITY 0xFF + +/* The number of priorities defined above */ +#define MAX_NUM_PRIORITIES 3 + +/* Native ICP */ +extern int icp_native_init(void); + +/* PAPR ICP */ +extern int icp_hv_init(void); + +/* ICP ops */ +struct icp_ops { + unsigned int (*get_irq)(void); + void (*eoi)(struct irq_data *d); + void (*set_priority)(unsigned char prio); + void (*teardown_cpu)(void); + void (*flush_ipi)(void); +#ifdef CONFIG_SMP + void (*cause_ipi)(int cpu, unsigned long data); + irq_handler_t ipi_action; +#endif +}; + +extern const struct icp_ops *icp_ops; + +/* Native ICS */ +extern int ics_native_init(void); + +/* RTAS ICS */ +extern int ics_rtas_init(void); + +/* ICS instance, hooked up to chip_data of an irq */ +struct ics { + struct list_head link; + int (*map)(struct ics *ics, unsigned int virq); + void (*mask_unknown)(struct ics *ics, unsigned long vec); + long (*get_server)(struct ics *ics, unsigned long vec); + int (*host_match)(struct ics *ics, struct device_node *node); + char data[]; +}; + +/* Commons */ +extern unsigned int xics_default_server; +extern unsigned int xics_default_distrib_server; +extern unsigned int xics_interrupt_server_size; +extern struct irq_host *xics_host; + +struct xics_cppr { + unsigned char stack[MAX_NUM_PRIORITIES]; + int index; +}; + +DECLARE_PER_CPU(struct xics_cppr, xics_cppr); + +static inline void xics_push_cppr(unsigned int vec) +{ + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + + if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) + return; + + if (vec == XICS_IPI) + os_cppr->stack[++os_cppr->index] = IPI_PRIORITY; + else + os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY; +} + +static inline unsigned char xics_pop_cppr(void) +{ + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + + if (WARN_ON(os_cppr->index < 1)) + return LOWEST_PRIORITY; + + return os_cppr->stack[--os_cppr->index]; +} + +static inline void xics_set_base_cppr(unsigned char cppr) +{ + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + + /* we only really want to set the priority when there's + * just one cppr value on the stack + */ + WARN_ON(os_cppr->index != 0); + + os_cppr->stack[0] = cppr; +} + +static inline unsigned char xics_cppr_top(void) +{ + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + + return os_cppr->stack[os_cppr->index]; +} + +DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message); + +extern void xics_init(void); +extern void xics_setup_cpu(void); +extern void xics_update_irq_servers(void); +extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join); +extern void xics_mask_unknown_vec(unsigned int vec); +extern irqreturn_t xics_ipi_dispatch(int cpu); +extern int xics_smp_probe(void); +extern void xics_register_ics(struct ics *ics); +extern void xics_teardown_cpu(void); +extern void xics_kexec_teardown_cpu(int secondary); +extern void xics_migrate_irqs_away(void); +#ifdef CONFIG_SMP +extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask, + unsigned int strict_check); +#else +#define xics_get_irq_server(virq, cpumask, strict_check) (xics_default_server) +#endif + + +#endif /* _XICS_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 3bb2a3e6a33..9aab3631257 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -38,11 +38,14 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ paca.o nvram_64.o firmware.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power7.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o +obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o +obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o obj-$(CONFIG_PPC_CLOCK) += clock.o procfs-y := proc_powerpc.o @@ -75,7 +78,6 @@ obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o dbell.o obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o extra-y := head_$(CONFIG_WORD_SIZE).o -extra-$(CONFIG_PPC_BOOK3E_32) := head_new_booke.o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o @@ -103,6 +105,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o +obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o + obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 23e6a93145a..36e1c8a29be 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -74,6 +74,7 @@ int main(void) DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); + DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ @@ -395,6 +396,7 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); + DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S new file mode 100644 index 00000000000..7f818feaa7a --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -0,0 +1,114 @@ +/* + * A2 specific assembly support code + * + * Copyright 2009 Ben Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/asm-offsets.h> +#include <asm/ppc_asm.h> +#include <asm/ppc-opcode.h> +#include <asm/processor.h> +#include <asm/reg_a2.h> +#include <asm/reg.h> +#include <asm/thread_info.h> + +/* + * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity. + * This also prevents external LPID accesses but that isn't a problem when not a + * guest. Under PV, this setting will be ignored and MMUCR will return the right + * number of PID bits we can use. + */ +#define MMUCR1_EXTEND_PID \ + (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \ + MMUCR1_DTTID | MMUCR1_DCCD) + +/* + * Use extended PIDs if enabled. + * Don't clear the ERATs on context sync events and enable I & D LRU. + * Enable ERAT back invalidate when tlbwe overwrites an entry. + */ +#define INITIAL_MMUCR1 \ + (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \ + MMUCR1_DRRE | MMUCR1_TLBWE_BINV) + +_GLOBAL(__setup_cpu_a2) + /* Some of these are actually thread local and some are + * core local but doing it always won't hurt + */ + +#ifdef CONFIG_PPC_WSP_COPRO + /* Make sure ACOP starts out as zero */ + li r3,0 + mtspr SPRN_ACOP,r3 + + /* Enable icswx instruction */ + mfspr r3,SPRN_A2_CCR2 + ori r3,r3,A2_CCR2_ENABLE_ICSWX + mtspr SPRN_A2_CCR2,r3 + + /* Unmask all CTs in HACOP */ + li r3,-1 + mtspr SPRN_HACOP,r3 +#endif /* CONFIG_PPC_WSP_COPRO */ + + /* Enable doorbell */ + mfspr r3,SPRN_A2_CCR2 + oris r3,r3,A2_CCR2_ENABLE_PC@h + mtspr SPRN_A2_CCR2,r3 + isync + + /* Setup CCR0 to disable power saving for now as it's busted + * in the current implementations. Setup CCR1 to wake on + * interrupts normally (we write the default value but who + * knows what FW may have clobbered...) + */ + li r3,0 + mtspr SPRN_A2_CCR0, r3 + LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f) + mtspr SPRN_A2_CCR1, r3 + + /* Initialise MMUCR1 */ + lis r3,INITIAL_MMUCR1@h + ori r3,r3,INITIAL_MMUCR1@l + mtspr SPRN_MMUCR1,r3 + + /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ + LOAD_REG_IMMEDIATE(r3, 0x000a7531) + mtspr SPRN_MMUCR2,r3 + + /* Set MMUCR3 to write all thids bit to the TLB */ + LOAD_REG_IMMEDIATE(r3, 0x0000000f) + mtspr SPRN_MMUCR3,r3 + + /* Don't do ERAT stuff if running guest mode */ + mfmsr r3 + andis. r0,r3,MSR_GS@h + bne 1f + + /* Now set the I-ERAT watermark to 15 */ + lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h + mtspr SPRN_MMUCR0, r4 + li r4,A2_IERAT_SIZE-1 + PPC_ERATWE(r4,r4,3) + + /* Now set the D-ERAT watermark to 31 */ + lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h + mtspr SPRN_MMUCR0, r4 + li r4,A2_DERAT_SIZE-1 + PPC_ERATWE(r4,r4,3) + + /* And invalidate the beast just in case. That won't get rid of + * a bolted entry though it will be in LRU and so will go away eventually + * but let's not bother for now + */ + PPC_ERATILX(0,0,0) +1: + blr + +_GLOBAL(__restore_cpu_a2) + b __setup_cpu_a2 diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 913611105c1..8053db02b85 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -88,6 +88,9 @@ _GLOBAL(__setup_cpu_e5500) bl __e500_dcache_setup #ifdef CONFIG_PPC_BOOK3E_64 bl .__setup_base_ivors + bl .setup_perfmon_ivor + bl .setup_doorbell_ivors + bl .setup_ehv_ivors #else bl __setup_e500mc_ivors #endif diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S new file mode 100644 index 00000000000..4f9a93fcfe0 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -0,0 +1,91 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> + +/* Entry: r3 = crap, r4 = ptr to cputable entry + * + * Note that we can be called twice for pseudo-PVRs + */ +_GLOBAL(__setup_cpu_power7) + mflr r11 + bl __init_hvmode_206 + mtlr r11 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + bl __init_LPCR + bl __init_TLB + mtlr r11 + blr + +_GLOBAL(__restore_cpu_power7) + mflr r11 + mfmsr r3 + rldicl. r0,r3,4,63 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + bl __init_LPCR + bl __init_TLB + mtlr r11 + blr + +__init_hvmode_206: + /* Disable CPU_FTR_HVMODE_206 and exit if MSR:HV is not set */ + mfmsr r3 + rldicl. r0,r3,4,63 + bnelr + ld r5,CPU_SPEC_FEATURES(r4) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE_206) + xor r5,r5,r6 + std r5,CPU_SPEC_FEATURES(r4) + blr + +__init_LPCR: + /* Setup a sane LPCR: + * + * LPES = 0b01 (HSRR0/1 used for 0x500) + * PECE = 0b111 + * DPFD = 4 + * + * Other bits untouched for now + */ + mfspr r3,SPRN_LPCR + ori r3,r3,(LPCR_LPES0|LPCR_LPES1) + xori r3,r3, LPCR_LPES0 + ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) + li r5,7 + sldi r5,r5,LPCR_DPFD_SH + andc r3,r3,r5 + li r5,4 + sldi r5,r5,LPCR_DPFD_SH + or r3,r3,r5 + mtspr SPRN_LPCR,r3 + isync + blr + +__init_TLB: + /* Clear the TLB */ + li r6,128 + mtctr r6 + li r7,0xc00 /* IS field = 0b11 */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b9602ee06de..34d2722b945 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -62,10 +62,12 @@ extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_pa6t(void); extern void __restore_cpu_ppc970(void); extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power7(void); +extern void __restore_cpu_a2(void); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -199,7 +201,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (gp)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER4, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -214,7 +216,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4+ (gq)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER4, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -230,7 +232,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -248,7 +250,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -284,7 +286,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -302,7 +304,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -318,7 +320,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5 (gr)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -338,7 +340,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+ (gs)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -354,7 +356,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+ (gs)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -371,7 +373,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .oprofile_cpu_type = "ppc64/ibm-compat-v1", @@ -385,7 +387,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_POWER6, .cpu_user_features = COMMON_USER_POWER6 | PPC_FEATURE_POWER6_EXT, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER6, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -404,7 +406,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER6 (architected)", .cpu_features = CPU_FTRS_POWER6, .cpu_user_features = COMMON_USER_POWER6, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER6, .icache_bsize = 128, .dcache_bsize = 128, .oprofile_cpu_type = "ppc64/ibm-compat-v1", @@ -417,12 +419,13 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7 (architected)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .mmu_features = MMU_FTR_HPTE_TABLE | - MMU_FTR_TLBIE_206, + .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, .oprofile_type = PPC_OPROFILE_POWER4, .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, .platform = "power7", }, { /* Power7 */ @@ -431,14 +434,15 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7 (raw)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .mmu_features = MMU_FTR_HPTE_TABLE | - MMU_FTR_TLBIE_206, + .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power7", .oprofile_type = PPC_OPROFILE_POWER4, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, .platform = "power7", }, { /* Power7+ */ @@ -447,14 +451,15 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7+ (raw)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .mmu_features = MMU_FTR_HPTE_TABLE | - MMU_FTR_TLBIE_206, + .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power7", .oprofile_type = PPC_OPROFILE_POWER4, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, .platform = "power7+", }, { /* Cell Broadband Engine */ @@ -465,7 +470,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_user_features = COMMON_USER_PPC64 | PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_SMT, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_CELL, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 4, @@ -480,7 +485,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "PA6T", .cpu_features = CPU_FTRS_PA6T, .cpu_user_features = COMMON_USER_PA6T, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PA6T, .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 6, @@ -497,7 +502,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (compatible)", .cpu_features = CPU_FTRS_COMPATIBLE, .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -2005,7 +2010,22 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC32 */ #endif /* CONFIG_E500 */ -#ifdef CONFIG_PPC_BOOK3E_64 +#ifdef CONFIG_PPC_A2 + { /* Standard A2 (>= DD2) + FPU core */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00480000, + .cpu_name = "A2 (>= DD2)", + .cpu_features = CPU_FTRS_A2, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTRS_A2, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 0, + .cpu_setup = __setup_cpu_a2, + .cpu_restore = __restore_cpu_a2, + .machine_check = machine_check_generic, + .platform = "ppca2", + }, { /* This is a default entry to get going, to be replaced by * a real one at some stage */ @@ -2026,7 +2046,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "power6", }, -#endif +#endif /* CONFIG_PPC_A2 */ }; static struct cpu_spec the_cpu_spec; diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 5b5e1f002a8..4e6ee944495 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -64,9 +64,9 @@ void crash_ipi_callback(struct pt_regs *regs) return; hard_irq_disable(); - if (!cpu_isset(cpu, cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_in_crash)) crash_save_cpu(regs, cpu); - cpu_set(cpu, cpus_in_crash); + cpumask_set_cpu(cpu, &cpus_in_crash); /* * Entered via soft-reset - could be the kdump @@ -77,8 +77,8 @@ void crash_ipi_callback(struct pt_regs *regs) * Tell the kexec CPU that entered via soft-reset and ready * to go down. */ - if (cpu_isset(cpu, cpus_in_sr)) { - cpu_clear(cpu, cpus_in_sr); + if (cpumask_test_cpu(cpu, &cpus_in_sr)) { + cpumask_clear_cpu(cpu, &cpus_in_sr); atomic_inc(&enter_on_soft_reset); } @@ -87,7 +87,7 @@ void crash_ipi_callback(struct pt_regs *regs) * This barrier is needed to make sure that all CPUs are stopped. * If not, soft-reset will be invoked to bring other CPUs. */ - while (!cpu_isset(crashing_cpu, cpus_in_crash)) + while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -109,7 +109,7 @@ static void crash_soft_reset_check(int cpu) { unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - cpu_clear(cpu, cpus_in_sr); + cpumask_clear_cpu(cpu, &cpus_in_sr); while (atomic_read(&enter_on_soft_reset) != ncpus) cpu_relax(); } @@ -132,7 +132,7 @@ static void crash_kexec_prepare_cpus(int cpu) */ printk(KERN_EMERG "Sending IPI to other cpus...\n"); msecs = 10000; - while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) { + while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { cpu_relax(); mdelay(1); } @@ -144,52 +144,24 @@ static void crash_kexec_prepare_cpus(int cpu) * user to do soft reset such that we get all. * Soft-reset will be used until better mechanism is implemented. */ - if (cpus_weight(cpus_in_crash) < ncpus) { + if (cpumask_weight(&cpus_in_crash) < ncpus) { printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", - ncpus - cpus_weight(cpus_in_crash)); + ncpus - cpumask_weight(&cpus_in_crash)); printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); - cpus_in_sr = CPU_MASK_NONE; + cpumask_clear(&cpus_in_sr); atomic_set(&enter_on_soft_reset, 0); - while (cpus_weight(cpus_in_crash) < ncpus) + while (cpumask_weight(&cpus_in_crash) < ncpus) cpu_relax(); } /* * Make sure all CPUs are entered via soft-reset if the kdump is * invoked using soft-reset. */ - if (cpu_isset(cpu, cpus_in_sr)) + if (cpumask_test_cpu(cpu, &cpus_in_sr)) crash_soft_reset_check(cpu); /* Leave the IPI callback set */ } -/* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#ifdef CONFIG_PPC_STD_MMU_64 -static void crash_kexec_wait_realmode(int cpu) -{ - unsigned int msecs; - int i; - - msecs = 10000; - for (i=0; i < NR_CPUS && msecs > 0; i++) { - if (i == cpu) - continue; - - while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { - barrier(); - if (!cpu_possible(i)) { - break; - } - if (!cpu_online(i)) { - break; - } - msecs--; - mdelay(1); - } - } - mb(); -} -#endif /* CONFIG_PPC_STD_MMU_64 */ - /* * This function will be called by secondary cpus or by kexec cpu * if soft-reset is activated to stop some CPUs. @@ -210,7 +182,7 @@ void crash_kexec_secondary(struct pt_regs *regs) * exited using 'x'(exit and recover) or * kexec_should_crash() failed for all running tasks. */ - cpu_clear(cpu, cpus_in_sr); + cpumask_clear_cpu(cpu, &cpus_in_sr); local_irq_restore(flags); return; } @@ -224,7 +196,7 @@ void crash_kexec_secondary(struct pt_regs *regs) * then start kexec boot. */ crash_soft_reset_check(cpu); - cpu_set(crashing_cpu, cpus_in_crash); + cpumask_set_cpu(crashing_cpu, &cpus_in_crash); if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); machine_kexec(kexec_crash_image); @@ -234,7 +206,6 @@ void crash_kexec_secondary(struct pt_regs *regs) } #else /* ! CONFIG_SMP */ -static inline void crash_kexec_wait_realmode(int cpu) {} static void crash_kexec_prepare_cpus(int cpu) { @@ -253,10 +224,40 @@ static void crash_kexec_prepare_cpus(int cpu) void crash_kexec_secondary(struct pt_regs *regs) { - cpus_in_sr = CPU_MASK_NONE; + cpumask_clear(&cpus_in_sr); } #endif /* CONFIG_SMP */ +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64) +static void crash_kexec_wait_realmode(int cpu) +{ + unsigned int msecs; + int i; + + msecs = 10000; + for (i=0; i < nr_cpu_ids && msecs > 0; i++) { + if (i == cpu) + continue; + + while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i)) { + break; + } + if (!cpu_online(i)) { + break; + } + msecs--; + mdelay(1); + } + } + mb(); +} +#else +static inline void crash_kexec_wait_realmode(int cpu) {} +#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */ + /* * Register a function to be called on shutdown. Only use this if you * can't reset your device in the second kernel. @@ -345,7 +346,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crashing_cpu = smp_processor_id(); crash_save_cpu(regs, crashing_cpu); crash_kexec_prepare_cpus(crashing_cpu); - cpu_set(crashing_cpu, cpus_in_crash); + cpumask_set_cpu(crashing_cpu, &cpus_in_crash); crash_kexec_wait_realmode(crashing_cpu); machine_kexec_mask_interrupts(); diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 3307a52d797..2cc451aaaca 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -13,84 +13,35 @@ #include <linux/kernel.h> #include <linux/smp.h> #include <linux/threads.h> -#include <linux/percpu.h> +#include <linux/hardirq.h> #include <asm/dbell.h> #include <asm/irq_regs.h> #ifdef CONFIG_SMP -struct doorbell_cpu_info { - unsigned long messages; /* current messages bits */ - unsigned int tag; /* tag value */ -}; - -static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info); - void doorbell_setup_this_cpu(void) { - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); + unsigned long tag = mfspr(SPRN_PIR) & 0x3fff; - info->messages = 0; - info->tag = mfspr(SPRN_PIR) & 0x3fff; + smp_muxed_ipi_set_data(smp_processor_id(), tag); } -void doorbell_message_pass(int target, int msg) +void doorbell_cause_ipi(int cpu, unsigned long data) { - struct doorbell_cpu_info *info; - int i; - - if (target < NR_CPUS) { - info = &per_cpu(doorbell_cpu_info, target); - set_bit(msg, &info->messages); - ppc_msgsnd(PPC_DBELL, 0, info->tag); - } - else if (target == MSG_ALL_BUT_SELF) { - for_each_online_cpu(i) { - if (i == smp_processor_id()) - continue; - info = &per_cpu(doorbell_cpu_info, i); - set_bit(msg, &info->messages); - ppc_msgsnd(PPC_DBELL, 0, info->tag); - } - } - else { /* target == MSG_ALL */ - for_each_online_cpu(i) { - info = &per_cpu(doorbell_cpu_info, i); - set_bit(msg, &info->messages); - } - ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0); - } + ppc_msgsnd(PPC_DBELL, 0, data); } void doorbell_exception(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); - int msg; - /* Warning: regs can be NULL when called from irq enable */ + irq_enter(); - if (!info->messages || (num_online_cpus() < 2)) - goto out; + smp_ipi_demux(); - for (msg = 0; msg < 4; msg++) - if (test_and_clear_bit(msg, &info->messages)) - smp_message_recv(msg); - -out: + irq_exit(); set_irq_regs(old_regs); } - -void doorbell_check_self(void) -{ - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); - - if (!info->messages) - return; - - ppc_msgsnd(PPC_DBELL, 0, info->tag); -} - #else /* CONFIG_SMP */ void doorbell_exception(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d82878c4daa..d834425186a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -421,6 +421,12 @@ BEGIN_FTR_SECTION std r24,THREAD_VRSAVE(r3) END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + mfspr r25,SPRN_DSCR + std r25,THREAD_DSCR(r3) +END_FTR_SECTION_IFSET(CPU_FTR_DSCR) +#endif and. r0,r0,r22 beq+ 1f andc r22,r22,r0 @@ -462,10 +468,10 @@ BEGIN_FTR_SECTION FTR_SECTION_ELSE_NESTED(95) clrrdi r6,r8,40 /* get its 1T ESID */ clrrdi r9,r1,40 /* get current sp 1T ESID */ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_1T_SEGMENT, 95) + ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95) FTR_SECTION_ELSE b 2f -ALT_FTR_SECTION_END_IFSET(CPU_FTR_SLB) +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB) clrldi. r0,r6,2 /* is new ESID c00000000? */ cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */ cror eq,4*cr1+eq,eq @@ -479,7 +485,7 @@ BEGIN_FTR_SECTION li r9,MMU_SEGSIZE_1T /* insert B field */ oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0 -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) /* Update the last bolted SLB. No write barriers are needed * here, provided we only update the current CPU's SLB shadow @@ -491,7 +497,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */ std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */ - /* No need to check for CPU_FTR_NO_SLBIE_B here, since when + /* No need to check for MMU_FTR_NO_SLBIE_B here, since when * we have 1TB segments, the only CPUs known to have the errata * only support less than 1TB of system memory and we'll never * actually hit this code path. @@ -522,6 +528,15 @@ BEGIN_FTR_SECTION mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + ld r0,THREAD_DSCR(r4) + cmpd r0,r25 + beq 1f + mtspr SPRN_DSCR,r0 +1: +END_FTR_SECTION_IFSET(CPU_FTR_DSCR) +#endif /* r3-r13 are destroyed -- Cort */ REST_8GPRS(14, r1) @@ -838,7 +853,7 @@ _GLOBAL(enter_rtas) _STATIC(rtas_return_loc) /* relocation is off at this point */ - mfspr r4,SPRN_SPRG_PACA /* Get PACA */ + GET_PACA(r4) clrldi r4,r4,2 /* convert to realmode address */ bcl 20,31,$+4 @@ -869,7 +884,7 @@ _STATIC(rtas_restore_regs) REST_8GPRS(14, r1) /* Restore the non-volatiles */ REST_10GPRS(22, r1) /* ditto */ - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) ld r4,_CCR(r1) mtcr r4 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 9651acc3504..d24d4400cc7 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -17,6 +17,7 @@ #include <asm/cputable.h> #include <asm/setup.h> #include <asm/thread_info.h> +#include <asm/reg_a2.h> #include <asm/exception-64e.h> #include <asm/bug.h> #include <asm/irqflags.h> @@ -252,9 +253,6 @@ exception_marker: .balign 0x1000 .globl interrupt_base_book3e interrupt_base_book3e: /* fake trap */ - /* Note: If real debug exceptions are supported by the HW, the vector - * below will have to be patched up to point to an appropriate handler - */ EXCEPTION_STUB(0x000, machine_check) /* 0x0200 */ EXCEPTION_STUB(0x020, critical_input) /* 0x0580 */ EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */ @@ -271,8 +269,13 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */ EXCEPTION_STUB(0x1c0, data_tlb_miss) EXCEPTION_STUB(0x1e0, instruction_tlb_miss) + EXCEPTION_STUB(0x260, perfmon) EXCEPTION_STUB(0x280, doorbell) EXCEPTION_STUB(0x2a0, doorbell_crit) + EXCEPTION_STUB(0x2c0, guest_doorbell) + EXCEPTION_STUB(0x2e0, guest_doorbell_crit) + EXCEPTION_STUB(0x300, hypercall) + EXCEPTION_STUB(0x320, ehpriv) .globl interrupt_end_book3e interrupt_end_book3e: @@ -454,6 +457,70 @@ interrupt_end_book3e: kernel_dbg_exc: b . /* NYI */ +/* Debug exception as a debug interrupt*/ + START_EXCEPTION(debug_debug); + DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS) + + /* + * If there is a single step or branch-taken exception in an + * exception entry sequence, it was probably meant to apply to + * the code where the exception occurred (since exception entry + * doesn't turn off DE automatically). We simulate the effect + * of turning off DE on entry to an exception handler by turning + * off DE in the DSRR1 value and clearing the debug status. + */ + + mfspr r14,SPRN_DBSR /* check single-step/branch taken */ + andis. r15,r14,DBSR_IC@h + beq+ 1f + + LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) + LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e) + cmpld cr0,r10,r14 + cmpld cr1,r10,r15 + blt+ cr0,1f + bge+ cr1,1f + + /* here it looks like we got an inappropriate debug exception. */ + lis r14,DBSR_IC@h /* clear the IC event */ + rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */ + mtspr SPRN_DBSR,r14 + mtspr SPRN_DSRR1,r11 + lwz r10,PACA_EXDBG+EX_CR(r13) /* restore registers */ + ld r1,PACA_EXDBG+EX_R1(r13) + ld r14,PACA_EXDBG+EX_R14(r13) + ld r15,PACA_EXDBG+EX_R15(r13) + mtcr r10 + ld r10,PACA_EXDBG+EX_R10(r13) /* restore registers */ + ld r11,PACA_EXDBG+EX_R11(r13) + mfspr r13,SPRN_SPRG_DBG_SCRATCH + rfdi + + /* Normal debug exception */ + /* XXX We only handle coming from userspace for now since we can't + * quite save properly an interrupted kernel state yet + */ +1: andi. r14,r11,MSR_PR; /* check for userspace again */ + beq kernel_dbg_exc; /* if from kernel mode */ + + /* Now we mash up things to make it look like we are coming on a + * normal exception + */ + mfspr r15,SPRN_SPRG_DBG_SCRATCH + mtspr SPRN_SPRG_GEN_SCRATCH,r15 + mfspr r14,SPRN_DBSR + EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL) + std r14,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r14 + ld r14,PACA_EXDBG+EX_R14(r13) + ld r15,PACA_EXDBG+EX_R15(r13) + bl .save_nvgprs + bl .DebugException + b .ret_from_except + + MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE) + /* Doorbell interrupt */ MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE) @@ -468,6 +535,11 @@ kernel_dbg_exc: // b ret_from_crit_except b . + MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE) + /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -587,7 +659,12 @@ fast_exception_return: BAD_STACK_TRAMPOLINE(0x000) BAD_STACK_TRAMPOLINE(0x100) BAD_STACK_TRAMPOLINE(0x200) +BAD_STACK_TRAMPOLINE(0x260) +BAD_STACK_TRAMPOLINE(0x2c0) +BAD_STACK_TRAMPOLINE(0x2e0) BAD_STACK_TRAMPOLINE(0x300) +BAD_STACK_TRAMPOLINE(0x310) +BAD_STACK_TRAMPOLINE(0x320) BAD_STACK_TRAMPOLINE(0x400) BAD_STACK_TRAMPOLINE(0x500) BAD_STACK_TRAMPOLINE(0x600) @@ -864,8 +941,23 @@ have_hes: * that will have to be made dependent on whether we are running under * a hypervisor I suppose. */ - ori r3,r3,MAS0_HES | MAS0_WQ_ALLWAYS - mtspr SPRN_MAS0,r3 + + /* BEWARE, MAGIC + * This code is called as an ordinary function on the boot CPU. But to + * avoid duplication, this code is also used in SCOM bringup of + * secondary CPUs. We read the code between the initial_tlb_code_start + * and initial_tlb_code_end labels one instruction at a time and RAM it + * into the new core via SCOM. That doesn't process branches, so there + * must be none between those two labels. It also means if this code + * ever takes any parameters, the SCOM code must also be updated to + * provide them. + */ + .globl a2_tlbinit_code_start +a2_tlbinit_code_start: + + ori r11,r3,MAS0_WQ_ALLWAYS + oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */ + mtspr SPRN_MAS0,r11 lis r3,(MAS1_VALID | MAS1_IPROT)@h ori r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT mtspr SPRN_MAS1,r3 @@ -879,18 +971,86 @@ have_hes: /* Write the TLB entry */ tlbwe + .globl a2_tlbinit_after_linear_map +a2_tlbinit_after_linear_map: + /* Now we branch the new virtual address mapped by this entry */ LOAD_REG_IMMEDIATE(r3,1f) mtctr r3 bctr 1: /* We are now running at PAGE_OFFSET, clean the TLB of everything - * else (XXX we should scan for bolted crap from the firmware too) + * else (including IPROTed things left by firmware) + * r4 = TLBnCFG + * r3 = current address (more or less) */ + + li r5,0 + mtspr SPRN_MAS6,r5 + tlbsx 0,r3 + + rlwinm r9,r4,0,TLBnCFG_N_ENTRY + rlwinm r10,r4,8,0xff + addi r10,r10,-1 /* Get inner loop mask */ + + li r3,1 + + mfspr r5,SPRN_MAS1 + rlwinm r5,r5,0,(~(MAS1_VALID|MAS1_IPROT)) + + mfspr r6,SPRN_MAS2 + rldicr r6,r6,0,51 /* Extract EPN */ + + mfspr r7,SPRN_MAS0 + rlwinm r7,r7,0,0xffff0fff /* Clear HES and WQ */ + + rlwinm r8,r7,16,0xfff /* Extract ESEL */ + +2: add r4,r3,r8 + and r4,r4,r10 + + rlwimi r7,r4,16,MAS0_ESEL_MASK + + mtspr SPRN_MAS0,r7 + mtspr SPRN_MAS1,r5 + mtspr SPRN_MAS2,r6 + tlbwe + + addi r3,r3,1 + and. r4,r3,r10 + + bne 3f + addis r6,r6,(1<<30)@h +3: + cmpw r3,r9 + blt 2b + + .globl a2_tlbinit_after_iprot_flush +a2_tlbinit_after_iprot_flush: + +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP + /* Now establish early debug mappings if applicable */ + /* Restore the MAS0 we used for linear mapping load */ + mtspr SPRN_MAS0,r11 + + lis r3,(MAS1_VALID | MAS1_IPROT)@h + ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT) + mtspr SPRN_MAS1,r3 + LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G) + mtspr SPRN_MAS2,r3 + LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW) + mtspr SPRN_MAS7_MAS3,r3 + /* re-use the MAS8 value from the linear mapping */ + tlbwe +#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ + PPC_TLBILX(0,0,0) sync isync + .globl a2_tlbinit_code_end +a2_tlbinit_code_end: + /* We translate LR and return */ mflr r3 tovirt(r3,r3) @@ -1040,3 +1200,33 @@ _GLOBAL(__setup_base_ivors) sync blr + +_GLOBAL(setup_perfmon_ivor) + SET_IVOR(35, 0x260) /* Performance Monitor */ + blr + +_GLOBAL(setup_doorbell_ivors) + SET_IVOR(36, 0x280) /* Processor Doorbell */ + SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */ + + /* Check MMUCFG[LPIDSIZE] to determine if we have category E.HV */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beqlr + + SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ + SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ + blr + +_GLOBAL(setup_ehv_ivors) + /* + * We may be running as a guest and lack E.HV even on a chip + * that normally has it. + */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beqlr + + SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */ + SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */ + blr diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index aeb739e1876..a85f4874cba 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -37,23 +37,51 @@ .globl __start_interrupts __start_interrupts: - STD_EXCEPTION_PSERIES(0x100, system_reset) + .globl system_reset_pSeries; +system_reset_pSeries: + HMT_MEDIUM; + DO_KVM 0x100; + SET_SCRATCH0(r13) +#ifdef CONFIG_PPC_P7_NAP +BEGIN_FTR_SECTION + /* Running native on arch 2.06 or later, check if we are + * waking up from nap. We only handle no state loss and + * supervisor state loss. We do -not- handle hypervisor + * state loss at this time. + */ + mfspr r13,SPRN_SRR1 + rlwinm r13,r13,47-31,30,31 + cmpwi cr0,r13,1 + bne 1f + b .power7_wakeup_noloss +1: cmpwi cr0,r13,2 + bne 1f + b .power7_wakeup_loss + /* Total loss of HV state is fatal, we could try to use the + * PIR to locate a PACA, then use an emergency stack etc... + * but for now, let's just stay stuck here + */ +1: cmpwi cr0,r13,3 + beq . +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) +#endif /* CONFIG_PPC_P7_NAP */ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) . = 0x200 _machine_check_pSeries: HMT_MEDIUM DO_KVM 0x200 - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) . = 0x300 .globl data_access_pSeries data_access_pSeries: HMT_MEDIUM DO_KVM 0x300 - mtspr SPRN_SPRG_SCRATCH0,r13 + SET_SCRATCH0(r13) BEGIN_FTR_SECTION - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) std r9,PACA_EXSLB+EX_R9(r13) std r10,PACA_EXSLB+EX_R10(r13) mfspr r10,SPRN_DAR @@ -67,22 +95,22 @@ BEGIN_FTR_SECTION std r11,PACA_EXGEN+EX_R11(r13) ld r11,PACA_EXSLB+EX_R9(r13) std r12,PACA_EXGEN+EX_R12(r13) - mfspr r12,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r12) std r10,PACA_EXGEN+EX_R10(r13) std r11,PACA_EXGEN+EX_R9(r13) std r12,PACA_EXGEN+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(data_access_common) + EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD) FTR_SECTION_ELSE - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common) -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB) . = 0x380 .globl data_access_slb_pSeries data_access_slb_pSeries: HMT_MEDIUM DO_KVM 0x380 - mtspr SPRN_SPRG_SCRATCH0,r13 - mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ + SET_SCRATCH0(r13) + GET_PACA(r13) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ @@ -95,7 +123,7 @@ data_access_slb_pSeries: std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ #ifndef CONFIG_RELOCATABLE @@ -113,15 +141,15 @@ data_access_slb_pSeries: bctr #endif - STD_EXCEPTION_PSERIES(0x400, instruction_access) + STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access) . = 0x480 .globl instruction_access_slb_pSeries instruction_access_slb_pSeries: HMT_MEDIUM DO_KVM 0x480 - mtspr SPRN_SPRG_SCRATCH0,r13 - mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ + SET_SCRATCH0(r13) + GET_PACA(r13) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ @@ -134,7 +162,7 @@ instruction_access_slb_pSeries: std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ #ifndef CONFIG_RELOCATABLE @@ -147,13 +175,29 @@ instruction_access_slb_pSeries: bctr #endif - MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt) - STD_EXCEPTION_PSERIES(0x600, alignment) - STD_EXCEPTION_PSERIES(0x700, program_check) - STD_EXCEPTION_PSERIES(0x800, fp_unavailable) - MASKABLE_EXCEPTION_PSERIES(0x900, decrementer) - STD_EXCEPTION_PSERIES(0xa00, trap_0a) - STD_EXCEPTION_PSERIES(0xb00, trap_0b) + /* We open code these as we can't have a ". = x" (even with + * x = "." within a feature section + */ + . = 0x500; + .globl hardware_interrupt_pSeries; + .globl hardware_interrupt_hv; +hardware_interrupt_pSeries: +hardware_interrupt_hv: + BEGIN_FTR_SECTION + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD) + FTR_SECTION_ELSE + _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV) + ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206) + + STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) + STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) + STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) + + MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer) + MASKABLE_EXCEPTION_HV(0x980, 0x980, decrementer) + + STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a) + STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) . = 0xc00 .globl system_call_pSeries @@ -165,13 +209,13 @@ BEGIN_FTR_SECTION beq- 1f END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) mr r9,r13 - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) mfspr r11,SPRN_SRR0 - ld r12,PACAKBASE(r13) - ld r10,PACAKMSR(r13) - LOAD_HANDLER(r12, system_call_entry) - mtspr SPRN_SRR0,r12 mfspr r12,SPRN_SRR1 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10, system_call_entry) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 rfid b . /* prevent speculative execution */ @@ -183,8 +227,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) rfid /* return to userspace */ b . - STD_EXCEPTION_PSERIES(0xd00, single_step) - STD_EXCEPTION_PSERIES(0xe00, trap_0e) + STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) + + /* At 0xe??? we have a bunch of hypervisor exceptions, we branch + * out of line to handle them + */ + . = 0xe00 + b h_data_storage_hv + . = 0xe20 + b h_instr_storage_hv + . = 0xe40 + b emulation_assist_hv + . = 0xe50 + b hmi_exception_hv + . = 0xe60 + b hmi_exception_hv /* We need to deal with the Altivec unavailable exception * here which is at 0xf20, thus in the middle of the @@ -193,39 +250,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) */ performance_monitor_pSeries_1: . = 0xf00 - DO_KVM 0xf00 b performance_monitor_pSeries altivec_unavailable_pSeries_1: . = 0xf20 - DO_KVM 0xf20 b altivec_unavailable_pSeries vsx_unavailable_pSeries_1: . = 0xf40 - DO_KVM 0xf40 b vsx_unavailable_pSeries #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error) + STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) + STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance) + STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1700, altivec_assist) + STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal) + STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) #endif /* CONFIG_CBE_RAS */ . = 0x3000 -/*** pSeries interrupt support ***/ +/*** Out of line interrupts support ***/ + + /* moved from 0xe00 */ + STD_EXCEPTION_HV(., 0xe00, h_data_storage) + STD_EXCEPTION_HV(., 0xe20, h_instr_storage) + STD_EXCEPTION_HV(., 0xe40, emulation_assist) + STD_EXCEPTION_HV(., 0xe60, hmi_exception) /* need to flush cache ? */ /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES(., performance_monitor) - STD_EXCEPTION_PSERIES(., altivec_unavailable) - STD_EXCEPTION_PSERIES(., vsx_unavailable) + STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor) + STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) + STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -240,17 +300,30 @@ masked_interrupt: rotldi r10,r10,16 mtspr SPRN_SRR1,r10 ld r10,PACA_EXGEN+EX_R10(r13) - mfspr r13,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r13) rfid b . +masked_Hinterrupt: + stb r10,PACAHARDIRQEN(r13) + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + mfspr r10,SPRN_HSRR1 + rldicl r10,r10,48,1 /* clear MSR_EE */ + rotldi r10,r10,16 + mtspr SPRN_HSRR1,r10 + ld r10,PACA_EXGEN+EX_R10(r13) + GET_SCRATCH0(r13) + hrfid + b . + .align 7 do_stab_bolted_pSeries: std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted) + EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) #ifdef CONFIG_PPC_PSERIES /* @@ -260,15 +333,15 @@ do_stab_bolted_pSeries: .align 7 system_reset_fwnmi: HMT_MEDIUM - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) .globl machine_check_fwnmi .align 7 machine_check_fwnmi: HMT_MEDIUM - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) #endif /* CONFIG_PPC_PSERIES */ @@ -282,7 +355,7 @@ slb_miss_user_pseries: std r10,PACA_EXGEN+EX_R10(r13) std r11,PACA_EXGEN+EX_R11(r13) std r12,PACA_EXGEN+EX_R12(r13) - mfspr r10,SPRG_SCRATCH0 + GET_SCRATCH0(r10) ld r11,PACA_EXSLB+EX_R9(r13) ld r12,PACA_EXSLB+EX_R3(r13) std r10,PACA_EXGEN+EX_R13(r13) @@ -342,6 +415,8 @@ machine_check_common: STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) + STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) + STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) #ifdef CONFIG_ALTIVEC @@ -386,9 +461,24 @@ bad_stack: std r12,_XER(r1) SAVE_GPR(0,r1) SAVE_GPR(2,r1) - SAVE_4GPRS(3,r1) - SAVE_2GPRS(7,r1) - SAVE_10GPRS(12,r1) + ld r10,EX_R3(r3) + std r10,GPR3(r1) + SAVE_GPR(4,r1) + SAVE_4GPRS(5,r1) + ld r9,EX_R9(r3) + ld r10,EX_R10(r3) + SAVE_2GPRS(9,r1) + ld r9,EX_R11(r3) + ld r10,EX_R12(r3) + ld r11,EX_R13(r3) + std r9,GPR11(r1) + std r10,GPR12(r1) + std r11,GPR13(r1) +BEGIN_FTR_SECTION + ld r10,EX_CFAR(r3) + std r10,ORIG_GPR3(r1) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + SAVE_8GPRS(14,r1) SAVE_10GPRS(22,r1) lhz r12,PACA_TRAP_SAVE(r13) std r12,_TRAP(r1) @@ -397,6 +487,9 @@ bad_stack: li r12,0 std r12,0(r11) ld r2,PACATOC(r13) + ld r11,exception_marker@toc(r2) + std r12,RESULT(r1) + std r11,STACK_FRAME_OVERHEAD-16(r1) 1: addi r3,r1,STACK_FRAME_OVERHEAD bl .kernel_bad_stack b 1b @@ -419,6 +512,19 @@ data_access_common: li r5,0x300 b .do_hash_page /* Try to handle as hpte fault */ + .align 7 + .globl h_data_storage_common +h_data_storage_common: + mfspr r10,SPRN_HDAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_HDSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unknown_exception + b .ret_from_except + .align 7 .globl instruction_access_common instruction_access_common: @@ -428,6 +534,8 @@ instruction_access_common: li r5,0x400 b .do_hash_page /* Try to handle as hpte fault */ + STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception) + /* * Here is the common SLB miss user that is used when going to virtual * mode for SLB misses, that is currently not used @@ -750,7 +858,7 @@ _STATIC(do_hash_page) BEGIN_FTR_SECTION andis. r0,r4,0x0020 /* Is it a segment table fault? */ bne- do_ste_alloc /* If so handle it */ -END_FTR_SECTION_IFCLR(CPU_FTR_SLB) +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) clrrdi r11,r1,THREAD_SHIFT lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index c5c24beb838..ba250d505e0 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -805,19 +805,6 @@ _ENTRY(copy_and_flush) blr #ifdef CONFIG_SMP -#ifdef CONFIG_GEMINI - .globl __secondary_start_gemini -__secondary_start_gemini: - mfspr r4,SPRN_HID0 - ori r4,r4,HID0_ICFI - li r3,0 - ori r3,r3,HID0_ICE - andc r4,r4,r3 - mtspr SPRN_HID0,r4 - sync - b __secondary_start -#endif /* CONFIG_GEMINI */ - .globl __secondary_start_mpc86xx __secondary_start_mpc86xx: mfspr r3, SPRN_PIR @@ -890,15 +877,6 @@ __secondary_start: mtspr SPRN_SRR1,r4 SYNC RFI - -_GLOBAL(start_secondary_resume) - /* Reset stack */ - rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD - li r3,0 - std r3,0(r1) /* Zero the stack frame pointer */ - bl start_secondary - b . #endif /* CONFIG_SMP */ #ifdef CONFIG_KVM_BOOK3S_HANDLER diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3a319f9c9d3..ba504099844 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -147,6 +147,8 @@ __secondary_hold: mtctr r4 mr r3,r24 li r4,0 + /* Make sure that patched code is visible */ + isync bctr #else BUG_OPCODE @@ -216,19 +218,25 @@ generic_secondary_common_init: */ LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ ld r13,0(r13) /* Get base vaddr of paca array */ +#ifndef CONFIG_SMP + addi r13,r13,PACA_SIZE /* know r13 if used accidentally */ + b .kexec_wait /* wait for next kernel if !SMP */ +#else + LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */ + lwz r7,0(r7) /* also the max paca allocated */ li r5,0 /* logical cpu id */ 1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ beq 2f addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */ addi r5,r5,1 - cmpwi r5,NR_CPUS + cmpw r5,r7 /* Check if more pacas exist */ blt 1b mr r3,r24 /* not found, copy phys to r3 */ b .kexec_wait /* next kernel might do better */ -2: mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG */ +2: SET_PACA(r13) #ifdef CONFIG_PPC_BOOK3E addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */ mtspr SPRN_SPRG_TLB_EXFRAME,r12 @@ -236,34 +244,39 @@ generic_secondary_common_init: /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 -3: HMT_LOW - lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ - /* start. */ - -#ifndef CONFIG_SMP - b 3b /* Never go on non-SMP */ -#else - cmpwi 0,r23,0 - beq 3b /* Loop until told to go */ - - sync /* order paca.run and cur_cpu_spec */ /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) ld r23,0(r23) ld r23,CPU_SPEC_RESTORE(r23) cmpdi 0,r23,0 - beq 4f + beq 3f ld r23,0(r23) mtctr r23 bctrl -4: /* Create a temp kernel stack for use before relocation is on. */ +3: LOAD_REG_ADDR(r3, boot_cpu_count) /* Decrement boot_cpu_count */ + lwarx r4,0,r3 + subi r4,r4,1 + stwcx. r4,0,r3 + bne 3b + isync + +4: HMT_LOW + lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ + /* start. */ + cmpwi 0,r23,0 + beq 4b /* Loop until told to go */ + + sync /* order paca.run and cur_cpu_spec */ + isync /* In case code patching happened */ + + /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) subi r1,r1,STACK_FRAME_OVERHEAD b __secondary_start -#endif +#endif /* SMP */ /* * Turn the MMU off. @@ -534,7 +547,7 @@ _GLOBAL(pmac_secondary_start) ld r4,0(r4) /* Get base vaddr of paca array */ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r4 /* for this processor. */ - mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/ + SET_PACA(r13) /* Save vaddr of paca in an SPRG*/ /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) @@ -645,7 +658,7 @@ _GLOBAL(enable_64b_mode) oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ mtmsr r11 #else /* CONFIG_PPC_BOOK3E */ - li r12,(MSR_SF | MSR_ISF)@highest + li r12,(MSR_64BIT | MSR_ISF)@highest sldi r12,r12,48 or r11,r11,r12 mtmsrd r11 diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S new file mode 100644 index 00000000000..f8f0bc7f1d4 --- /dev/null +++ b/arch/powerpc/kernel/idle_power7.S @@ -0,0 +1,97 @@ +/* + * This file contains the power_save function for 970-family CPUs. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ppc-opcode.h> + +#undef DEBUG + + .text + +_GLOBAL(power7_idle) + /* Now check if user or arch enabled NAP mode */ + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) + cmpwi 0,r4,0 + beqlr + + /* NAP is a state loss, we create a regs frame on the + * stack, fill it up with the state we care about and + * stick a pointer to it in PACAR1. We really only + * need to save PC, some CR bits and the NV GPRs, + * but for now an interrupt frame will do. + */ + mflr r0 + std r0,16(r1) + stdu r1,-INT_FRAME_SIZE(r1) + std r0,_LINK(r1) + std r0,_NIP(r1) + +#ifndef CONFIG_SMP + /* Make sure FPU, VSX etc... are flushed as we may lose + * state when going to nap mode + */ + bl .discard_lazy_cpu_state +#endif /* CONFIG_SMP */ + + /* Hard disable interrupts */ + mfmsr r9 + rldicl r9,r9,48,1 + rotldi r9,r9,16 + mtmsrd r9,1 /* hard-disable interrupts */ + li r0,0 + stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ + stb r0,PACAHARDIRQEN(r13) + + /* Continue saving state */ + SAVE_GPR(2, r1) + SAVE_NVGPRS(r1) + mfcr r3 + std r3,_CCR(r1) + std r9,_MSR(r1) + std r1,PACAR1(r13) + + /* Magic NAP mode enter sequence */ + std r0,0(r1) + ptesync + ld r0,0(r1) +1: cmp cr0,r0,r0 + bne 1b + PPC_NAP + b . + +_GLOBAL(power7_wakeup_loss) + GET_PACA(r13) + ld r1,PACAR1(r13) + REST_NVGPRS(r1) + REST_GPR(2, r1) + ld r3,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r3 + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid + +_GLOBAL(power7_wakeup_noloss) + GET_PACA(r13) + ld r1,PACAR1(r13) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 5c1118e3194..ffafaea3d26 100644 --- a/arch/powerpc/platforms/cell/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -17,8 +17,7 @@ #include <asm/machdep.h> #include <asm/pgtable.h> #include <asm/ppc-pci.h> - -#include "io-workarounds.h" +#include <asm/io-workarounds.h> #define IOWA_MAX_BUS 8 @@ -145,7 +144,19 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, return res; } -/* Regist new bus to support workaround */ +/* Enable IO workaround */ +static void __devinit io_workaround_init(void) +{ + static int io_workaround_inited; + + if (io_workaround_inited) + return; + ppc_pci_io = iowa_pci_io; + ppc_md.ioremap = iowa_ioremap; + io_workaround_inited = 1; +} + +/* Register new bus to support workaround */ void __devinit iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops, int (*initfunc)(struct iowa_bus *, void *), void *data) @@ -153,6 +164,8 @@ void __devinit iowa_register_bus(struct pci_controller *phb, struct iowa_bus *bus; struct device_node *np = phb->dn; + io_workaround_init(); + if (iowa_bus_count >= IOWA_MAX_BUS) { pr_err("IOWA:Too many pci bridges, " "workarounds disabled for %s\n", np->full_name); @@ -162,6 +175,7 @@ void __devinit iowa_register_bus(struct pci_controller *phb, bus = &iowa_busses[iowa_bus_count]; bus->phb = phb; bus->ops = ops; + bus->private = data; if (initfunc) if ((*initfunc)(bus, data)) @@ -172,14 +186,3 @@ void __devinit iowa_register_bus(struct pci_controller *phb, pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name); } -/* enable IO workaround */ -void __devinit io_workaround_init(void) -{ - static int io_workaround_inited; - - if (io_workaround_inited) - return; - ppc_pci_io = iowa_pci_io; - ppc_md.ioremap = iowa_ioremap; - io_workaround_inited = 1; -} diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f621b7d2d86..a24d37d4cf5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -66,7 +66,6 @@ #include <asm/ptrace.h> #include <asm/machdep.h> #include <asm/udbg.h> -#include <asm/dbell.h> #include <asm/smp.h> #ifdef CONFIG_PPC64 @@ -160,7 +159,8 @@ notrace void arch_local_irq_restore(unsigned long en) #if defined(CONFIG_BOOKE) && defined(CONFIG_SMP) /* Check for pending doorbell interrupts and resend to ourself */ - doorbell_check_self(); + if (cpu_has_feature(CPU_FTR_DBELL)) + smp_muxed_ipi_resend(); #endif /* @@ -397,24 +397,28 @@ struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly; void exc_lvl_ctx_init(void) { struct thread_info *tp; - int i, hw_cpu; + int i, cpu_nr; for_each_possible_cpu(i) { - hw_cpu = get_hard_smp_processor_id(i); - memset((void *)critirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = critirq_ctx[hw_cpu]; - tp->cpu = i; +#ifdef CONFIG_PPC64 + cpu_nr = i; +#else + cpu_nr = get_hard_smp_processor_id(i); +#endif + memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = critirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = 0; #ifdef CONFIG_BOOKE - memset((void *)dbgirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = dbgirq_ctx[hw_cpu]; - tp->cpu = i; + memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = dbgirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = 0; - memset((void *)mcheckirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = mcheckirq_ctx[hw_cpu]; - tp->cpu = i; + memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = mcheckirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = HARDIRQ_OFFSET; #endif } @@ -477,20 +481,41 @@ void do_softirq(void) * IRQ controller and virtual interrupts */ +/* The main irq map itself is an array of NR_IRQ entries containing the + * associate host and irq number. An entry with a host of NULL is free. + * An entry can be allocated if it's free, the allocator always then sets + * hwirq first to the host's invalid irq number and then fills ops. + */ +struct irq_map_entry { + irq_hw_number_t hwirq; + struct irq_host *host; +}; + static LIST_HEAD(irq_hosts); static DEFINE_RAW_SPINLOCK(irq_big_lock); -static unsigned int revmap_trees_allocated; static DEFINE_MUTEX(revmap_trees_mutex); -struct irq_map_entry irq_map[NR_IRQS]; +static struct irq_map_entry irq_map[NR_IRQS]; static unsigned int irq_virq_count = NR_IRQS; static struct irq_host *irq_default_host; +irq_hw_number_t irqd_to_hwirq(struct irq_data *d) +{ + return irq_map[d->irq].hwirq; +} +EXPORT_SYMBOL_GPL(irqd_to_hwirq); + irq_hw_number_t virq_to_hw(unsigned int virq) { return irq_map[virq].hwirq; } EXPORT_SYMBOL_GPL(virq_to_hw); +bool virq_is_host(unsigned int virq, struct irq_host *host) +{ + return irq_map[virq].host == host; +} +EXPORT_SYMBOL_GPL(virq_is_host); + static int default_irq_host_match(struct irq_host *h, struct device_node *np) { return h->of_node != NULL && h->of_node == np; @@ -511,7 +536,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, /* Allocate structure and revmap table if using linear mapping */ if (revmap_type == IRQ_HOST_MAP_LINEAR) size += revmap_arg * sizeof(unsigned int); - host = zalloc_maybe_bootmem(size, GFP_KERNEL); + host = kzalloc(size, GFP_KERNEL); if (host == NULL) return NULL; @@ -561,14 +586,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, irq_map[i].host = host; smp_wmb(); - /* Clear norequest flags */ - irq_clear_status_flags(i, IRQ_NOREQUEST); - /* Legacy flags are left to default at this point, * one can then use irq_create_mapping() to * explicitly change them */ ops->map(host, i, i); + + /* Clear norequest flags */ + irq_clear_status_flags(i, IRQ_NOREQUEST); } break; case IRQ_HOST_MAP_LINEAR: @@ -579,6 +604,9 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, smp_wmb(); host->revmap_data.linear.revmap = rmap; break; + case IRQ_HOST_MAP_TREE: + INIT_RADIX_TREE(&host->revmap_data.tree, GFP_KERNEL); + break; default: break; } @@ -636,8 +664,6 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq, goto error; } - irq_clear_status_flags(virq, IRQ_NOREQUEST); - /* map it */ smp_wmb(); irq_map[virq].hwirq = hwirq; @@ -648,6 +674,8 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq, goto errdesc; } + irq_clear_status_flags(virq, IRQ_NOREQUEST); + return 0; errdesc: @@ -704,8 +732,6 @@ unsigned int irq_create_mapping(struct irq_host *host, */ virq = irq_find_mapping(host, hwirq); if (virq != NO_IRQ) { - if (host->ops->remap) - host->ops->remap(host, virq, hwirq); pr_debug("irq: -> existing mapping on virq %d\n", virq); return virq; } @@ -786,14 +812,15 @@ void irq_dispose_mapping(unsigned int virq) return; host = irq_map[virq].host; - WARN_ON (host == NULL); - if (host == NULL) + if (WARN_ON(host == NULL)) return; /* Never unmap legacy interrupts */ if (host->revmap_type == IRQ_HOST_MAP_LEGACY) return; + irq_set_status_flags(virq, IRQ_NOREQUEST); + /* remove chip and handler */ irq_set_chip_and_handler(virq, NULL, NULL); @@ -813,13 +840,6 @@ void irq_dispose_mapping(unsigned int virq) host->revmap_data.linear.revmap[hwirq] = NO_IRQ; break; case IRQ_HOST_MAP_TREE: - /* - * Check if radix tree allocated yet, if not then nothing to - * remove. - */ - smp_rmb(); - if (revmap_trees_allocated < 1) - break; mutex_lock(&revmap_trees_mutex); radix_tree_delete(&host->revmap_data.tree, hwirq); mutex_unlock(&revmap_trees_mutex); @@ -830,8 +850,6 @@ void irq_dispose_mapping(unsigned int virq) smp_mb(); irq_map[virq].hwirq = host->inval_irq; - irq_set_status_flags(virq, IRQ_NOREQUEST); - irq_free_descs(virq, 1); /* Free it */ irq_free_virt(virq, 1); @@ -877,16 +895,9 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, struct irq_map_entry *ptr; unsigned int virq; - WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); - - /* - * Check if the radix tree exists and has bee initialized. - * If not, we fallback to slow mode - */ - if (revmap_trees_allocated < 2) + if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_TREE)) return irq_find_mapping(host, hwirq); - /* Now try to resolve */ /* * No rcu_read_lock(ing) needed, the ptr returned can't go under us * as it's referencing an entry in the static irq_map table. @@ -909,16 +920,7 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq, irq_hw_number_t hwirq) { - - WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); - - /* - * Check if the radix tree exists yet. - * If not, then the irq will be inserted into the tree when it gets - * initialized. - */ - smp_rmb(); - if (revmap_trees_allocated < 1) + if (WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE)) return; if (virq != NO_IRQ) { @@ -934,7 +936,8 @@ unsigned int irq_linear_revmap(struct irq_host *host, { unsigned int *revmap; - WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR); + if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_LINEAR)) + return irq_find_mapping(host, hwirq); /* Check revmap bounds */ if (unlikely(hwirq >= host->revmap_data.linear.size)) @@ -1028,53 +1031,6 @@ int arch_early_irq_init(void) return 0; } -/* We need to create the radix trees late */ -static int irq_late_init(void) -{ - struct irq_host *h; - unsigned int i; - - /* - * No mutual exclusion with respect to accessors of the tree is needed - * here as the synchronization is done via the state variable - * revmap_trees_allocated. - */ - list_for_each_entry(h, &irq_hosts, link) { - if (h->revmap_type == IRQ_HOST_MAP_TREE) - INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL); - } - - /* - * Make sure the radix trees inits are visible before setting - * the flag - */ - smp_wmb(); - revmap_trees_allocated = 1; - - /* - * Insert the reverse mapping for those interrupts already present - * in irq_map[]. - */ - mutex_lock(&revmap_trees_mutex); - for (i = 0; i < irq_virq_count; i++) { - if (irq_map[i].host && - (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE)) - radix_tree_insert(&irq_map[i].host->revmap_data.tree, - irq_map[i].hwirq, &irq_map[i]); - } - mutex_unlock(&revmap_trees_mutex); - - /* - * Make sure the radix trees insertions are visible before setting - * the flag - */ - smp_wmb(); - revmap_trees_allocated = 2; - - return 0; -} -arch_initcall(irq_late_init); - #ifdef CONFIG_VIRQ_DEBUG static int virq_debug_show(struct seq_file *m, void *private) { @@ -1082,10 +1038,11 @@ static int virq_debug_show(struct seq_file *m, void *private) struct irq_desc *desc; const char *p; static const char none[] = "none"; + void *data; int i; - seq_printf(m, "%-5s %-7s %-15s %s\n", "virq", "hwirq", - "chip name", "host name"); + seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq", + "chip name", "chip data", "host name"); for (i = 1; i < nr_irqs; i++) { desc = irq_to_desc(i); @@ -1098,7 +1055,7 @@ static int virq_debug_show(struct seq_file *m, void *private) struct irq_chip *chip; seq_printf(m, "%5d ", i); - seq_printf(m, "0x%05lx ", virq_to_hw(i)); + seq_printf(m, "0x%05lx ", irq_map[i].hwirq); chip = irq_desc_get_chip(desc); if (chip && chip->name) @@ -1107,6 +1064,9 @@ static int virq_debug_show(struct seq_file *m, void *private) p = none; seq_printf(m, "%-15s ", p); + data = irq_desc_get_chip_data(desc); + seq_printf(m, "0x%16p ", data); + if (irq_map[i].host && irq_map[i].host->of_node) p = irq_map[i].host->of_node->full_name; else diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 42850ee00ad..bd9d35f59cf 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -109,7 +109,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs) #ifdef CONFIG_SMP void kgdb_roundup_cpus(unsigned long flags) { - smp_send_debugger_break(MSG_ALL_BUT_SELF); + smp_send_debugger_break(); } #endif diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 301db65f05a..84daabe2fcb 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -132,34 +132,6 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v) /* * Methods used to fetch LPAR data when running on a pSeries platform. */ -/** - * h_get_mpp - * H_GET_MPP hcall returns info in 7 parms - */ -int h_get_mpp(struct hvcall_mpp_data *mpp_data) -{ - int rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; - - rc = plpar_hcall9(H_GET_MPP, retbuf); - - mpp_data->entitled_mem = retbuf[0]; - mpp_data->mapped_mem = retbuf[1]; - - mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; - mpp_data->pool_num = retbuf[2] & 0xffff; - - mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; - mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; - mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; - - mpp_data->pool_size = retbuf[4]; - mpp_data->loan_request = retbuf[5]; - mpp_data->backing_mem = retbuf[6]; - - return rc; -} -EXPORT_SYMBOL(h_get_mpp); struct hvcall_ppp_data { u64 entitlement; @@ -345,6 +317,30 @@ static void parse_mpp_data(struct seq_file *m) seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); } +/** + * parse_mpp_x_data + * Parse out data returned from h_get_mpp_x + */ +static void parse_mpp_x_data(struct seq_file *m) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (!firmware_has_feature(FW_FEATURE_XCMO)) + return; + if (h_get_mpp_x(&mpp_x_data)) + return; + + seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes); + + if (mpp_x_data.pool_coalesced_bytes) + seq_printf(m, "pool_coalesced_bytes=%ld\n", + mpp_x_data.pool_coalesced_bytes); + if (mpp_x_data.pool_purr_cycles) + seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles); + if (mpp_x_data.pool_spurr_cycles) + seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); +} + #define SPLPAR_CHARACTERISTICS_TOKEN 20 #define SPLPAR_MAXLENGTH 1026*(sizeof(char)) @@ -520,6 +516,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) parse_system_parameter_string(m); parse_ppp_data(m); parse_mpp_data(m); + parse_mpp_x_data(m); pseries_cmo_data(m); splpar_dispatch_data(m); diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 094bd9821ad..998a1002860 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -694,6 +694,17 @@ _GLOBAL(kernel_thread) addi r1,r1,16 blr +#ifdef CONFIG_SMP +_GLOBAL(start_secondary_resume) + /* Reset stack */ + rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r3,0 + stw r3,0(r1) /* Zero the stack frame pointer */ + bl start_secondary + b . +#endif /* CONFIG_SMP */ + /* * This routine is just here to keep GCC happy - sigh... */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 206a321a71d..e89df59cdc5 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -462,7 +462,8 @@ _GLOBAL(disable_kernel_fp) * wait for the flag to change, indicating this kernel is going away but * the slave code for the next one is at addresses 0 to 100. * - * This is used by all slaves. + * This is used by all slaves, even those that did not find a matching + * paca in the secondary startup code. * * Physical (hardware) cpu id should be in r3. */ @@ -471,10 +472,6 @@ _GLOBAL(kexec_wait) 1: mflr r5 addi r5,r5,kexec_flag-1b - li r4,KEXEC_STATE_REAL_MODE - stb r4,PACAKEXECSTATE(r13) - SYNC - 99: HMT_LOW #ifdef CONFIG_KEXEC /* use no memory without kexec */ lwz r4,0(r5) @@ -499,11 +496,17 @@ kexec_flag: * * get phys id from paca * switch to real mode + * mark the paca as no longer used * join other cpus in kexec_wait(phys_id) */ _GLOBAL(kexec_smp_wait) lhz r3,PACAHWCPUID(r13) bl real_mode + + li r4,KEXEC_STATE_REAL_MODE + stb r4,PACAKEXECSTATE(r13) + SYNC + b .kexec_wait /* diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 10f0aadee95..efeb8818418 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/threads.h> +#include <linux/smp.h> #include <linux/module.h> #include <linux/memblock.h> @@ -156,18 +156,29 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) /* Put the paca pointer into r13 and SPRG_PACA */ void setup_paca(struct paca_struct *new_paca) { + /* Setup r13 */ local_paca = new_paca; - mtspr(SPRN_SPRG_PACA, local_paca); + #ifdef CONFIG_PPC_BOOK3E + /* On Book3E, initialize the TLB miss exception frames */ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); +#else + /* In HV mode, we setup both HPACA and PACA to avoid problems + * if we do a GET_PACA() before the feature fixups have been + * applied + */ + if (cpu_has_feature(CPU_FTR_HVMODE_206)) + mtspr(SPRN_SPRG_HPACA, local_paca); #endif + mtspr(SPRN_SPRG_PACA, local_paca); + } static int __initdata paca_size; void __init allocate_pacas(void) { - int nr_cpus, cpu, limit; + int cpu, limit; /* * We can't take SLB misses on the paca, and we want to access them @@ -179,23 +190,18 @@ void __init allocate_pacas(void) if (firmware_has_feature(FW_FEATURE_ISERIES)) limit = min(limit, HvPagesToMap * HVPAGESIZE); - nr_cpus = NR_CPUS; - /* On iSeries we know we can never have more than 64 cpus */ - if (firmware_has_feature(FW_FEATURE_ISERIES)) - nr_cpus = min(64, nr_cpus); - - paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus); + paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit)); memset(paca, 0, paca_size); printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", - paca_size, nr_cpus, paca); + paca_size, nr_cpu_ids, paca); - allocate_lppacas(nr_cpus, limit); + allocate_lppacas(nr_cpu_ids, limit); /* Can't use for_each_*_cpu, as they aren't functional yet */ - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_cpu_ids; cpu++) initialise_paca(&paca[cpu], cpu); } diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index d225d99fe39..6baabc13306 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -43,10 +43,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data) const u32 *regs; struct pci_dn *pdn; - pdn = alloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); + pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); if (pdn == NULL) return NULL; - memset(pdn, 0, sizeof(*pdn)); dn->data = pdn; pdn->node = dn; pdn->phb = phb; diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ef3ef566235..7d28f540200 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -54,7 +54,6 @@ extern void single_step_exception(struct pt_regs *regs); extern int sys_sigreturn(struct pt_regs *regs); EXPORT_SYMBOL(clear_pages); -EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(ISA_DMA_THRESHOLD); EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); @@ -88,9 +87,7 @@ EXPORT_SYMBOL(__copy_tofrom_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__strnlen_user); -#ifdef CONFIG_PPC64 -EXPORT_SYMBOL(copy_4K_page); -#endif +EXPORT_SYMBOL(copy_page); #if defined(CONFIG_PCI) && defined(CONFIG_PPC32) EXPORT_SYMBOL(isa_io_base); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f74f355a961..095043d7994 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -702,6 +702,8 @@ void prepare_to_copy(struct task_struct *tsk) /* * Copy a thread.. */ +extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */ + int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) @@ -755,11 +757,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, _ALIGN_UP(sizeof(struct thread_info), 16); #ifdef CONFIG_PPC_STD_MMU_64 - if (cpu_has_feature(CPU_FTR_SLB)) { + if (mmu_has_feature(MMU_FTR_SLB)) { unsigned long sp_vsid; unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; - if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T) << SLB_VSID_SHIFT_1T; else @@ -769,6 +771,20 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.ksp_vsid = sp_vsid; } #endif /* CONFIG_PPC_STD_MMU_64 */ +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_DSCR)) { + if (current->thread.dscr_inherit) { + p->thread.dscr_inherit = 1; + p->thread.dscr = current->thread.dscr; + } else if (0 != dscr_default) { + p->thread.dscr_inherit = 1; + p->thread.dscr = dscr_default; + } else { + p->thread.dscr_inherit = 0; + p->thread.dscr = 0; + } + } +#endif /* * The PPC64 ABI makes use of a TOC to contain function diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index e74fa12afc8..48aeb55faae 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -68,6 +68,7 @@ int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; u64 ppc64_rma_size; #endif +static phys_addr_t first_memblock_size; static int __init early_parse_mem(char *p) { @@ -123,18 +124,19 @@ static void __init move_device_tree(void) */ static struct ibm_pa_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ + unsigned long mmu_features; /* MMU_FTR_xxx bit */ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */ unsigned char pabyte; /* byte number in ibm,pa-features */ unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ } ibm_pa_features[] __initdata = { - {0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, - {0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, - {CPU_FTR_SLB, 0, 0, 2, 0}, - {CPU_FTR_CTRL, 0, 0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 1, 1, 1}, - {CPU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, + {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, + {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, + {0, MMU_FTR_SLB, 0, 0, 2, 0}, + {CPU_FTR_CTRL, 0, 0, 0, 3, 0}, + {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, + {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, + {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, }; @@ -166,9 +168,11 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs, if (bit ^ fp->invert) { cur_cpu_spec->cpu_features |= fp->cpu_features; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; + cur_cpu_spec->mmu_features |= fp->mmu_features; } else { cur_cpu_spec->cpu_features &= ~fp->cpu_features; cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; + cur_cpu_spec->mmu_features &= ~fp->mmu_features; } } } @@ -268,13 +272,13 @@ static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - static int logical_cpuid = 0; char *type = of_get_flat_dt_prop(node, "device_type", NULL); const u32 *prop; const u32 *intserv; int i, nthreads; unsigned long len; - int found = 0; + int found = -1; + int found_thread = 0; /* We are scanning "cpu" nodes only */ if (type == NULL || strcmp(type, "cpu") != 0) @@ -298,11 +302,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * version 2 of the kexec param format adds the phys cpuid of * booted proc. */ - if (initial_boot_params && initial_boot_params->version >= 2) { - if (intserv[i] == - initial_boot_params->boot_cpuid_phys) { - found = 1; - break; + if (initial_boot_params->version >= 2) { + if (intserv[i] == initial_boot_params->boot_cpuid_phys) { + found = boot_cpu_count; + found_thread = i; } } else { /* @@ -311,23 +314,20 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * off secondary threads. */ if (of_get_flat_dt_prop(node, - "linux,boot-cpu", NULL) != NULL) { - found = 1; - break; - } + "linux,boot-cpu", NULL) != NULL) + found = boot_cpu_count; } - #ifdef CONFIG_SMP /* logical cpu id is always 0 on UP kernels */ - logical_cpuid++; + boot_cpu_count++; #endif } - if (found) { - DBG("boot cpu: logical %d physical %d\n", logical_cpuid, - intserv[i]); - boot_cpuid = logical_cpuid; - set_hard_smp_processor_id(boot_cpuid, intserv[i]); + if (found >= 0) { + DBG("boot cpu: logical %d physical %d\n", found, + intserv[found_thread]); + boot_cpuid = found; + set_hard_smp_processor_id(found, intserv[found_thread]); /* * PAPR defines "logical" PVR values for cpus that @@ -509,11 +509,14 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) size = 0x80000000ul - base; } #endif - - /* First MEMBLOCK added, do some special initializations */ - if (memstart_addr == ~(phys_addr_t)0) - setup_initial_memory_limit(base, size); - memstart_addr = min((u64)memstart_addr, base); + /* Keep track of the beginning of memory -and- the size of + * the very first block in the device-tree as it represents + * the RMA on ppc64 server + */ + if (base < memstart_addr) { + memstart_addr = base; + first_memblock_size = size; + } /* Add the chunk to the MEMBLOCK list */ memblock_add(base, size); @@ -698,6 +701,7 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); + setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Save command line for /proc/cmdline and then parse parameters */ strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 941ff4dbc56..c016033ba78 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -335,6 +335,7 @@ static void __init prom_printf(const char *format, ...) const char *p, *q, *s; va_list args; unsigned long v; + long vs; struct prom_t *_prom = &RELOC(prom); va_start(args, format); @@ -368,12 +369,35 @@ static void __init prom_printf(const char *format, ...) v = va_arg(args, unsigned long); prom_print_hex(v); break; + case 'd': + ++q; + vs = va_arg(args, int); + if (vs < 0) { + prom_print(RELOC("-")); + vs = -vs; + } + prom_print_dec(vs); + break; case 'l': ++q; - if (*q == 'u') { /* '%lu' */ + if (*q == 0) + break; + else if (*q == 'x') { + ++q; + v = va_arg(args, unsigned long); + prom_print_hex(v); + } else if (*q == 'u') { /* '%lu' */ ++q; v = va_arg(args, unsigned long); prom_print_dec(v); + } else if (*q == 'd') { /* %ld */ + ++q; + vs = va_arg(args, long); + if (vs < 0) { + prom_print(RELOC("-")); + vs = -vs; + } + prom_print_dec(vs); } break; } @@ -676,8 +700,10 @@ static void __init early_cmdline_parse(void) #endif /* CONFIG_PCI_MSI */ #ifdef CONFIG_PPC_SMLPAR #define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */ +#define OV5_XCMO 0x40 /* Page Coalescing */ #else #define OV5_CMO 0x00 +#define OV5_XCMO 0x00 #endif #define OV5_TYPE1_AFFINITY 0x80 /* Type 1 NUMA affinity */ @@ -732,7 +758,7 @@ static unsigned char ibm_architecture_vec[] = { OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_DONATE_DEDICATE_CPU | OV5_MSI, 0, - OV5_CMO, + OV5_CMO | OV5_XCMO, OV5_TYPE1_AFFINITY, 0, 0, diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 2097f2b3cba..271ff6318ed 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -42,6 +42,7 @@ #include <asm/time.h> #include <asm/mmu.h> #include <asm/topology.h> +#include <asm/pSeries_reconfig.h> struct rtas_t rtas = { .lock = __ARCH_SPIN_LOCK_UNLOCKED @@ -494,7 +495,7 @@ unsigned int rtas_busy_delay(int status) might_sleep(); ms = rtas_busy_delay_time(status); - if (ms) + if (ms && need_resched()) msleep(ms); return ms; @@ -731,6 +732,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w atomic_set(&data->error, rc); start_topology_update(); + pSeries_coalesce_init(); if (wake_when_done) { atomic_set(&data->done, 1); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 21f30cb6807..79fca2651b6 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -381,7 +381,7 @@ static void __init cpu_init_thread_core_maps(int tpc) int i; threads_per_core = tpc; - threads_core_mask = CPU_MASK_NONE; + cpumask_clear(&threads_core_mask); /* This implementation only supports power of 2 number of threads * for simplicity and performance @@ -390,7 +390,7 @@ static void __init cpu_init_thread_core_maps(int tpc) BUG_ON(tpc != (1 << threads_shift)); for (i = 0; i < tpc; i++) - cpu_set(i, threads_core_mask); + cpumask_set_cpu(i, &threads_core_mask); printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n", tpc, tpc > 1 ? "s" : ""); @@ -404,7 +404,7 @@ static void __init cpu_init_thread_core_maps(int tpc) * cpu_present_mask * * Having the possible map set up early allows us to restrict allocations - * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. + * of things like irqstacks to nr_cpu_ids rather than NR_CPUS. * * We do not initialize the online map here; cpus set their own bits in * cpu_online_mask as they come up. @@ -424,7 +424,7 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); - while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { + while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) { const int *intserv; int j, len; @@ -443,7 +443,7 @@ void __init smp_setup_cpu_maps(void) intserv = &cpu; /* assume logical == phys */ } - for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { + for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, intserv[j]); set_cpu_present(cpu, true); @@ -483,12 +483,12 @@ void __init smp_setup_cpu_maps(void) if (cpu_has_feature(CPU_FTR_SMT)) maxcpus *= nthreads; - if (maxcpus > NR_CPUS) { + if (maxcpus > nr_cpu_ids) { printk(KERN_WARNING "Partition configured for %d cpus, " "operating system maximum is %d.\n", - maxcpus, NR_CPUS); - maxcpus = NR_CPUS; + maxcpus, nr_cpu_ids); + maxcpus = nr_cpu_ids; } else printk(KERN_INFO "Partition configured for %d cpus.\n", maxcpus); @@ -510,7 +510,7 @@ void __init smp_setup_cpu_maps(void) cpu_init_thread_core_maps(nthreads); /* Now that possible cpus are set, set nr_cpu_ids for later use */ - nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; + setup_nr_cpu_ids(); free_unused_pacas(); } @@ -602,6 +602,10 @@ int check_legacy_ioport(unsigned long base_port) * name instead */ if (!np) np = of_find_node_by_name(NULL, "8042"); + if (np) { + of_i8042_kbd_irq = 1; + of_i8042_aux_irq = 12; + } break; case FDC_BASE: /* FDC1 */ np = of_find_node_by_type(NULL, "fdc"); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 1d2fbc90530..620d792b52e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -48,6 +48,7 @@ extern void bootx_init(unsigned long r4, unsigned long phys); int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); +int __initdata boot_cpu_count; int boot_cpuid_phys; int smp_hw_index[NR_CPUS]; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5a0401fcaeb..a88bf2713d4 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -62,6 +62,7 @@ #include <asm/udbg.h> #include <asm/kexec.h> #include <asm/mmu_context.h> +#include <asm/code-patching.h> #include "setup.h" @@ -72,6 +73,7 @@ #endif int boot_cpuid = 0; +int __initdata boot_cpu_count; u64 ppc64_pft_size; /* Pick defaults since we might want to patch instructions @@ -233,6 +235,7 @@ void early_setup_secondary(void) void smp_release_cpus(void) { unsigned long *ptr; + int i; DBG(" -> smp_release_cpus()\n"); @@ -245,7 +248,16 @@ void smp_release_cpus(void) ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop - PHYSICAL_START); *ptr = __pa(generic_secondary_smp_init); - mb(); + + /* And wait a bit for them to catch up */ + for (i = 0; i < 100000; i++) { + mb(); + HMT_low(); + if (boot_cpu_count == 0) + break; + udelay(1); + } + DBG("boot_cpu_count = %d\n", boot_cpu_count); DBG(" <- smp_release_cpus()\n"); } @@ -423,17 +435,30 @@ void __init setup_system(void) DBG(" <- setup_system()\n"); } -static u64 slb0_limit(void) +/* This returns the limit below which memory accesses to the linear + * mapping are guarnateed not to cause a TLB or SLB miss. This is + * used to allocate interrupt or emergency stacks for which our + * exception entry path doesn't deal with being interrupted. + */ +static u64 safe_stack_limit(void) { - if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { +#ifdef CONFIG_PPC_BOOK3E + /* Freescale BookE bolts the entire linear mapping */ + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + return linear_map_top; + /* Other BookE, we assume the first GB is bolted */ + return 1ul << 30; +#else + /* BookS, the first segment is bolted */ + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) return 1UL << SID_SHIFT_1T; - } return 1UL << SID_SHIFT; +#endif } static void __init irqstack_early_init(void) { - u64 limit = slb0_limit(); + u64 limit = safe_stack_limit(); unsigned int i; /* @@ -453,6 +478,9 @@ static void __init irqstack_early_init(void) #ifdef CONFIG_PPC_BOOK3E static void __init exc_lvl_early_init(void) { + extern unsigned int interrupt_base_book3e; + extern unsigned int exc_debug_debug_book3e; + unsigned int i; for_each_possible_cpu(i) { @@ -463,6 +491,10 @@ static void __init exc_lvl_early_init(void) mcheckirq_ctx[i] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); } + + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) + patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1, + (unsigned long)&exc_debug_debug_book3e, 0); } #else #define exc_lvl_early_init() @@ -486,7 +518,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(slb0_limit(), ppc64_rma_size); + limit = min(safe_stack_limit(), ppc64_rma_size); for_each_possible_cpu(i) { unsigned long sp; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 27c4a4584f8..da989fff19c 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -381,7 +381,7 @@ badframe: regs, uc, &uc->uc_mcontext); #endif if (show_unhandled_signals && printk_ratelimit()) - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "rt_sigreturn", (long)uc, regs->nip, regs->link); @@ -469,7 +469,7 @@ badframe: regs, frame, newsp); #endif if (show_unhandled_signals && printk_ratelimit()) - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "setup_rt_frame", (long)frame, regs->nip, regs->link); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9f9c204bef6..4a6f2ec7e76 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -95,7 +95,7 @@ int smt_enabled_at_boot = 1; static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; #ifdef CONFIG_PPC64 -void __devinit smp_generic_kick_cpu(int nr) +int __devinit smp_generic_kick_cpu(int nr) { BUG_ON(nr < 0 || nr >= NR_CPUS); @@ -106,37 +106,10 @@ void __devinit smp_generic_kick_cpu(int nr) */ paca[nr].cpu_start = 1; smp_mb(); -} -#endif -void smp_message_recv(int msg) -{ - switch(msg) { - case PPC_MSG_CALL_FUNCTION: - generic_smp_call_function_interrupt(); - break; - case PPC_MSG_RESCHEDULE: - scheduler_ipi(); - break; - case PPC_MSG_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; - case PPC_MSG_DEBUGGER_BREAK: - if (crash_ipi_function_ptr) { - crash_ipi_function_ptr(get_irq_regs()); - break; - } -#ifdef CONFIG_DEBUGGER - debugger_ipi(get_irq_regs()); - break; -#endif /* CONFIG_DEBUGGER */ - /* FALLTHROUGH */ - default: - printk("SMP %d: smp_message_recv(): unknown msg %d\n", - smp_processor_id(), msg); - break; - } + return 0; } +#endif static irqreturn_t call_function_action(int irq, void *data) { @@ -156,9 +129,17 @@ static irqreturn_t call_function_single_action(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t debug_ipi_action(int irq, void *data) +irqreturn_t debug_ipi_action(int irq, void *data) { - smp_message_recv(PPC_MSG_DEBUGGER_BREAK); + if (crash_ipi_function_ptr) { + crash_ipi_function_ptr(get_irq_regs()); + return IRQ_HANDLED; + } + +#ifdef CONFIG_DEBUGGER + debugger_ipi(get_irq_regs()); +#endif /* CONFIG_DEBUGGER */ + return IRQ_HANDLED; } @@ -197,6 +178,66 @@ int smp_request_message_ipi(int virq, int msg) return err; } +#ifdef CONFIG_PPC_SMP_MUXED_IPI +struct cpu_messages { + int messages; /* current messages */ + unsigned long data; /* data for cause ipi */ +}; +static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); + +void smp_muxed_ipi_set_data(int cpu, unsigned long data) +{ + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + + info->data = data; +} + +void smp_muxed_ipi_message_pass(int cpu, int msg) +{ + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + char *message = (char *)&info->messages; + + message[msg] = 1; + mb(); + smp_ops->cause_ipi(cpu, info->data); +} + +void smp_muxed_ipi_resend(void) +{ + struct cpu_messages *info = &__get_cpu_var(ipi_message); + + if (info->messages) + smp_ops->cause_ipi(smp_processor_id(), info->data); +} + +irqreturn_t smp_ipi_demux(void) +{ + struct cpu_messages *info = &__get_cpu_var(ipi_message); + unsigned int all; + + mb(); /* order any irq clear */ + + do { + all = xchg_local(&info->messages, 0); + +#ifdef __BIG_ENDIAN + if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION))) + generic_smp_call_function_interrupt(); + if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE))) + scheduler_ipi(); + if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE))) + generic_smp_call_function_single_interrupt(); + if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK))) + debug_ipi_action(0, NULL); +#else +#error Unsupported ENDIAN +#endif + } while (info->messages); + + return IRQ_HANDLED; +} +#endif /* CONFIG_PPC_SMP_MUXED_IPI */ + void smp_send_reschedule(int cpu) { if (likely(smp_ops)) @@ -216,11 +257,18 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION); } -#ifdef CONFIG_DEBUGGER -void smp_send_debugger_break(int cpu) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +void smp_send_debugger_break(void) { - if (likely(smp_ops)) - smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); + int cpu; + int me = raw_smp_processor_id(); + + if (unlikely(!smp_ops)) + return; + + for_each_online_cpu(cpu) + if (cpu != me) + smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); } #endif @@ -228,9 +276,9 @@ void smp_send_debugger_break(int cpu) void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { crash_ipi_function_ptr = crash_ipi_callback; - if (crash_ipi_callback && smp_ops) { + if (crash_ipi_callback) { mb(); - smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK); + smp_send_debugger_break(); } } #endif @@ -410,8 +458,6 @@ int __cpuinit __cpu_up(unsigned int cpu) { int rc, c; - secondary_ti = current_set[cpu]; - if (smp_ops == NULL || (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))) return -EINVAL; @@ -421,6 +467,8 @@ int __cpuinit __cpu_up(unsigned int cpu) if (rc) return rc; + secondary_ti = current_set[cpu]; + /* Make sure callin-map entry is 0 (can be leftover a CPU * hotplug */ @@ -434,7 +482,11 @@ int __cpuinit __cpu_up(unsigned int cpu) /* wake up cpus */ DBG("smp: kicking cpu %d\n", cpu); - smp_ops->kick_cpu(cpu); + rc = smp_ops->kick_cpu(cpu); + if (rc) { + pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc); + return rc; + } /* * wait to see if the cpu made a callin (is actually up). @@ -507,7 +559,7 @@ int cpu_first_thread_of_core(int core) } EXPORT_SYMBOL_GPL(cpu_first_thread_of_core); -/* Must be called when no change can occur to cpu_present_map, +/* Must be called when no change can occur to cpu_present_mask, * i.e. during cpu online or offline. */ static struct device_node *cpu_to_l2cache(int cpu) @@ -608,7 +660,7 @@ void __init smp_cpus_done(unsigned int max_cpus) * se we pin us down to CPU 0 for a short while */ alloc_cpumask_var(&old_mask, GFP_NOWAIT); - cpumask_copy(old_mask, ¤t->cpus_allowed); + cpumask_copy(old_mask, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c0d8c2006bf..f0f2199e64e 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -182,6 +182,41 @@ static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL); static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr); static SYSDEV_ATTR(purr, 0600, show_purr, store_purr); + +unsigned long dscr_default = 0; +EXPORT_SYMBOL(dscr_default); + +static ssize_t show_dscr_default(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *buf) +{ + return sprintf(buf, "%lx\n", dscr_default); +} + +static ssize_t __used store_dscr_default(struct sysdev_class *class, + struct sysdev_class_attribute *attr, const char *buf, + size_t count) +{ + unsigned long val; + int ret = 0; + + ret = sscanf(buf, "%lx", &val); + if (ret != 1) + return -EINVAL; + dscr_default = val; + + return count; +} + +static SYSDEV_CLASS_ATTR(dscr_default, 0600, + show_dscr_default, store_dscr_default); + +static void sysfs_create_dscr_default(void) +{ + int err = 0; + if (cpu_has_feature(CPU_FTR_DSCR)) + err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, + &attr_dscr_default.attr); +} #endif /* CONFIG_PPC64 */ #ifdef HAS_PPC_PMC_PA6T @@ -617,6 +652,9 @@ static int __init topology_init(void) if (cpu_online(cpu)) register_cpu_online(cpu); } +#ifdef CONFIG_PPC64 + sysfs_create_dscr_default(); +#endif /* CONFIG_PPC64 */ return 0; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d782cd71c07..b13306b0d92 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -198,7 +198,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) } else if (show_unhandled_signals && unhandled_signal(current, signr) && printk_ratelimit()) { - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, signr, addr, regs->nip, regs->link, code); } @@ -220,7 +220,7 @@ void system_reset_exception(struct pt_regs *regs) } #ifdef CONFIG_KEXEC - cpu_set(smp_processor_id(), cpus_in_sr); + cpumask_set_cpu(smp_processor_id(), &cpus_in_sr); #endif die("System Reset", regs, SIGABRT); @@ -908,6 +908,26 @@ static int emulate_instruction(struct pt_regs *regs) return emulate_isel(regs, instword); } +#ifdef CONFIG_PPC64 + /* Emulate the mfspr rD, DSCR. */ + if (((instword & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) && + cpu_has_feature(CPU_FTR_DSCR)) { + PPC_WARN_EMULATED(mfdscr, regs); + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_DSCR); + return 0; + } + /* Emulate the mtspr DSCR, rD. */ + if (((instword & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) && + cpu_has_feature(CPU_FTR_DSCR)) { + PPC_WARN_EMULATED(mtdscr, regs); + rd = (instword >> 21) & 0x1f; + mtspr(SPRN_DSCR, regs->gpr[rd]); + current->thread.dscr_inherit = 1; + return 0; + } +#endif + return -EINVAL; } @@ -1505,6 +1525,10 @@ struct ppc_emulated ppc_emulated = { #ifdef CONFIG_VSX WARN_EMULATED_SETUP(vsx), #endif +#ifdef CONFIG_PPC64 + WARN_EMULATED_SETUP(mfdscr), + WARN_EMULATED_SETUP(mtdscr), +#endif }; u32 ppc_warn_emulated; diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index e39cad83c88..23d65abbedc 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -62,6 +62,8 @@ void __init udbg_early_init(void) udbg_init_cpm(); #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) udbg_init_usbgecko(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) + udbg_init_wsp(); #endif #ifdef CONFIG_PPC_EARLY_DEBUG diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index baa33a7517b..6837f839ab7 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -11,6 +11,7 @@ #include <linux/types.h> #include <asm/udbg.h> #include <asm/io.h> +#include <asm/reg_a2.h> extern u8 real_readb(volatile u8 __iomem *addr); extern void real_writeb(u8 data, volatile u8 __iomem *addr); @@ -298,3 +299,53 @@ void __init udbg_init_40x_realmode(void) udbg_getc_poll = NULL; } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP +static void udbg_wsp_flush(void) +{ + if (udbg_comport) { + while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + } +} + +static void udbg_wsp_putc(char c) +{ + if (udbg_comport) { + if (c == '\n') + udbg_wsp_putc('\r'); + udbg_wsp_flush(); + writeb(c, &udbg_comport->thr); eieio(); + } +} + +static int udbg_wsp_getc(void) +{ + if (udbg_comport) { + while ((readb(&udbg_comport->lsr) & LSR_DR) == 0) + ; /* wait for char */ + return readb(&udbg_comport->rbr); + } + return -1; +} + +static int udbg_wsp_getc_poll(void) +{ + if (udbg_comport) + if (readb(&udbg_comport->lsr) & LSR_DR) + return readb(&udbg_comport->rbr); + return -1; +} + +void __init udbg_init_wsp(void) +{ + udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT; + + udbg_init_uart(udbg_comport, 57600, 50000000); + + udbg_putc = udbg_wsp_putc; + udbg_flush = udbg_wsp_flush; + udbg_getc = udbg_wsp_getc; + udbg_getc_poll = udbg_wsp_getc_poll; +} +#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 9de6f396cf8..4d5a3edff49 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -102,7 +102,7 @@ _GLOBAL(giveup_altivec) MTMSRD(r5) /* enable use of VMX now */ isync PPC_LCMPI 0,r3,0 - beqlr- /* if no previous owner, done */ + beqlr /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r5,0 diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 74d0e742114..da3a1225c0a 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_44x *vcpu_44x; diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 65ea083a5b2..549bb2c9a47 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -158,7 +158,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); } - kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); return emulated; } @@ -179,7 +178,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); } - kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); return emulated; } diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index c961de40c67..0f95b5cce03 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -236,7 +236,7 @@ void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) { - return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions); + return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); } void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 2b9c9088d00..1a1b34487e7 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -35,9 +35,7 @@ #if defined(CONFIG_PPC_BOOK3S_64) -#define LOAD_SHADOW_VCPU(reg) \ - mfspr reg, SPRN_SPRG_PACA - +#define LOAD_SHADOW_VCPU(reg) GET_PACA(reg) #define SHADOW_VCPU_OFF PACA_KVM_SVCPU #define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR) #define FUNC(name) GLUE(.,name) @@ -72,7 +70,7 @@ .global kvmppc_trampoline_\intno kvmppc_trampoline_\intno: - mtspr SPRN_SPRG_SCRATCH0, r13 /* Save r13 */ + SET_SCRATCH0(r13) /* Save r13 */ /* * First thing to do is to find out if we're coming @@ -91,7 +89,7 @@ kvmppc_trampoline_\intno: lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) mtcr r12 PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) - mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ + GET_SCRATCH0(r13) /* r13 = original r13 */ b kvmppc_resume_\intno /* Get back original handler */ /* Now we know we're handling a KVM guest */ @@ -114,6 +112,9 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL +#ifdef CONFIG_PPC_BOOK3S_64 +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL_HV +#endif INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_FP_UNAVAIL @@ -158,7 +159,7 @@ kvmppc_handler_skip_ins: lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) mtcr r12 PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) - mfspr r13, SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r13) /* And get back into the code */ RFI diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 7c52ed0b705..451264274b8 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -155,14 +155,20 @@ kvmppc_handler_trampoline_exit: PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13) /* Save guest PC and MSR */ - mfsrr0 r3 + andi. r0,r12,0x2 + beq 1f + mfspr r3,SPRN_HSRR0 + mfspr r4,SPRN_HSRR1 + andi. r12,r12,0x3ffd + b 2f +1: mfsrr0 r3 mfsrr1 r4 - +2: PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13) PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13) /* Get scratch'ed off registers */ - mfspr r9, SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r9) PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) lwz r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ef76acb455c..8462b3a1c1c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.shared->srr0 = regs->srr0; vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_pid(vcpu, regs->pid); vcpu->arch.shared->sprg0 = regs->sprg0; vcpu->arch.shared->sprg1 = regs->sprg1; vcpu->arch.shared->sprg2 = regs->sprg2; @@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } +static void get_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + u64 tb = get_tb(); + + sregs->u.e.features |= KVM_SREGS_E_BASE; + + sregs->u.e.csrr0 = vcpu->arch.csrr0; + sregs->u.e.csrr1 = vcpu->arch.csrr1; + sregs->u.e.mcsr = vcpu->arch.mcsr; + sregs->u.e.esr = vcpu->arch.esr; + sregs->u.e.dear = vcpu->arch.shared->dar; + sregs->u.e.tsr = vcpu->arch.tsr; + sregs->u.e.tcr = vcpu->arch.tcr; + sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); + sregs->u.e.tb = tb; + sregs->u.e.vrsave = vcpu->arch.vrsave; +} + +static int set_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_BASE)) + return 0; + + vcpu->arch.csrr0 = sregs->u.e.csrr0; + vcpu->arch.csrr1 = sregs->u.e.csrr1; + vcpu->arch.mcsr = sregs->u.e.mcsr; + vcpu->arch.esr = sregs->u.e.esr; + vcpu->arch.shared->dar = sregs->u.e.dear; + vcpu->arch.vrsave = sregs->u.e.vrsave; + vcpu->arch.tcr = sregs->u.e.tcr; + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) + vcpu->arch.dec = sregs->u.e.dec; + + kvmppc_emulate_dec(vcpu); + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + /* + * FIXME: existing KVM timer handling is incomplete. + * TSR cannot be read by the guest, and its value in + * vcpu->arch is always zero. For now, just handle + * the case where the caller is trying to inject a + * decrementer interrupt. + */ + + if ((sregs->u.e.tsr & TSR_DIS) && + (vcpu->arch.tcr & TCR_DIE)) + kvmppc_core_queue_dec(vcpu); + } + + return 0; +} + +static void get_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_ARCH206; + + sregs->u.e.pir = 0; + sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; + sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; + sregs->u.e.decar = vcpu->arch.decar; + sregs->u.e.ivpr = vcpu->arch.ivpr; +} + +static int set_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) + return 0; + + if (sregs->u.e.pir != 0) + return -EINVAL; + + vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; + vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1; + vcpu->arch.decar = sregs->u.e.decar; + vcpu->arch.ivpr = sregs->u.e.ivpr; + + return 0; +} + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_IVOR; + + sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; +} + +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2]; + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3]; + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5]; + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8]; + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11]; + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15]; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + sregs->pvr = vcpu->arch.pvr; + + get_sregs_base(vcpu, sregs); + get_sregs_arch206(vcpu, sregs); + kvmppc_core_get_sregs(vcpu, sregs); + return 0; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + int ret; + + if (vcpu->arch.pvr != sregs->pvr) + return -EINVAL; + + ret = set_sregs_base(vcpu, sregs); + if (ret < 0) + return ret; + + ret = set_sregs_arch206(vcpu, sregs); + if (ret < 0) + return ret; + + return kvmppc_core_set_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 1cc471faac2..b58ccae9590 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -380,7 +380,6 @@ lightweight_exit: * because host interrupt handlers would get confused. */ lwz r1, VCPU_GPR(r1)(r4) - /* XXX handle USPRG0 */ /* Host interrupt handlers may have clobbered these guest-readable * SPRGs, so we need to reload them here with the guest's values. */ lwz r3, VCPU_SPRG4(r4) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index e3768ee9b59..318dbc61ba4 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -63,6 +63,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) /* Registers init */ vcpu->arch.pvr = mfspr(SPRN_PVR); + vcpu_e500->svr = mfspr(SPRN_SVR); /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ vcpu->vcpu_id = 0; @@ -96,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE | + KVM_SREGS_E_PM; + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; + + sregs->u.e.impl.fsl.features = 0; + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; + + sregs->u.e.mas0 = vcpu_e500->mas0; + sregs->u.e.mas1 = vcpu_e500->mas1; + sregs->u.e.mas2 = vcpu_e500->mas2; + sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; + sregs->u.e.mas4 = vcpu_e500->mas4; + sregs->u.e.mas6 = vcpu_e500->mas6; + + sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); + sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; + sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; + sregs->u.e.tlbcfg[2] = 0; + sregs->u.e.tlbcfg[3] = 0; + + sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; + sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; + sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; + sregs->u.e.ivor_high[3] = + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; + vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; + } + + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { + vcpu_e500->mas0 = sregs->u.e.mas0; + vcpu_e500->mas1 = sregs->u.e.mas1; + vcpu_e500->mas2 = sregs->u.e.mas2; + vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; + vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; + vcpu_e500->mas4 = sregs->u.e.mas4; + vcpu_e500->mas6 = sregs->u.e.mas6; + } + + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + if (sregs->u.e.features & KVM_SREGS_E_SPE) { + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = + sregs->u.e.ivor_high[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = + sregs->u.e.ivor_high[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = + sregs->u.e.ivor_high[2]; + } + + if (sregs->u.e.features & KVM_SREGS_E_PM) { + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = + sregs->u.e.ivor_high[3]; + } + + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_e500 *vcpu_e500; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 8e3edfbc963..69cd665a0ca 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, <yu.liu@freescale.com> * @@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) switch (sprn) { case SPRN_PID: - vcpu_e500->pid[0] = vcpu->arch.shadow_pid = - vcpu->arch.pid = spr_val; + kvmppc_set_pid(vcpu, spr_val); break; case SPRN_PID1: vcpu_e500->pid[1] = spr_val; break; @@ -175,6 +174,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; case SPRN_HID1: kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; + case SPRN_SVR: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break; case SPRN_MMUCSR0: kvmppc_set_gpr(vcpu, rt, 0); break; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index d6d6d47a75a..b18fe353397 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, yu.liu@freescale.com * @@ -24,6 +24,7 @@ #include "../mm/mmu_decl.h" #include "e500_tlb.h" #include "trace.h" +#include "timing.h" #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) @@ -506,6 +507,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) vcpu_e500->mas7 = 0; } + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); return EMULATE_DONE; } @@ -571,6 +573,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) write_host_tlbe(vcpu_e500, stlbsel, sesel); } + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); return EMULATE_DONE; } @@ -672,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, return -1; } +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + vcpu_e500->pid[0] = vcpu->arch.shadow_pid = + vcpu->arch.pid = pid; +} + void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) { struct tlbe *tlbe; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index c64fd2909bb..141dce3c681 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } +u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) +{ + u64 jd = tb - vcpu->arch.dec_jiffies; + return vcpu->arch.dec - jd; +} + /* XXX to do: * lhax * lhaux @@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_DEC: { - u64 jd = get_tb() - vcpu->arch.dec_jiffies; - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); - pr_debug("mfDEC: %x - %llx = %lx\n", - vcpu->arch.dec, jd, - kvmppc_get_gpr(vcpu, rt)); + kvmppc_set_gpr(vcpu, rt, + kvmppc_get_dec(vcpu, get_tb())); break; } default: @@ -294,6 +297,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; } + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); break; case OP_31_XOP_STHX: @@ -363,6 +367,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) printk("mtspr: unknown spr %x\n", sprn); break; } + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); break; case OP_31_XOP_DCBI: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 99758460efd..616dd516ca1 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_SREGS: +#else case KVM_CAP_PPC_SEGSTATE: +#endif case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: @@ -284,6 +288,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; +#ifdef CONFIG_KVM_EXIT_TIMING + mutex_init(&vcpu->arch.exit_timing_lock); +#endif + return 0; } @@ -294,12 +302,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { +#ifdef CONFIG_BOOKE + /* + * vrsave (formerly usprg0) isn't used by Linux, but may + * be used by the guest. + * + * On non-booke this is associated with Altivec and + * is handled by code in book3s.c. + */ + mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); +#endif kvmppc_core_vcpu_load(vcpu, cpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvmppc_core_vcpu_put(vcpu); +#ifdef CONFIG_BOOKE + vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); +#endif } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index a021f5827a3..319177df958 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) { int i; - /* pause guest execution to avoid concurrent updates */ - mutex_lock(&vcpu->mutex); + /* Take a lock to avoid concurrent updates */ + mutex_lock(&vcpu->arch.exit_timing_lock); vcpu->arch.last_exit_type = 0xDEAD; for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { @@ -49,7 +49,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) vcpu->arch.timing_exit.tv64 = 0; vcpu->arch.timing_last_enter.tv64 = 0; - mutex_unlock(&vcpu->mutex); + mutex_unlock(&vcpu->arch.exit_timing_lock); } static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) @@ -65,6 +65,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) return; } + mutex_lock(&vcpu->arch.exit_timing_lock); + vcpu->arch.timing_count_type[type]++; /* sum */ @@ -93,6 +95,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) vcpu->arch.timing_min_duration[type] = duration; if (unlikely(duration > vcpu->arch.timing_max_duration[type])) vcpu->arch.timing_max_duration[type] = duration; + + mutex_unlock(&vcpu->arch.exit_timing_lock); } void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) @@ -147,17 +151,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private) { struct kvm_vcpu *vcpu = m->private; int i; + u64 min, max, sum, sum_quad; seq_printf(m, "%s", "type count min max sum sum_squared\n"); + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + + min = vcpu->arch.timing_min_duration[i]; + do_div(min, tb_ticks_per_usec); + max = vcpu->arch.timing_max_duration[i]; + do_div(max, tb_ticks_per_usec); + sum = vcpu->arch.timing_sum_duration[i]; + do_div(sum, tb_ticks_per_usec); + sum_quad = vcpu->arch.timing_sum_quad_duration[i]; + do_div(sum_quad, tb_ticks_per_usec); + seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", kvm_exit_names[i], vcpu->arch.timing_count_type[i], - vcpu->arch.timing_min_duration[i], - vcpu->arch.timing_max_duration[i], - vcpu->arch.timing_sum_duration[i], - vcpu->arch.timing_sum_quad_duration[i]); + min, + max, + sum, + sum_quad); + } return 0; } diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index f53e09c7dac..13b676c20d1 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -6,14 +6,6 @@ #include <asm/system.h> -void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask) -{ - if (mem_init_done) - return kmalloc(size, mask); - else - return alloc_bootmem(size); -} - void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 4d4eeb90048..53dcb6b1b70 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -6,6 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <asm/page.h> #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> @@ -15,9 +16,9 @@ PPC64_CACHES: .tc ppc64_caches[TC],ppc64_caches .section ".text" - -_GLOBAL(copy_4K_page) - li r5,4096 /* 4K page size */ +_GLOBAL(copy_page) + lis r5,PAGE_SIZE@h + ori r5,r5,PAGE_SIZE@l BEGIN_FTR_SECTION ld r10,PPC64_CACHES@toc(r2) lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */ diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c index deac4d30daf..e91615abae6 100644 --- a/arch/powerpc/lib/devres.c +++ b/arch/powerpc/lib/devres.c @@ -9,11 +9,11 @@ #include <linux/device.h> /* devres_*(), devm_ioremap_release() */ #include <linux/gfp.h> -#include <linux/io.h> /* ioremap_flags() */ +#include <linux/io.h> /* ioremap_prot() */ #include <linux/module.h> /* EXPORT_SYMBOL() */ /** - * devm_ioremap_prot - Managed ioremap_flags() + * devm_ioremap_prot - Managed ioremap_prot() * @dev: Generic device to remap IO address for * @offset: BUS offset to map * @size: Size of map @@ -31,7 +31,7 @@ void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, if (!ptr) return NULL; - addr = ioremap_flags(offset, size, flags); + addr = ioremap_prot(offset, size, flags); if (addr) { *ptr = addr; devres_add(dev, ptr); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index ae5189ab004..9a52349874e 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/kprobes.h> #include <linux/ptrace.h> +#include <linux/prefetch.h> #include <asm/sstep.h> #include <asm/processor.h> #include <asm/uaccess.h> @@ -45,6 +46,18 @@ extern int do_stxvd2x(int rn, unsigned long ea); #endif /* + * Emulate the truncation of 64 bit values in 32-bit mode. + */ +static unsigned long truncate_if_32bit(unsigned long msr, unsigned long val) +{ +#ifdef __powerpc64__ + if ((msr & MSR_64BIT) == 0) + val &= 0xffffffffUL; +#endif + return val; +} + +/* * Determine whether a conditional branch instruction would branch. */ static int __kprobes branch_taken(unsigned int instr, struct pt_regs *regs) @@ -90,11 +103,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs if (instr & 0x04000000) /* update forms */ regs->gpr[ra] = ea; } -#ifdef __powerpc64__ - if (!(regs->msr & MSR_SF)) - ea &= 0xffffffffUL; -#endif - return ea; + + return truncate_if_32bit(regs->msr, ea); } #ifdef __powerpc64__ @@ -113,9 +123,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg if ((instr & 3) == 1) /* update forms */ regs->gpr[ra] = ea; } - if (!(regs->msr & MSR_SF)) - ea &= 0xffffffffUL; - return ea; + + return truncate_if_32bit(regs->msr, ea); } #endif /* __powerpc64 */ @@ -136,11 +145,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs if (do_update) /* update forms */ regs->gpr[ra] = ea; } -#ifdef __powerpc64__ - if (!(regs->msr & MSR_SF)) - ea &= 0xffffffffUL; -#endif - return ea; + + return truncate_if_32bit(regs->msr, ea); } /* @@ -466,7 +472,7 @@ static void __kprobes set_cr0(struct pt_regs *regs, int rd) regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); #ifdef __powerpc64__ - if (!(regs->msr & MSR_SF)) + if (!(regs->msr & MSR_64BIT)) val = (int) val; #endif if (val < 0) @@ -487,7 +493,7 @@ static void __kprobes add_with_carry(struct pt_regs *regs, int rd, ++val; regs->gpr[rd] = val; #ifdef __powerpc64__ - if (!(regs->msr & MSR_SF)) { + if (!(regs->msr & MSR_64BIT)) { val = (unsigned int) val; val1 = (unsigned int) val1; } @@ -570,8 +576,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) if ((instr & 2) == 0) imm += regs->nip; regs->nip += 4; - if ((regs->msr & MSR_SF) == 0) - regs->nip &= 0xffffffffUL; + regs->nip = truncate_if_32bit(regs->msr, regs->nip); if (instr & 1) regs->link = regs->nip; if (branch_taken(instr, regs)) @@ -604,13 +609,9 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) imm -= 0x04000000; if ((instr & 2) == 0) imm += regs->nip; - if (instr & 1) { - regs->link = regs->nip + 4; - if ((regs->msr & MSR_SF) == 0) - regs->link &= 0xffffffffUL; - } - if ((regs->msr & MSR_SF) == 0) - imm &= 0xffffffffUL; + if (instr & 1) + regs->link = truncate_if_32bit(regs->msr, regs->nip + 4); + imm = truncate_if_32bit(regs->msr, imm); regs->nip = imm; return 1; case 19: @@ -618,11 +619,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 16: /* bclr */ case 528: /* bcctr */ imm = (instr & 0x400)? regs->ctr: regs->link; - regs->nip += 4; - if ((regs->msr & MSR_SF) == 0) { - regs->nip &= 0xffffffffUL; - imm &= 0xffffffffUL; - } + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + imm = truncate_if_32bit(regs->msr, imm); if (instr & 1) regs->link = regs->nip; if (branch_taken(instr, regs)) @@ -1616,11 +1614,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 0; /* invoke DSI if -EFAULT? */ } instr_done: - regs->nip += 4; -#ifdef __powerpc64__ - if ((regs->msr & MSR_SF) == 0) - regs->nip &= 0xffffffffUL; -#endif + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); return 1; logical_done: diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 5b7dd4ea02b..a242b5d7cbe 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -118,7 +118,7 @@ _GLOBAL(__hash_page_4K) BEGIN_FTR_SECTION cmpdi r9,0 /* check segment size */ bne 3f -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) /* Calc va and put it in r29 */ rldicr r29,r5,28,63-28 rldicl r3,r3,0,36 @@ -401,7 +401,7 @@ _GLOBAL(__hash_page_4K) BEGIN_FTR_SECTION cmpdi r9,0 /* check segment size */ bne 3f -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) /* Calc va and put it in r29 */ rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */ rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */ @@ -715,7 +715,7 @@ BEGIN_FTR_SECTION andi. r0,r31,_PAGE_NO_CACHE /* If so, bail out and refault as a 4k page */ bne- ht64_bail_ok -END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_CI_LARGE_PAGE) /* Prepare new PTE value (turn access RW into DIRTY, then * add BUSY and ACCESSED) */ @@ -736,7 +736,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) BEGIN_FTR_SECTION cmpdi r9,0 /* check segment size */ bne 3f -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) /* Calc va and put it in r29 */ rldicr r29,r5,28,63-28 rldicl r3,r3,0,36 diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 784a400e078..dfd764896db 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -50,9 +50,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize) case MMU_PAGE_4K: va &= ~0xffful; va |= ssize << 8; - asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), - %2) - : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206) + asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) + : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206) : "memory"); break; default: @@ -61,9 +60,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize) va |= penc << 12; va |= ssize << 8; va |= 1; /* L */ - asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), - %2) - : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206) + asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2) + : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206) : "memory"); break; } @@ -98,8 +96,8 @@ static inline void __tlbiel(unsigned long va, int psize, int ssize) static inline void tlbie(unsigned long va, int psize, int ssize, int local) { - unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL); - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (use_local) use_local = mmu_psize_defs[psize].tlbiel; @@ -503,7 +501,7 @@ static void native_flush_hash_range(unsigned long number, int local) } pte_iterate_hashed_end(); } - if (cpu_has_feature(CPU_FTR_TLBIEL) && + if (mmu_has_feature(MMU_FTR_TLBIEL) && mmu_psize_defs[psize].tlbiel && local) { asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { @@ -517,7 +515,7 @@ static void native_flush_hash_range(unsigned long number, int local) } asm volatile("ptesync":::"memory"); } else { - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 58a022d0f46..26b2872b3d0 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -53,6 +53,7 @@ #include <asm/sections.h> #include <asm/spu.h> #include <asm/udbg.h> +#include <asm/code-patching.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -258,11 +259,11 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node, for (; size >= 4; size -= 4, ++prop) { if (prop[0] == 40) { DBG("1T segment support detected\n"); - cur_cpu_spec->cpu_features |= CPU_FTR_1T_SEGMENT; + cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT; return 1; } } - cur_cpu_spec->cpu_features &= ~CPU_FTR_NO_SLBIE_B; + cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B; return 0; } @@ -288,7 +289,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, if (prop != NULL) { DBG("Page sizes from device-tree:\n"); size /= 4; - cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE); + cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); while(size > 0) { unsigned int shift = prop[0]; unsigned int slbenc = prop[1]; @@ -316,7 +317,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, break; case 0x18: idx = MMU_PAGE_16M; - cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE; + cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; break; case 0x22: idx = MMU_PAGE_16G; @@ -411,7 +412,7 @@ static void __init htab_init_page_sizes(void) * Not in the device-tree, let's fallback on known size * list for 16M capable GP & GR */ - if (cpu_has_feature(CPU_FTR_16M_PAGE)) + if (mmu_has_feature(MMU_FTR_16M_PAGE)) memcpy(mmu_psize_defs, mmu_psize_defaults_gp, sizeof(mmu_psize_defaults_gp)); found: @@ -441,7 +442,7 @@ static void __init htab_init_page_sizes(void) mmu_vmalloc_psize = MMU_PAGE_64K; if (mmu_linear_psize == MMU_PAGE_4K) mmu_linear_psize = MMU_PAGE_64K; - if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) { + if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) { /* * Don't use 64k pages for ioremap on pSeries, since * that would stop us accessing the HEA ethernet. @@ -547,15 +548,7 @@ int remove_section_mapping(unsigned long start, unsigned long end) } #endif /* CONFIG_MEMORY_HOTPLUG */ -static inline void make_bl(unsigned int *insn_addr, void *func) -{ - unsigned long funcp = *((unsigned long *)func); - int offset = funcp - (unsigned long)insn_addr; - - *insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc)); - flush_icache_range((unsigned long)insn_addr, 4+ - (unsigned long)insn_addr); -} +#define FUNCTION_TEXT(A) ((*(unsigned long *)(A))) static void __init htab_finish_init(void) { @@ -570,16 +563,33 @@ static void __init htab_finish_init(void) extern unsigned int *ht64_call_hpte_remove; extern unsigned int *ht64_call_hpte_updatepp; - make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert); - make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert); - make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove); - make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp); + patch_branch(ht64_call_hpte_insert1, + FUNCTION_TEXT(ppc_md.hpte_insert), + BRANCH_SET_LINK); + patch_branch(ht64_call_hpte_insert2, + FUNCTION_TEXT(ppc_md.hpte_insert), + BRANCH_SET_LINK); + patch_branch(ht64_call_hpte_remove, + FUNCTION_TEXT(ppc_md.hpte_remove), + BRANCH_SET_LINK); + patch_branch(ht64_call_hpte_updatepp, + FUNCTION_TEXT(ppc_md.hpte_updatepp), + BRANCH_SET_LINK); + #endif /* CONFIG_PPC_HAS_HASH_64K */ - make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert); - make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert); - make_bl(htab_call_hpte_remove, ppc_md.hpte_remove); - make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp); + patch_branch(htab_call_hpte_insert1, + FUNCTION_TEXT(ppc_md.hpte_insert), + BRANCH_SET_LINK); + patch_branch(htab_call_hpte_insert2, + FUNCTION_TEXT(ppc_md.hpte_insert), + BRANCH_SET_LINK); + patch_branch(htab_call_hpte_remove, + FUNCTION_TEXT(ppc_md.hpte_remove), + BRANCH_SET_LINK); + patch_branch(htab_call_hpte_updatepp, + FUNCTION_TEXT(ppc_md.hpte_updatepp), + BRANCH_SET_LINK); } static void __init htab_initialize(void) @@ -598,7 +608,7 @@ static void __init htab_initialize(void) /* Initialize page sizes */ htab_init_page_sizes(); - if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) { mmu_kernel_ssize = MMU_SEGSIZE_1T; mmu_highuser_ssize = MMU_SEGSIZE_1T; printk(KERN_INFO "Using 1TB segments\n"); @@ -739,7 +749,7 @@ void __init early_init_mmu(void) /* Initialize stab / SLB management except on iSeries */ - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) slb_initialize(); else if (!firmware_has_feature(FW_FEATURE_ISERIES)) stab_initialize(get_paca()->stab_real); @@ -756,7 +766,7 @@ void __cpuinit early_init_mmu_secondary(void) * in real mode on pSeries and we want a virtual address on * iSeries anyway */ - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) slb_initialize(); else stab_initialize(get_paca()->stab_addr); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 9bb249c3046..0b9a5c1901b 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -529,7 +529,7 @@ static int __init hugetlbpage_init(void) { int psize; - if (!cpu_has_feature(CPU_FTR_16M_PAGE)) + if (!mmu_has_feature(MMU_FTR_16M_PAGE)) return -ENODEV; for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 2535828aa84..3bafc3deca6 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -20,9 +20,205 @@ #include <linux/idr.h> #include <linux/module.h> #include <linux/gfp.h> +#include <linux/slab.h> #include <asm/mmu_context.h> +#ifdef CONFIG_PPC_ICSWX +/* + * The processor and its L2 cache cause the icswx instruction to + * generate a COP_REQ transaction on PowerBus. The transaction has + * no address, and the processor does not perform an MMU access + * to authenticate the transaction. The command portion of the + * PowerBus COP_REQ transaction includes the LPAR_ID (LPID) and + * the coprocessor Process ID (PID), which the coprocessor compares + * to the authorized LPID and PID held in the coprocessor, to determine + * if the process is authorized to generate the transaction. + * The data of the COP_REQ transaction is 128-byte or less and is + * placed in cacheable memory on a 128-byte cache line boundary. + * + * The task to use a coprocessor should use use_cop() to allocate + * a coprocessor PID before executing icswx instruction. use_cop() + * also enables the coprocessor context switching. Drop_cop() is + * used to free the coprocessor PID. + * + * Example: + * Host Fabric Interface (HFI) is a PowerPC network coprocessor. + * Each HFI have multiple windows. Each HFI window serves as a + * network device sending to and receiving from HFI network. + * HFI immediate send function uses icswx instruction. The immediate + * send function allows small (single cache-line) packets be sent + * without using the regular HFI send FIFO and doorbell, which are + * much slower than immediate send. + * + * For each task intending to use HFI immediate send, the HFI driver + * calls use_cop() to obtain a coprocessor PID for the task. + * The HFI driver then allocate a free HFI window and save the + * coprocessor PID to the HFI window to allow the task to use the + * HFI window. + * + * The HFI driver repeatedly creates immediate send packets and + * issues icswx instruction to send data through the HFI window. + * The HFI compares the coprocessor PID in the CPU PID register + * to the PID held in the HFI window to determine if the transaction + * is allowed. + * + * When the task to release the HFI window, the HFI driver calls + * drop_cop() to release the coprocessor PID. + */ + +#define COP_PID_NONE 0 +#define COP_PID_MIN (COP_PID_NONE + 1) +#define COP_PID_MAX (0xFFFF) + +static DEFINE_SPINLOCK(mmu_context_acop_lock); +static DEFINE_IDA(cop_ida); + +void switch_cop(struct mm_struct *next) +{ + mtspr(SPRN_PID, next->context.cop_pid); + mtspr(SPRN_ACOP, next->context.acop); +} + +static int new_cop_pid(struct ida *ida, int min_id, int max_id, + spinlock_t *lock) +{ + int index; + int err; + +again: + if (!ida_pre_get(ida, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(lock); + err = ida_get_new_above(ida, min_id, &index); + spin_unlock(lock); + + if (err == -EAGAIN) + goto again; + else if (err) + return err; + + if (index > max_id) { + spin_lock(lock); + ida_remove(ida, index); + spin_unlock(lock); + return -ENOMEM; + } + + return index; +} + +static void sync_cop(void *arg) +{ + struct mm_struct *mm = arg; + + if (mm == current->active_mm) + switch_cop(current->active_mm); +} + +/** + * Start using a coprocessor. + * @acop: mask of coprocessor to be used. + * @mm: The mm the coprocessor to associate with. Most likely current mm. + * + * Return a positive PID if successful. Negative errno otherwise. + * The returned PID will be fed to the coprocessor to determine if an + * icswx transaction is authenticated. + */ +int use_cop(unsigned long acop, struct mm_struct *mm) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_ICSWX)) + return -ENODEV; + + if (!mm || !acop) + return -EINVAL; + + /* We need to make sure mm_users doesn't change */ + down_read(&mm->mmap_sem); + spin_lock(mm->context.cop_lockp); + + if (mm->context.cop_pid == COP_PID_NONE) { + ret = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX, + &mmu_context_acop_lock); + if (ret < 0) + goto out; + + mm->context.cop_pid = ret; + } + mm->context.acop |= acop; + + sync_cop(mm); + + /* + * If this is a threaded process then there might be other threads + * running. We need to send an IPI to force them to pick up any + * change in PID and ACOP. + */ + if (atomic_read(&mm->mm_users) > 1) + smp_call_function(sync_cop, mm, 1); + + ret = mm->context.cop_pid; + +out: + spin_unlock(mm->context.cop_lockp); + up_read(&mm->mmap_sem); + + return ret; +} +EXPORT_SYMBOL_GPL(use_cop); + +/** + * Stop using a coprocessor. + * @acop: mask of coprocessor to be stopped. + * @mm: The mm the coprocessor associated with. + */ +void drop_cop(unsigned long acop, struct mm_struct *mm) +{ + int free_pid = COP_PID_NONE; + + if (!cpu_has_feature(CPU_FTR_ICSWX)) + return; + + if (WARN_ON_ONCE(!mm)) + return; + + /* We need to make sure mm_users doesn't change */ + down_read(&mm->mmap_sem); + spin_lock(mm->context.cop_lockp); + + mm->context.acop &= ~acop; + + if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) { + free_pid = mm->context.cop_pid; + mm->context.cop_pid = COP_PID_NONE; + } + + sync_cop(mm); + + /* + * If this is a threaded process then there might be other threads + * running. We need to send an IPI to force them to pick up any + * change in PID and ACOP. + */ + if (atomic_read(&mm->mm_users) > 1) + smp_call_function(sync_cop, mm, 1); + + if (free_pid != COP_PID_NONE) { + spin_lock(&mmu_context_acop_lock); + ida_remove(&cop_ida, free_pid); + spin_unlock(&mmu_context_acop_lock); + } + + spin_unlock(mm->context.cop_lockp); + up_read(&mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(drop_cop); + +#endif /* CONFIG_PPC_ICSWX */ + static DEFINE_SPINLOCK(mmu_context_lock); static DEFINE_IDA(mmu_context_ida); @@ -31,7 +227,6 @@ static DEFINE_IDA(mmu_context_ida); * Each segment contains 2^28 bytes. Each context maps 2^44 bytes, * so we can support 2^19-1 contexts (19 == 35 + 28 - 44). */ -#define NO_CONTEXT 0 #define MAX_CONTEXT ((1UL << 19) - 1) int __init_new_context(void) @@ -79,6 +274,16 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) slice_set_user_psize(mm, mmu_virtual_psize); subpage_prot_init_new_context(mm); mm->context.id = index; +#ifdef CONFIG_PPC_ICSWX + mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL); + if (!mm->context.cop_lockp) { + __destroy_context(index); + subpage_prot_free(mm); + mm->context.id = MMU_NO_CONTEXT; + return -ENOMEM; + } + spin_lock_init(mm->context.cop_lockp); +#endif /* CONFIG_PPC_ICSWX */ return 0; } @@ -93,7 +298,12 @@ EXPORT_SYMBOL_GPL(__destroy_context); void destroy_context(struct mm_struct *mm) { +#ifdef CONFIG_PPC_ICSWX + drop_cop(mm->context.acop, mm); + kfree(mm->context.cop_lockp); + mm->context.cop_lockp = NULL; +#endif /* CONFIG_PPC_ICSWX */ __destroy_context(mm->context.id); subpage_prot_free(mm); - mm->context.id = NO_CONTEXT; + mm->context.id = MMU_NO_CONTEXT; } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index c0aab52da3a..336807de550 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -338,12 +338,14 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, return NOTIFY_OK; switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); @@ -407,7 +409,17 @@ void __init mmu_context_init(void) } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { first_context = 1; last_context = 65535; - } else { + } else +#ifdef CONFIG_PPC_BOOK3E_MMU + if (mmu_has_feature(MMU_FTR_TYPE_3E)) { + u32 mmucfg = mfspr(SPRN_MMUCFG); + u32 pid_bits = (mmucfg & MMUCFG_PIDSIZE_MASK) + >> MMUCFG_PIDSIZE_SHIFT; + first_context = 1; + last_context = (1UL << (pid_bits + 1)) - 1; + } else +#endif + { first_context = 1; last_context = 255; } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 5ec1dad2a19..2164006fe17 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -311,14 +311,13 @@ EXPORT_SYMBOL_GPL(of_node_to_nid); static int __init find_min_common_depth(void) { int depth; - struct device_node *rtas_root; struct device_node *chosen; + struct device_node *root; const char *vec5; - rtas_root = of_find_node_by_path("/rtas"); - - if (!rtas_root) - return -1; + root = of_find_node_by_path("/rtas"); + if (!root) + root = of_find_node_by_path("/"); /* * This property is a set of 32-bit integers, each representing @@ -332,7 +331,7 @@ static int __init find_min_common_depth(void) * NUMA boundary and the following are progressively less significant * boundaries. There can be more than one level of NUMA. */ - distance_ref_points = of_get_property(rtas_root, + distance_ref_points = of_get_property(root, "ibm,associativity-reference-points", &distance_ref_points_depth); @@ -376,11 +375,11 @@ static int __init find_min_common_depth(void) distance_ref_points_depth = MAX_DISTANCE_REF_POINTS; } - of_node_put(rtas_root); + of_node_put(root); return depth; err: - of_node_put(rtas_root); + of_node_put(root); return -1; } @@ -1453,7 +1452,7 @@ int arch_update_cpu_topology(void) unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; struct sys_device *sysdev; - for_each_cpu_mask(cpu, cpu_associativity_changes_mask) { + for_each_cpu(cpu,&cpu_associativity_changes_mask) { vphn_get_associativity(cpu, associativity); nid = associativity_to_nid(associativity); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 8dc41c0157f..51f87956f8f 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -133,7 +133,15 @@ ioremap(phys_addr_t addr, unsigned long size) EXPORT_SYMBOL(ioremap); void __iomem * -ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) +ioremap_wc(phys_addr_t addr, unsigned long size) +{ + return __ioremap_caller(addr, size, _PAGE_NO_CACHE, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); + +void __iomem * +ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) { /* writeable implies dirty for kernel addresses */ if (flags & _PAGE_RW) @@ -152,7 +160,7 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); } -EXPORT_SYMBOL(ioremap_flags); +EXPORT_SYMBOL(ioremap_prot); void __iomem * __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 88927a05cdc..6e595f6496d 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -255,7 +255,17 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size) return __ioremap_caller(addr, size, flags, caller); } -void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, +void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size) +{ + unsigned long flags = _PAGE_NO_CACHE; + void *caller = __builtin_return_address(0); + + if (ppc_md.ioremap) + return ppc_md.ioremap(addr, size, flags, caller); + return __ioremap_caller(addr, size, flags, caller); +} + +void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) { void *caller = __builtin_return_address(0); @@ -311,7 +321,8 @@ void iounmap(volatile void __iomem *token) } EXPORT_SYMBOL(ioremap); -EXPORT_SYMBOL(ioremap_flags); +EXPORT_SYMBOL(ioremap_wc); +EXPORT_SYMBOL(ioremap_prot); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(__ioremap_at); EXPORT_SYMBOL(iounmap); diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 1d98ecc8eec..e22276cb67a 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -24,6 +24,7 @@ #include <asm/firmware.h> #include <linux/compiler.h> #include <asm/udbg.h> +#include <asm/code-patching.h> extern void slb_allocate_realmode(unsigned long ea); @@ -166,7 +167,7 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2) int esid_1t_count; /* System is not 1T segment size capable. */ - if (!cpu_has_feature(CPU_FTR_1T_SEGMENT)) + if (!mmu_has_feature(MMU_FTR_1T_SEGMENT)) return (GET_ESID(addr1) == GET_ESID(addr2)); esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) + @@ -201,7 +202,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) */ hard_irq_disable(); offset = get_paca()->slb_cache_ptr; - if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && + if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { int i; asm volatile("isync" : : : "memory"); @@ -249,9 +250,8 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) static inline void patch_slb_encoding(unsigned int *insn_addr, unsigned int immed) { - *insn_addr = (*insn_addr & 0xffff0000) | immed; - flush_icache_range((unsigned long)insn_addr, 4+ - (unsigned long)insn_addr); + int insn = (*insn_addr & 0xffff0000) | immed; + patch_instruction(insn_addr, insn); } void slb_set_size(u16 size) diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 95ce3558169..ef653dc95b6 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -58,7 +58,7 @@ _GLOBAL(slb_miss_kernel_load_linear) li r11,0 BEGIN_FTR_SECTION b slb_finish_load -END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T 1: @@ -87,7 +87,7 @@ _GLOBAL(slb_miss_kernel_load_vmemmap) 6: BEGIN_FTR_SECTION b slb_finish_load -END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T 0: /* user address: proto-VSID = context << 15 | ESID. First check @@ -138,11 +138,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) ld r9,PACACONTEXTID(r13) BEGIN_FTR_SECTION cmpldi r10,0x1000 -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) rldimi r10,r9,USER_ESID_BITS,0 BEGIN_FTR_SECTION bge slb_finish_load_1T -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) b slb_finish_load 8: /* invalid EA */ diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 446a01842a7..41e31642a86 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -243,7 +243,7 @@ void __init stabs_alloc(void) { int cpu; - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) return; for_each_possible_cpu(cpu) { diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c index aa46e9d1e77..19395f18b1d 100644 --- a/arch/powerpc/platforms/44x/iss4xx.c +++ b/arch/powerpc/platforms/44x/iss4xx.c @@ -87,7 +87,7 @@ static void __cpuinit smp_iss4xx_setup_cpu(int cpu) mpic_setup_this_cpu(); } -static void __cpuinit smp_iss4xx_kick_cpu(int cpu) +static int __cpuinit smp_iss4xx_kick_cpu(int cpu) { struct device_node *cpunode = of_get_cpu_node(cpu, NULL); const u64 *spin_table_addr_prop; @@ -104,7 +104,7 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu) NULL); if (spin_table_addr_prop == NULL) { pr_err("CPU%d: Can't start, missing cpu-release-addr !\n", cpu); - return; + return -ENOENT; } /* Assume it's mapped as part of the linear mapping. This is a bit @@ -117,6 +117,8 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu) smp_wmb(); spin_table[1] = __pa(start_secondary_47x); mb(); + + return 0; } static struct smp_ops_t iss_smp_ops = { diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index cfc4b200998..9f09319352c 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -61,7 +61,7 @@ irq_to_pic_bit(unsigned int irq) static void cpld_mask_irq(struct irq_data *d) { - unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d); void __iomem *pic_mask = irq_to_pic_mask(cpld_irq); out_8(pic_mask, @@ -71,7 +71,7 @@ cpld_mask_irq(struct irq_data *d) static void cpld_unmask_irq(struct irq_data *d) { - unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d); void __iomem *pic_mask = irq_to_pic_mask(cpld_irq); out_8(pic_mask, @@ -97,7 +97,7 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, status |= (ignore | mask); if (status == 0xff) - return NO_IRQ_IGNORE; + return NO_IRQ; cpld_irq = ffz(status) + offset; @@ -109,14 +109,14 @@ cpld_pic_cascade(unsigned int irq, struct irq_desc *desc) { irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, &cpld_regs->pci_mask); - if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) { + if (irq != NO_IRQ) { generic_handle_irq(irq); return; } irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status, &cpld_regs->misc_mask); - if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) { + if (irq != NO_IRQ) { generic_handle_irq(irq); return; } diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 57a6a349e93..96f85e5e0cd 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -56,7 +56,7 @@ static void media5200_irq_unmask(struct irq_data *d) spin_lock_irqsave(&media5200_irq.lock, flags); val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE); - val |= 1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq); + val |= 1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d)); out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val); spin_unlock_irqrestore(&media5200_irq.lock, flags); } @@ -68,7 +68,7 @@ static void media5200_irq_mask(struct irq_data *d) spin_lock_irqsave(&media5200_irq.lock, flags); val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE); - val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq)); + val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d))); out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val); spin_unlock_irqrestore(&media5200_irq.lock, flags); } diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 1dd15400f6f..1a9a4957057 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -157,48 +157,30 @@ static inline void io_be_clrbit(u32 __iomem *addr, int bitno) */ static void mpc52xx_extirq_mask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_clrbit(&intr->ctrl, 11 - l2irq); } static void mpc52xx_extirq_unmask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_setbit(&intr->ctrl, 11 - l2irq); } static void mpc52xx_extirq_ack(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_setbit(&intr->ctrl, 27-l2irq); } static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type) { u32 ctrl_reg, type; - int irq; - int l2irq; + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; void *handler = handle_level_irq; - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - - pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__, irq, l2irq, flow_type); + pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__, + (int) irqd_to_hwirq(d), l2irq, flow_type); switch (flow_type) { case IRQF_TRIGGER_HIGH: type = 0; break; @@ -237,23 +219,13 @@ static int mpc52xx_null_set_type(struct irq_data *d, unsigned int flow_type) static void mpc52xx_main_mask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_setbit(&intr->main_mask, 16 - l2irq); } static void mpc52xx_main_unmask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_clrbit(&intr->main_mask, 16 - l2irq); } @@ -270,23 +242,13 @@ static struct irq_chip mpc52xx_main_irqchip = { */ static void mpc52xx_periph_mask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_setbit(&intr->per_mask, 31 - l2irq); } static void mpc52xx_periph_unmask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_clrbit(&intr->per_mask, 31 - l2irq); } @@ -303,34 +265,19 @@ static struct irq_chip mpc52xx_periph_irqchip = { */ static void mpc52xx_sdma_mask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_setbit(&sdma->IntMask, l2irq); } static void mpc52xx_sdma_unmask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_clrbit(&sdma->IntMask, l2irq); } static void mpc52xx_sdma_ack(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; out_be32(&sdma->IntPend, 1 << l2irq); } @@ -539,7 +486,7 @@ void __init mpc52xx_init_irq(void) unsigned int mpc52xx_get_irq(void) { u32 status; - int irq = NO_IRQ_IGNORE; + int irq; status = in_be32(&intr->enc_status); if (status & 0x00000400) { /* critical */ @@ -562,6 +509,8 @@ unsigned int mpc52xx_get_irq(void) } else { irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET); } + } else { + return NO_IRQ; } return irq_linear_revmap(mpc52xx_irqhost, irq); diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index 4a4eb6ffa12..8ccf9ed62fe 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -42,7 +42,7 @@ struct pq2ads_pci_pic { static void pq2ads_pci_mask_irq(struct irq_data *d) { struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d); - int irq = NUM_IRQS - virq_to_hw(d->irq) - 1; + int irq = NUM_IRQS - irqd_to_hwirq(d) - 1; if (irq != -1) { unsigned long flags; @@ -58,7 +58,7 @@ static void pq2ads_pci_mask_irq(struct irq_data *d) static void pq2ads_pci_unmask_irq(struct irq_data *d) { struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d); - int irq = NUM_IRQS - virq_to_hw(d->irq) - 1; + int irq = NUM_IRQS - irqd_to_hwirq(d) - 1; if (irq != -1) { unsigned long flags; @@ -112,16 +112,8 @@ static int pci_pic_host_map(struct irq_host *h, unsigned int virq, return 0; } -static void pci_host_unmap(struct irq_host *h, unsigned int virq) -{ - /* remove chip and handler */ - irq_set_chip_data(virq, NULL); - irq_set_chip(virq, NULL); -} - static struct irq_host_ops pci_pic_host_ops = { .map = pci_pic_host_map, - .unmap = pci_host_unmap, }; int __init pq2ads_pci_init_irq(void) diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 0d00ff9d05a..d6a93a10c0f 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -41,7 +41,7 @@ extern void __early_start(void); #define NUM_BOOT_ENTRY 8 #define SIZE_BOOT_ENTRY (NUM_BOOT_ENTRY * sizeof(u32)) -static void __init +static int __init smp_85xx_kick_cpu(int nr) { unsigned long flags; @@ -60,7 +60,7 @@ smp_85xx_kick_cpu(int nr) if (cpu_rel_addr == NULL) { printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr); - return; + return -ENOENT; } /* @@ -107,6 +107,8 @@ smp_85xx_kick_cpu(int nr) iounmap(bptr_vaddr); pr_debug("waited %d msecs for CPU #%d.\n", n, nr); + + return 0; } static void __init @@ -233,8 +235,10 @@ void __init mpc85xx_smp_init(void) smp_85xx_ops.message_pass = smp_mpic_message_pass; } - if (cpu_has_feature(CPU_FTR_DBELL)) - smp_85xx_ops.message_pass = doorbell_message_pass; + if (cpu_has_feature(CPU_FTR_DBELL)) { + smp_85xx_ops.message_pass = smp_muxed_ipi_message_pass; + smp_85xx_ops.cause_ipi = doorbell_cause_ipi; + } BUG_ON(!smp_85xx_ops.message_pass); diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index db864623b4a..12cb9bb2cc6 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -48,8 +48,6 @@ static struct socrates_fpga_irq_info fpga_irqs[SOCRATES_FPGA_NUM_IRQS] = { [8] = {0, IRQ_TYPE_LEVEL_HIGH}, }; -#define socrates_fpga_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq) - static DEFINE_RAW_SPINLOCK(socrates_fpga_pic_lock); static void __iomem *socrates_fpga_pic_iobase; @@ -110,11 +108,9 @@ void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc) static void socrates_fpga_pic_ack(struct irq_data *d) { unsigned long flags; - unsigned int hwirq, irq_line; + unsigned int irq_line, hwirq = irqd_to_hwirq(d); uint32_t mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - irq_line = fpga_irqs[hwirq].irq_line; raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) @@ -127,12 +123,10 @@ static void socrates_fpga_pic_ack(struct irq_data *d) static void socrates_fpga_pic_mask(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); int irq_line; u32 mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - irq_line = fpga_irqs[hwirq].irq_line; raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) @@ -145,12 +139,10 @@ static void socrates_fpga_pic_mask(struct irq_data *d) static void socrates_fpga_pic_mask_ack(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); int irq_line; u32 mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - irq_line = fpga_irqs[hwirq].irq_line; raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) @@ -164,12 +156,10 @@ static void socrates_fpga_pic_mask_ack(struct irq_data *d) static void socrates_fpga_pic_unmask(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); int irq_line; u32 mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - irq_line = fpga_irqs[hwirq].irq_line; raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) @@ -182,12 +172,10 @@ static void socrates_fpga_pic_unmask(struct irq_data *d) static void socrates_fpga_pic_eoi(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); int irq_line; u32 mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - irq_line = fpga_irqs[hwirq].irq_line; raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) @@ -201,12 +189,10 @@ static int socrates_fpga_pic_set_type(struct irq_data *d, unsigned int flow_type) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); int polarity; u32 mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - if (fpga_irqs[hwirq].type != IRQ_TYPE_NONE) return -EINVAL; diff --git a/arch/powerpc/platforms/86xx/gef_pic.c b/arch/powerpc/platforms/86xx/gef_pic.c index 0beec7d5566..94594e58594 100644 --- a/arch/powerpc/platforms/86xx/gef_pic.c +++ b/arch/powerpc/platforms/86xx/gef_pic.c @@ -46,8 +46,6 @@ #define GEF_PIC_CPU0_MCP_MASK GEF_PIC_MCP_MASK(0) #define GEF_PIC_CPU1_MCP_MASK GEF_PIC_MCP_MASK(1) -#define gef_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq) - static DEFINE_RAW_SPINLOCK(gef_pic_lock); @@ -113,11 +111,9 @@ void gef_pic_cascade(unsigned int irq, struct irq_desc *desc) static void gef_pic_mask(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); u32 mask; - hwirq = gef_irq_to_hw(d->irq); - raw_spin_lock_irqsave(&gef_pic_lock, flags); mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0)); mask &= ~(1 << hwirq); @@ -136,11 +132,9 @@ static void gef_pic_mask_ack(struct irq_data *d) static void gef_pic_unmask(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); u32 mask; - hwirq = gef_irq_to_hw(d->irq); - raw_spin_lock_irqsave(&gef_pic_lock, flags); mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0)); mask |= (1 << hwirq); diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index 018cc67be42..a896511690c 100644 --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -66,7 +66,7 @@ static void __init mpc8610_suspend_init(void) return; } - ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9/wakeup", NULL); + ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9:wakeup", NULL); if (ret) { pr_err("%s: can't request pixis event IRQ: %d\n", __func__, ret); @@ -105,45 +105,77 @@ machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices); #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) -static u32 get_busfreq(void) -{ - struct device_node *node; - - u32 fs_busfreq = 0; - node = of_find_node_by_type(NULL, "cpu"); - if (node) { - unsigned int size; - const unsigned int *prop = - of_get_property(node, "bus-frequency", &size); - if (prop) - fs_busfreq = *prop; - of_node_put(node); - }; - return fs_busfreq; -} +/* + * DIU Area Descriptor + * + * The MPC8610 reference manual shows the bits of the AD register in + * little-endian order, which causes the BLUE_C field to be split into two + * parts. To simplify the definition of the MAKE_AD() macro, we define the + * fields in big-endian order and byte-swap the result. + * + * So even though the registers don't look like they're in the + * same bit positions as they are on the P1022, the same value is written to + * the AD register on the MPC8610 and on the P1022. + */ +#define AD_BYTE_F 0x10000000 +#define AD_ALPHA_C_MASK 0x0E000000 +#define AD_ALPHA_C_SHIFT 25 +#define AD_BLUE_C_MASK 0x01800000 +#define AD_BLUE_C_SHIFT 23 +#define AD_GREEN_C_MASK 0x00600000 +#define AD_GREEN_C_SHIFT 21 +#define AD_RED_C_MASK 0x00180000 +#define AD_RED_C_SHIFT 19 +#define AD_PALETTE 0x00040000 +#define AD_PIXEL_S_MASK 0x00030000 +#define AD_PIXEL_S_SHIFT 16 +#define AD_COMP_3_MASK 0x0000F000 +#define AD_COMP_3_SHIFT 12 +#define AD_COMP_2_MASK 0x00000F00 +#define AD_COMP_2_SHIFT 8 +#define AD_COMP_1_MASK 0x000000F0 +#define AD_COMP_1_SHIFT 4 +#define AD_COMP_0_MASK 0x0000000F +#define AD_COMP_0_SHIFT 0 + +#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \ + cpu_to_le32(AD_BYTE_F | (alpha << AD_ALPHA_C_SHIFT) | \ + (blue << AD_BLUE_C_SHIFT) | (green << AD_GREEN_C_SHIFT) | \ + (red << AD_RED_C_SHIFT) | (c3 << AD_COMP_3_SHIFT) | \ + (c2 << AD_COMP_2_SHIFT) | (c1 << AD_COMP_1_SHIFT) | \ + (c0 << AD_COMP_0_SHIFT) | (size << AD_PIXEL_S_SHIFT)) unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel, int monitor_port) { static const unsigned long pixelformat[][3] = { - {0x88882317, 0x88083218, 0x65052119}, - {0x88883316, 0x88082219, 0x65053118}, + { + MAKE_AD(3, 0, 2, 1, 3, 8, 8, 8, 8), + MAKE_AD(4, 2, 0, 1, 2, 8, 8, 8, 0), + MAKE_AD(4, 0, 2, 1, 1, 5, 6, 5, 0) + }, + { + MAKE_AD(3, 2, 0, 1, 3, 8, 8, 8, 8), + MAKE_AD(4, 0, 2, 1, 2, 8, 8, 8, 0), + MAKE_AD(4, 2, 0, 1, 1, 5, 6, 5, 0) + }, }; - unsigned int pix_fmt, arch_monitor; + unsigned int arch_monitor; + /* The DVI port is mis-wired on revision 1 of this board. */ arch_monitor = ((*pixis_arch == 0x01) && (monitor_port == 0))? 0 : 1; - /* DVI port for board version 0x01 */ - - if (bits_per_pixel == 32) - pix_fmt = pixelformat[arch_monitor][0]; - else if (bits_per_pixel == 24) - pix_fmt = pixelformat[arch_monitor][1]; - else if (bits_per_pixel == 16) - pix_fmt = pixelformat[arch_monitor][2]; - else - pix_fmt = pixelformat[1][0]; - - return pix_fmt; + + switch (bits_per_pixel) { + case 32: + return pixelformat[arch_monitor][0]; + case 24: + return pixelformat[arch_monitor][1]; + case 16: + return pixelformat[arch_monitor][2]; + default: + pr_err("fsl-diu: unsupported pixel depth %u\n", bits_per_pixel); + return 0; + } } void mpc8610hpcd_set_gamma_table(int monitor_port, char *gamma_table_base) @@ -190,8 +222,7 @@ void mpc8610hpcd_set_pixel_clock(unsigned int pixclock) } /* Pixel Clock configuration */ - pr_debug("DIU: Bus Frequency = %d\n", get_busfreq()); - speed_ccb = get_busfreq(); + speed_ccb = fsl_get_sys_freq(); /* Calculate the pixel clock with the smallest error */ /* calculate the following in steps to avoid overflow */ diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index eacea0e3fcc..af09baee22c 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -56,7 +56,7 @@ smp_86xx_release_core(int nr) } -static void __init +static int __init smp_86xx_kick_cpu(int nr) { unsigned int save_vector; @@ -65,7 +65,7 @@ smp_86xx_kick_cpu(int nr) unsigned int *vector = (unsigned int *)(KERNELBASE + 0x100); if (nr < 0 || nr >= NR_CPUS) - return; + return -ENOENT; pr_debug("smp_86xx_kick_cpu: kick CPU #%d\n", nr); @@ -92,6 +92,8 @@ smp_86xx_kick_cpu(int nr) local_irq_restore(flags); pr_debug("wait CPU #%d for %d msecs.\n", nr, n); + + return 0; } diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 9ecce995dd4..1e121088826 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -150,7 +150,7 @@ void __init mpc8xx_calibrate_decr(void) */ cpu = of_find_node_by_type(NULL, "cpu"); virq= irq_of_parse_and_map(cpu, 0); - irq = irq_map[virq].hwirq; + irq = virq_to_hw(virq); sys_tmr2 = immr_map(im_sit); out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) | diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index f7b07720aa3..f970ca2b180 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -20,6 +20,7 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig" source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" +source "arch/powerpc/platforms/wsp/Kconfig" config KVM_GUEST bool "KVM Guest support" @@ -56,16 +57,19 @@ config UDBG_RTAS_CONSOLE depends on PPC_RTAS default n +config PPC_SMP_MUXED_IPI + bool + help + Select this opton if your platform supports SMP and your + interrupt controller provides less than 4 interrupts to each + cpu. This will enable the generic code to multiplex the 4 + messages on to one ipi. + config PPC_UDBG_BEAT bool "BEAT based debug console" depends on PPC_CELLEB default n -config XICS - depends on PPC_PSERIES - bool - default y - config IPIC bool default n @@ -147,14 +151,27 @@ config PPC_970_NAP bool default n +config PPC_P7_NAP + bool + default n + config PPC_INDIRECT_IO bool select GENERIC_IOMAP - default n + +config PPC_INDIRECT_PIO + bool + select PPC_INDIRECT_IO + +config PPC_INDIRECT_MMIO + bool + select PPC_INDIRECT_IO + +config PPC_IO_WORKAROUNDS + bool config GENERIC_IOMAP bool - default n source "drivers/cpufreq/Kconfig" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 111138c55f9..2165b65876f 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -73,6 +73,7 @@ config PPC_BOOK3S_64 config PPC_BOOK3E_64 bool "Embedded processors" select PPC_FPU # Make it a choice ? + select PPC_SMP_MUXED_IPI endchoice @@ -107,6 +108,10 @@ config POWER4 depends on PPC64 && PPC_BOOK3S def_bool y +config PPC_A2 + bool + depends on PPC_BOOK3E_64 + config TUNE_CELL bool "Optimize for Cell Broadband Engine" depends on PPC64 && PPC_BOOK3S @@ -174,6 +179,7 @@ config FSL_BOOKE config PPC_FSL_BOOK3E bool select FSL_EMB_PERFMON + select PPC_SMP_MUXED_IPI default y if FSL_BOOKE config PTE_64BIT @@ -226,6 +232,24 @@ config VSX If in doubt, say Y here. +config PPC_ICSWX + bool "Support for PowerPC icswx coprocessor instruction" + depends on POWER4 + default n + ---help--- + + This option enables kernel support for the PowerPC Initiate + Coprocessor Store Word (icswx) coprocessor instruction on POWER7 + or newer processors. + + This option is only useful if you have a processor that supports + the icswx coprocessor instruction. It does not have any effect + on processors without the icswx coprocessor instruction. + + This option slightly increases kernel memory usage. + + If in doubt, say N here. + config SPE bool "SPE Support" depends on E200 || (E500 && !PPC_E500MC) diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index fdb9f0b0d7a..73e2116cfee 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ obj-$(CONFIG_AMIGAONE) += amigaone/ +obj-$(CONFIG_PPC_WSP) += wsp/ diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 81239ebed83..67d5009b4e8 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -6,7 +6,8 @@ config PPC_CELL_COMMON bool select PPC_CELL select PPC_DCR_MMIO - select PPC_INDIRECT_IO + select PPC_INDIRECT_PIO + select PPC_INDIRECT_MMIO select PPC_NATIVE select PPC_RTAS select IRQ_EDGE_EOI_HANDLER @@ -15,6 +16,7 @@ config PPC_CELL_NATIVE bool select PPC_CELL_COMMON select MPIC + select PPC_IO_WORKAROUNDS select IBM_NEW_EMAC_EMAC4 select IBM_NEW_EMAC_RGMII select IBM_NEW_EMAC_ZMII #test only diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 83fafe92264..a4a89350bcf 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_PPC_CELL_COMMON) += cbe_regs.o interrupt.o pervasive.o obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \ - pmu.o io-workarounds.o spider-pci.o + pmu.o spider-pci.o obj-$(CONFIG_CBE_RAS) += ras.o obj-$(CONFIG_CBE_THERM) += cbe_thermal.o @@ -39,11 +39,10 @@ obj-y += celleb_setup.o \ celleb_pci.o celleb_scc_epci.o \ celleb_scc_pciex.o \ celleb_scc_uhc.o \ - io-workarounds.o spider-pci.o \ - beat.o beat_htab.o beat_hvCall.o \ - beat_interrupt.o beat_iommu.o + spider-pci.o beat.o beat_htab.o \ + beat_hvCall.o beat_interrupt.o \ + beat_iommu.o -obj-$(CONFIG_SMP) += beat_smp.o obj-$(CONFIG_PPC_UDBG_BEAT) += beat_udbg.o obj-$(CONFIG_SERIAL_TXX9) += celleb_scc_sio.o obj-$(CONFIG_SPU_BASE) += beat_spu_priv1.o diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index bb5ebf8fa80..ac06903e136 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -113,7 +113,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) pr_devel("axon_msi: woff %x roff %x msi %x\n", write_offset, msic->read_offset, msi); - if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) { + if (msi < NR_IRQS && irq_get_chip_data(msi) == msic) { generic_handle_irq(msi); msic->fifo_virt[idx] = cpu_to_le32(0xffffffff); } else { @@ -320,6 +320,7 @@ static struct irq_chip msic_irq_chip = { static int msic_host_map(struct irq_host *h, unsigned int virq, irq_hw_number_t hw) { + irq_set_chip_data(virq, h->host_data); irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq); return 0; diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index 4cb9e147c30..55015e1f693 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -148,16 +148,6 @@ static int beatic_pic_host_map(struct irq_host *h, unsigned int virq, } /* - * Update binding hardware IRQ number (hw) and Virtuql - * IRQ number (virq). This is called only once for a given mapping. - */ -static void beatic_pic_host_remap(struct irq_host *h, unsigned int virq, - irq_hw_number_t hw) -{ - beat_construct_and_connect_irq_plug(virq, hw); -} - -/* * Translate device-tree interrupt spec to irq_hw_number_t style (ulong), * to pass away to irq_create_mapping(). * @@ -184,7 +174,6 @@ static int beatic_pic_host_match(struct irq_host *h, struct device_node *np) static struct irq_host_ops beatic_pic_host_ops = { .map = beatic_pic_host_map, - .remap = beatic_pic_host_remap, .unmap = beatic_pic_host_unmap, .xlate = beatic_pic_host_xlate, .match = beatic_pic_host_match, @@ -257,22 +246,6 @@ void __init beatic_init_IRQ(void) irq_set_default_host(beatic_host); } -#ifdef CONFIG_SMP - -/* Nullified to compile with SMP mode */ -void beatic_setup_cpu(int cpu) -{ -} - -void beatic_cause_IPI(int cpu, int mesg) -{ -} - -void beatic_request_IPIs(void) -{ -} -#endif /* CONFIG_SMP */ - void beatic_deinit_IRQ(void) { int i; diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h index b470fd0051f..a7e52f91a07 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.h +++ b/arch/powerpc/platforms/cell/beat_interrupt.h @@ -24,9 +24,6 @@ extern void beatic_init_IRQ(void); extern unsigned int beatic_get_irq(void); -extern void beatic_cause_IPI(int cpu, int mesg); -extern void beatic_request_IPIs(void); -extern void beatic_setup_cpu(int); extern void beatic_deinit_IRQ(void); #endif diff --git a/arch/powerpc/platforms/cell/beat_smp.c b/arch/powerpc/platforms/cell/beat_smp.c deleted file mode 100644 index 26efc204c47..00000000000 --- a/arch/powerpc/platforms/cell/beat_smp.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * SMP support for Celleb platform. (Incomplete) - * - * (C) Copyright 2006 TOSHIBA CORPORATION - * - * This code is based on arch/powerpc/platforms/cell/smp.c: - * Dave Engebretsen, Peter Bergner, and - * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com - * Plus various changes from other IBM teams... - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#undef DEBUG - -#include <linux/kernel.h> -#include <linux/smp.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/threads.h> -#include <linux/cpu.h> - -#include <asm/irq.h> -#include <asm/smp.h> -#include <asm/machdep.h> -#include <asm/udbg.h> - -#include "beat_interrupt.h" - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -/* - * The primary thread of each non-boot processor is recorded here before - * smp init. - */ -/* static cpumask_t of_spin_map; */ - -/** - * smp_startup_cpu() - start the given cpu - * - * At boot time, there is nothing to do for primary threads which were - * started from Open Firmware. For anything else, call RTAS with the - * appropriate start location. - * - * Returns: - * 0 - failure - * 1 - success - */ -static inline int __devinit smp_startup_cpu(unsigned int lcpu) -{ - return 0; -} - -static void smp_beatic_message_pass(int target, int msg) -{ - unsigned int i; - - if (target < NR_CPUS) { - beatic_cause_IPI(target, msg); - } else { - for_each_online_cpu(i) { - if (target == MSG_ALL_BUT_SELF - && i == smp_processor_id()) - continue; - beatic_cause_IPI(i, msg); - } - } -} - -static int __init smp_beatic_probe(void) -{ - return cpus_weight(cpu_possible_map); -} - -static void __devinit smp_beatic_setup_cpu(int cpu) -{ - beatic_setup_cpu(cpu); -} - -static void __devinit smp_celleb_kick_cpu(int nr) -{ - BUG_ON(nr < 0 || nr >= NR_CPUS); - - if (!smp_startup_cpu(nr)) - return; -} - -static int smp_celleb_cpu_bootable(unsigned int nr) -{ - return 1; -} -static struct smp_ops_t bpa_beatic_smp_ops = { - .message_pass = smp_beatic_message_pass, - .probe = smp_beatic_probe, - .kick_cpu = smp_celleb_kick_cpu, - .setup_cpu = smp_beatic_setup_cpu, - .cpu_bootable = smp_celleb_cpu_bootable, -}; - -/* This is called very early */ -void __init smp_init_celleb(void) -{ - DBG(" -> smp_init_celleb()\n"); - - smp_ops = &bpa_beatic_smp_ops; - - DBG(" <- smp_init_celleb()\n"); -} diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index dbc338f187a..f3917e7a5b4 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -45,8 +45,8 @@ static struct cbe_thread_map unsigned int cbe_id; } cbe_thread_map[NR_CPUS]; -static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = CPU_MASK_NONE }; -static cpumask_t cbe_first_online_cpu = CPU_MASK_NONE; +static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} }; +static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE }; static struct cbe_regs_map *cbe_find_map(struct device_node *np) { @@ -159,7 +159,8 @@ EXPORT_SYMBOL_GPL(cbe_cpu_to_node); u32 cbe_node_to_cpu(int node) { - return find_first_bit( (unsigned long *) &cbe_local_mask[node], sizeof(cpumask_t)); + return cpumask_first(&cbe_local_mask[node]); + } EXPORT_SYMBOL_GPL(cbe_node_to_cpu); @@ -268,9 +269,9 @@ void __init cbe_regs_init(void) thread->regs = map; thread->cbe_id = cbe_id; map->be_node = thread->be_node; - cpu_set(i, cbe_local_mask[cbe_id]); + cpumask_set_cpu(i, &cbe_local_mask[cbe_id]); if(thread->thread_id == 0) - cpu_set(i, cbe_first_online_cpu); + cpumask_set_cpu(i, &cbe_first_online_cpu); } } diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 404d1fc04d5..5822141aa63 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -41,7 +41,6 @@ #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> -#include "io-workarounds.h" #include "celleb_pci.h" #define MAX_PCI_DEVICES 32 @@ -320,7 +319,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node, size = 256; config = &private->fake_config[devno][fn]; - *config = alloc_maybe_bootmem(size, GFP_KERNEL); + *config = zalloc_maybe_bootmem(size, GFP_KERNEL); if (*config == NULL) { printk(KERN_ERR "PCI: " "not enough memory for fake configuration space\n"); @@ -331,7 +330,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node, size = sizeof(struct celleb_pci_resource); res = &private->res[devno][fn]; - *res = alloc_maybe_bootmem(size, GFP_KERNEL); + *res = zalloc_maybe_bootmem(size, GFP_KERNEL); if (*res == NULL) { printk(KERN_ERR "PCI: not enough memory for resource data space\n"); @@ -432,7 +431,7 @@ static int __init phb_set_bus_ranges(struct device_node *dev, static void __init celleb_alloc_private_mem(struct pci_controller *hose) { hose->private_data = - alloc_maybe_bootmem(sizeof(struct celleb_pci_private), + zalloc_maybe_bootmem(sizeof(struct celleb_pci_private), GFP_KERNEL); } @@ -469,18 +468,6 @@ static struct of_device_id celleb_phb_match[] __initdata = { }, }; -static int __init celleb_io_workaround_init(struct pci_controller *phb, - struct celleb_phb_spec *phb_spec) -{ - if (phb_spec->ops) { - iowa_register_bus(phb, phb_spec->ops, phb_spec->iowa_init, - phb_spec->iowa_data); - io_workaround_init(); - } - - return 0; -} - int __init celleb_setup_phb(struct pci_controller *phb) { struct device_node *dev = phb->dn; @@ -500,7 +487,11 @@ int __init celleb_setup_phb(struct pci_controller *phb) if (rc) return 1; - return celleb_io_workaround_init(phb, phb_spec); + if (phb_spec->ops) + iowa_register_bus(phb, phb_spec->ops, + phb_spec->iowa_init, + phb_spec->iowa_data); + return 0; } int celleb_pci_probe_mode(struct pci_bus *bus) diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h index 4cba1523ec5..a801fcc5f38 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.h +++ b/arch/powerpc/platforms/cell/celleb_pci.h @@ -26,8 +26,9 @@ #include <asm/pci-bridge.h> #include <asm/prom.h> #include <asm/ppc-pci.h> +#include <asm/io-workarounds.h> -#include "io-workarounds.h" +struct iowa_bus; struct celleb_phb_spec { int (*setup)(struct device_node *, struct pci_controller *); diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c index e5384557977..d58d9bae4b9 100644 --- a/arch/powerpc/platforms/cell/celleb_setup.c +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -128,10 +128,6 @@ static void __init celleb_setup_arch_beat(void) spu_management_ops = &spu_management_of_ops; #endif -#ifdef CONFIG_SMP - smp_init_celleb(); -#endif - celleb_setup_arch_common(); } diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 44cfd1bef89..449c08c1586 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -196,8 +196,20 @@ static irqreturn_t iic_ipi_action(int irq, void *dev_id) { int ipi = (int)(long)dev_id; - smp_message_recv(ipi); - + switch(ipi) { + case PPC_MSG_CALL_FUNCTION: + generic_smp_call_function_interrupt(); + break; + case PPC_MSG_RESCHEDULE: + scheduler_ipi(); + break; + case PPC_MSG_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; + case PPC_MSG_DEBUGGER_BREAK: + debug_ipi_action(0, NULL); + break; + } return IRQ_HANDLED; } static void iic_request_ipi(int ipi, const char *name) diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c index d31c594cfdf..51e290126bc 100644 --- a/arch/powerpc/platforms/cell/qpace_setup.c +++ b/arch/powerpc/platforms/cell/qpace_setup.c @@ -42,7 +42,6 @@ #include "interrupt.h" #include "pervasive.h" #include "ras.h" -#include "io-workarounds.h" static void qpace_show_cpuinfo(struct seq_file *m) { diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index fd57bfe00ed..c73cf4c43fc 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -51,11 +51,11 @@ #include <asm/udbg.h> #include <asm/mpic.h> #include <asm/cell-regs.h> +#include <asm/io-workarounds.h> #include "interrupt.h" #include "pervasive.h" #include "ras.h" -#include "io-workarounds.h" #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -136,8 +136,6 @@ static int __devinit cell_setup_phb(struct pci_controller *phb) iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init, (void *)SPIDER_PCI_REG_BASE); - io_workaround_init(); - return 0; } diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index f774530075b..d176e6148e3 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -77,7 +77,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) unsigned int pcpu; int start_cpu; - if (cpu_isset(lcpu, of_spin_map)) + if (cpumask_test_cpu(lcpu, &of_spin_map)) /* Already started by OF and sitting in spin loop */ return 1; @@ -103,27 +103,11 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) return 1; } -static void smp_iic_message_pass(int target, int msg) -{ - unsigned int i; - - if (target < NR_CPUS) { - iic_cause_IPI(target, msg); - } else { - for_each_online_cpu(i) { - if (target == MSG_ALL_BUT_SELF - && i == smp_processor_id()) - continue; - iic_cause_IPI(i, msg); - } - } -} - static int __init smp_iic_probe(void) { iic_request_IPIs(); - return cpus_weight(cpu_possible_map); + return cpumask_weight(cpu_possible_mask); } static void __devinit smp_cell_setup_cpu(int cpu) @@ -137,12 +121,12 @@ static void __devinit smp_cell_setup_cpu(int cpu) mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER); } -static void __devinit smp_cell_kick_cpu(int nr) +static int __devinit smp_cell_kick_cpu(int nr) { BUG_ON(nr < 0 || nr >= NR_CPUS); if (!smp_startup_cpu(nr)) - return; + return -ENOENT; /* * The processor is currently spinning, waiting for the @@ -150,6 +134,8 @@ static void __devinit smp_cell_kick_cpu(int nr) * the processor will continue on to secondary_start */ paca[nr].cpu_start = 1; + + return 0; } static int smp_cell_cpu_bootable(unsigned int nr) @@ -166,7 +152,7 @@ static int smp_cell_cpu_bootable(unsigned int nr) return 1; } static struct smp_ops_t bpa_iic_smp_ops = { - .message_pass = smp_iic_message_pass, + .message_pass = iic_cause_IPI, .probe = smp_iic_probe, .kick_cpu = smp_cell_kick_cpu, .setup_cpu = smp_cell_setup_cpu, @@ -186,13 +172,12 @@ void __init smp_init_cell(void) if (cpu_has_feature(CPU_FTR_SMT)) { for_each_present_cpu(i) { if (cpu_thread_in_core(i) == 0) - cpu_set(i, of_spin_map); + cpumask_set_cpu(i, &of_spin_map); } - } else { - of_spin_map = cpu_present_map; - } + } else + cpumask_copy(&of_spin_map, cpu_present_mask); - cpu_clear(boot_cpuid, of_spin_map); + cpumask_clear_cpu(boot_cpuid, &of_spin_map); /* Non-lpar has additional take/give timebase */ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index ca7731c0b59..f1f7878893f 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -27,8 +27,7 @@ #include <asm/ppc-pci.h> #include <asm/pci-bridge.h> - -#include "io-workarounds.h" +#include <asm/io-workarounds.h> #define SPIDER_PCI_DISABLE_PREFETCH diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index c5cf50e6b45..442c28c00f8 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -68,9 +68,9 @@ struct spider_pic { }; static struct spider_pic spider_pics[SPIDER_CHIP_COUNT]; -static struct spider_pic *spider_virq_to_pic(unsigned int virq) +static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d) { - return irq_map[virq].host->host_data; + return irq_data_get_irq_chip_data(d); } static void __iomem *spider_get_irq_config(struct spider_pic *pic, @@ -81,24 +81,24 @@ static void __iomem *spider_get_irq_config(struct spider_pic *pic, static void spider_unmask_irq(struct irq_data *d) { - struct spider_pic *pic = spider_virq_to_pic(d->irq); - void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq); + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); out_be32(cfg, in_be32(cfg) | 0x30000000u); } static void spider_mask_irq(struct irq_data *d) { - struct spider_pic *pic = spider_virq_to_pic(d->irq); - void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq); + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); out_be32(cfg, in_be32(cfg) & ~0x30000000u); } static void spider_ack_irq(struct irq_data *d) { - struct spider_pic *pic = spider_virq_to_pic(d->irq); - unsigned int src = irq_map[d->irq].hwirq; + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int src = irqd_to_hwirq(d); /* Reset edge detection logic if necessary */ @@ -116,8 +116,8 @@ static void spider_ack_irq(struct irq_data *d) static int spider_set_irq_type(struct irq_data *d, unsigned int type) { unsigned int sense = type & IRQ_TYPE_SENSE_MASK; - struct spider_pic *pic = spider_virq_to_pic(d->irq); - unsigned int hw = irq_map[d->irq].hwirq; + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int hw = irqd_to_hwirq(d); void __iomem *cfg = spider_get_irq_config(pic, hw); u32 old_mask; u32 ic; @@ -171,6 +171,7 @@ static struct irq_chip spider_pic = { static int spider_host_map(struct irq_host *h, unsigned int virq, irq_hw_number_t hw) { + irq_set_chip_data(virq, h->host_data); irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq); /* Set default irq type */ diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 65203857b0c..32cb4e66d2c 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -141,7 +141,7 @@ void __spu_update_sched_info(struct spu_context *ctx) * runqueue. The context will be rescheduled on the proper node * if it is timesliced or preempted. */ - ctx->cpus_allowed = current->cpus_allowed; + cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current)); /* Save the current cpu id for spu interrupt routing. */ ctx->last_ran = raw_smp_processor_id(); diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c index 02cafecc90e..a800122e4dd 100644 --- a/arch/powerpc/platforms/chrp/smp.c +++ b/arch/powerpc/platforms/chrp/smp.c @@ -30,10 +30,12 @@ #include <asm/mpic.h> #include <asm/rtas.h> -static void __devinit smp_chrp_kick_cpu(int nr) +static int __devinit smp_chrp_kick_cpu(int nr) { *(unsigned long *)KERNELBASE = nr; asm volatile("dcbf 0,%0"::"r"(KERNELBASE):"memory"); + + return 0; } static void __devinit smp_chrp_setup_cpu(int cpu_nr) diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c index 12aa62b6f22..f61a2dd96b9 100644 --- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -48,7 +48,7 @@ static void flipper_pic_mask_and_ack(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); u32 mask = 1 << irq; @@ -59,7 +59,7 @@ static void flipper_pic_mask_and_ack(struct irq_data *d) static void flipper_pic_ack(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); /* this is at least needed for RSW */ @@ -68,7 +68,7 @@ static void flipper_pic_ack(struct irq_data *d) static void flipper_pic_mask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); clrbits32(io_base + FLIPPER_IMR, 1 << irq); @@ -76,7 +76,7 @@ static void flipper_pic_mask(struct irq_data *d) static void flipper_pic_unmask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); setbits32(io_base + FLIPPER_IMR, 1 << irq); @@ -107,12 +107,6 @@ static int flipper_pic_map(struct irq_host *h, unsigned int virq, return 0; } -static void flipper_pic_unmap(struct irq_host *h, unsigned int irq) -{ - irq_set_chip_data(irq, NULL); - irq_set_chip(irq, NULL); -} - static int flipper_pic_match(struct irq_host *h, struct device_node *np) { return 1; @@ -121,7 +115,6 @@ static int flipper_pic_match(struct irq_host *h, struct device_node *np) static struct irq_host_ops flipper_irq_host_ops = { .map = flipper_pic_map, - .unmap = flipper_pic_unmap, .match = flipper_pic_match, }; diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 2bdddfc9d52..e4919170c6b 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -43,7 +43,7 @@ static void hlwd_pic_mask_and_ack(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); u32 mask = 1 << irq; @@ -53,7 +53,7 @@ static void hlwd_pic_mask_and_ack(struct irq_data *d) static void hlwd_pic_ack(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); out_be32(io_base + HW_BROADWAY_ICR, 1 << irq); @@ -61,7 +61,7 @@ static void hlwd_pic_ack(struct irq_data *d) static void hlwd_pic_mask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); clrbits32(io_base + HW_BROADWAY_IMR, 1 << irq); @@ -69,7 +69,7 @@ static void hlwd_pic_mask(struct irq_data *d) static void hlwd_pic_unmask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); setbits32(io_base + HW_BROADWAY_IMR, 1 << irq); @@ -100,15 +100,8 @@ static int hlwd_pic_map(struct irq_host *h, unsigned int virq, return 0; } -static void hlwd_pic_unmap(struct irq_host *h, unsigned int irq) -{ - irq_set_chip_data(irq, NULL); - irq_set_chip(irq, NULL); -} - static struct irq_host_ops hlwd_irq_host_ops = { .map = hlwd_pic_map, - .unmap = hlwd_pic_unmap, }; static unsigned int __hlwd_pic_get_irq(struct irq_host *h) diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig index e5bc9f75d47..b57cda3a081 100644 --- a/arch/powerpc/platforms/iseries/Kconfig +++ b/arch/powerpc/platforms/iseries/Kconfig @@ -1,7 +1,9 @@ config PPC_ISERIES bool "IBM Legacy iSeries" depends on PPC64 && PPC_BOOK3S - select PPC_INDIRECT_IO + select PPC_SMP_MUXED_IPI + select PPC_INDIRECT_PIO + select PPC_INDIRECT_MMIO select PPC_PCI_CHOICE if EXPERT menu "iSeries device drivers" diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S index 32a56c6dfa7..29c02f36b32 100644 --- a/arch/powerpc/platforms/iseries/exception.S +++ b/arch/powerpc/platforms/iseries/exception.S @@ -31,6 +31,7 @@ #include <asm/thread_info.h> #include <asm/ptrace.h> #include <asm/cputable.h> +#include <asm/mmu.h> #include "exception.h" @@ -60,29 +61,31 @@ system_reset_iSeries: /* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */ /* In the UP case we'll yield() later, and we will not access the paca anyway */ #ifdef CONFIG_SMP -1: +iSeries_secondary_wait_paca: HMT_LOW LOAD_REG_ADDR(r23, __secondary_hold_spinloop) ld r23,0(r23) - sync - LOAD_REG_ADDR(r3,current_set) - sldi r28,r24,3 /* get current_set[cpu#] */ - ldx r3,r3,r28 - addi r1,r3,THREAD_SIZE - subi r1,r1,STACK_FRAME_OVERHEAD - cmpwi 0,r23,0 /* Keep poking the Hypervisor until */ - bne 2f /* we're released */ - /* Let the Hypervisor know we are alive */ + cmpdi 0,r23,0 + bne 2f /* go on when the master is ready */ + + /* Keep poking the Hypervisor until we're released */ /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ lis r3,0x8002 rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ li r0,-1 /* r0=-1 indicates a Hypervisor call */ sc /* Invoke the hypervisor via a system call */ - b 1b -#endif + b iSeries_secondary_wait_paca 2: + HMT_MEDIUM + sync + + LOAD_REG_ADDR(r3, nr_cpu_ids) /* get number of pacas allocated */ + lwz r3,0(r3) /* nr_cpus= or NR_CPUS can limit */ + cmpld 0,r24,r3 /* is our cpu number allocated? */ + bge iSeries_secondary_yield /* no, yield forever */ + /* Load our paca now that it's been allocated */ LOAD_REG_ADDR(r13, paca) ld r13,0(r13) @@ -93,10 +96,24 @@ system_reset_iSeries: ori r23,r23,MSR_RI mtmsrd r23 /* RI on */ - HMT_LOW -#ifdef CONFIG_SMP +iSeries_secondary_smp_loop: lbz r23,PACAPROCSTART(r13) /* Test if this processor * should start */ + cmpwi 0,r23,0 + bne 3f /* go on when we are told */ + + HMT_LOW + /* Let the Hypervisor know we are alive */ + /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ + lis r3,0x8002 + rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ + li r0,-1 /* r0=-1 indicates a Hypervisor call */ + sc /* Invoke the hypervisor via a system call */ + mfspr r13,SPRN_SPRG_PACA /* Put r13 back ???? */ + b iSeries_secondary_smp_loop /* wait for signal to start */ + +3: + HMT_MEDIUM sync LOAD_REG_ADDR(r3,current_set) sldi r28,r24,3 /* get current_set[cpu#] */ @@ -104,27 +121,22 @@ system_reset_iSeries: addi r1,r3,THREAD_SIZE subi r1,r1,STACK_FRAME_OVERHEAD - cmpwi 0,r23,0 - beq iSeries_secondary_smp_loop /* Loop until told to go */ b __secondary_start /* Loop until told to go */ -iSeries_secondary_smp_loop: - /* Let the Hypervisor know we are alive */ - /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ - lis r3,0x8002 - rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ -#else /* CONFIG_SMP */ +#endif /* CONFIG_SMP */ + +iSeries_secondary_yield: /* Yield the processor. This is required for non-SMP kernels which are running on multi-threaded machines. */ + HMT_LOW lis r3,0x8000 rldicr r3,r3,32,15 /* r3 = (r3 << 32) & 0xffff000000000000 */ addi r3,r3,18 /* r3 = 0x8000000000000012 which is "yield" */ li r4,0 /* "yield timed" */ li r5,-1 /* "yield forever" */ -#endif /* CONFIG_SMP */ li r0,-1 /* r0=-1 indicates a Hypervisor call */ sc /* Invoke the hypervisor via a system call */ mfspr r13,SPRN_SPRG_PACA /* Put r13 back ???? */ - b 2b /* If SMP not configured, secondaries + b iSeries_secondary_yield /* If SMP not configured, secondaries * loop forever */ /*** ISeries-LPAR interrupt handlers ***/ @@ -157,7 +169,7 @@ BEGIN_FTR_SECTION FTR_SECTION_ELSE EXCEPTION_PROLOG_1(PACA_EXGEN) EXCEPTION_PROLOG_ISERIES_1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB) b data_access_common .do_stab_bolted_iSeries: diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c index 52a6889832c..b2103453eb0 100644 --- a/arch/powerpc/platforms/iseries/irq.c +++ b/arch/powerpc/platforms/iseries/irq.c @@ -42,7 +42,6 @@ #include "irq.h" #include "pci.h" #include "call_pci.h" -#include "smp.h" #ifdef CONFIG_PCI @@ -171,7 +170,7 @@ static void iseries_enable_IRQ(struct irq_data *d) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int rirq = (unsigned int)irqd_to_hwirq(d); /* The IRQ has already been locked by the caller */ bus = REAL_IRQ_TO_BUS(rirq); @@ -188,7 +187,7 @@ static unsigned int iseries_startup_IRQ(struct irq_data *d) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int rirq = (unsigned int)irqd_to_hwirq(d); bus = REAL_IRQ_TO_BUS(rirq); function = REAL_IRQ_TO_FUNC(rirq); @@ -234,7 +233,7 @@ static void iseries_shutdown_IRQ(struct irq_data *d) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int rirq = (unsigned int)irqd_to_hwirq(d); /* irq should be locked by the caller */ bus = REAL_IRQ_TO_BUS(rirq); @@ -257,7 +256,7 @@ static void iseries_disable_IRQ(struct irq_data *d) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int rirq = (unsigned int)irqd_to_hwirq(d); /* The IRQ has already been locked by the caller */ bus = REAL_IRQ_TO_BUS(rirq); @@ -271,7 +270,7 @@ static void iseries_disable_IRQ(struct irq_data *d) static void iseries_end_IRQ(struct irq_data *d) { - unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int rirq = (unsigned int)irqd_to_hwirq(d); HvCallPci_eoi(REAL_IRQ_TO_BUS(rirq), REAL_IRQ_TO_SUBBUS(rirq), (REAL_IRQ_TO_IDSEL(rirq) << 4) + REAL_IRQ_TO_FUNC(rirq)); @@ -316,7 +315,7 @@ unsigned int iSeries_get_irq(void) #ifdef CONFIG_SMP if (get_lppaca()->int_dword.fields.ipi_cnt) { get_lppaca()->int_dword.fields.ipi_cnt = 0; - iSeries_smp_message_recv(); + smp_ipi_demux(); } #endif /* CONFIG_SMP */ if (hvlpevent_is_pending()) diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index 2946ae10fbf..c25a0815c26 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -249,7 +249,7 @@ static unsigned long iSeries_process_mainstore_vpd(struct MemoryBlock *mb_array, unsigned long i; unsigned long mem_blocks = 0; - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) mem_blocks = iSeries_process_Regatta_mainstore_vpd(mb_array, max_entries); else @@ -634,7 +634,7 @@ static int __init iseries_probe(void) hpte_init_iSeries(); /* iSeries does not support 16M pages */ - cur_cpu_spec->cpu_features &= ~CPU_FTR_16M_PAGE; + cur_cpu_spec->mmu_features &= ~MMU_FTR_16M_PAGE; return 1; } @@ -685,6 +685,11 @@ void * __init iSeries_early_setup(void) powerpc_firmware_features |= FW_FEATURE_ISERIES; powerpc_firmware_features |= FW_FEATURE_LPAR; +#ifdef CONFIG_SMP + /* On iSeries we know we can never have more than 64 cpus */ + nr_cpu_ids = max(nr_cpu_ids, 64); +#endif + iSeries_fixup_klimit(); /* diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c index 6c6029914db..e3265adde5d 100644 --- a/arch/powerpc/platforms/iseries/smp.c +++ b/arch/powerpc/platforms/iseries/smp.c @@ -42,57 +42,23 @@ #include <asm/cputable.h> #include <asm/system.h> -#include "smp.h" - -static unsigned long iSeries_smp_message[NR_CPUS]; - -void iSeries_smp_message_recv(void) -{ - int cpu = smp_processor_id(); - int msg; - - if (num_online_cpus() < 2) - return; - - for (msg = 0; msg < 4; msg++) - if (test_and_clear_bit(msg, &iSeries_smp_message[cpu])) - smp_message_recv(msg); -} - -static inline void smp_iSeries_do_message(int cpu, int msg) +static void smp_iSeries_cause_ipi(int cpu, unsigned long data) { - set_bit(msg, &iSeries_smp_message[cpu]); HvCall_sendIPI(&(paca[cpu])); } -static void smp_iSeries_message_pass(int target, int msg) -{ - int i; - - if (target < NR_CPUS) - smp_iSeries_do_message(target, msg); - else { - for_each_online_cpu(i) { - if ((target == MSG_ALL_BUT_SELF) && - (i == smp_processor_id())) - continue; - smp_iSeries_do_message(i, msg); - } - } -} - static int smp_iSeries_probe(void) { return cpumask_weight(cpu_possible_mask); } -static void smp_iSeries_kick_cpu(int nr) +static int smp_iSeries_kick_cpu(int nr) { BUG_ON((nr < 0) || (nr >= NR_CPUS)); /* Verify that our partition has a processor nr */ if (lppaca_of(nr).dyn_proc_status >= 2) - return; + return -ENOENT; /* The processor is currently spinning, waiting * for the cpu_start field to become non-zero @@ -100,6 +66,8 @@ static void smp_iSeries_kick_cpu(int nr) * continue on to secondary_start in iSeries_head.S */ paca[nr].cpu_start = 1; + + return 0; } static void __devinit smp_iSeries_setup_cpu(int nr) @@ -107,7 +75,8 @@ static void __devinit smp_iSeries_setup_cpu(int nr) } static struct smp_ops_t iSeries_smp_ops = { - .message_pass = smp_iSeries_message_pass, + .message_pass = smp_muxed_ipi_message_pass, + .cause_ipi = smp_iSeries_cause_ipi, .probe = smp_iSeries_probe, .kick_cpu = smp_iSeries_kick_cpu, .setup_cpu = smp_iSeries_setup_cpu, diff --git a/arch/powerpc/platforms/iseries/smp.h b/arch/powerpc/platforms/iseries/smp.h deleted file mode 100644 index d501f7de01e..00000000000 --- a/arch/powerpc/platforms/iseries/smp.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _PLATFORMS_ISERIES_SMP_H -#define _PLATFORMS_ISERIES_SMP_H - -extern void iSeries_smp_message_recv(void); - -#endif /* _PLATFORMS_ISERIES_SMP_H */ diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig index 1e1a0873e1d..1afd10f6785 100644 --- a/arch/powerpc/platforms/powermac/Kconfig +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -18,4 +18,13 @@ config PPC_PMAC64 select PPC_970_NAP default y - +config PPC_PMAC32_PSURGE + bool "Support for powersurge upgrade cards" if EXPERT + depends on SMP && PPC32 && PPC_PMAC + select PPC_SMP_MUXED_IPI + default y + help + The powersurge cpu boards can be used in the generation + of powermacs that have a socket for an upgradeable cpu card, + including the 7500, 8500, 9500, 9600. Support exists for + both dual and quad socket upgrade cards. diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 7c18a1607d1..9089b042119 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -84,7 +84,7 @@ static void __pmac_retrigger(unsigned int irq_nr) static void pmac_mask_and_ack_irq(struct irq_data *d) { - unsigned int src = irq_map[d->irq].hwirq; + unsigned int src = irqd_to_hwirq(d); unsigned long bit = 1UL << (src & 0x1f); int i = src >> 5; unsigned long flags; @@ -106,7 +106,7 @@ static void pmac_mask_and_ack_irq(struct irq_data *d) static void pmac_ack_irq(struct irq_data *d) { - unsigned int src = irq_map[d->irq].hwirq; + unsigned int src = irqd_to_hwirq(d); unsigned long bit = 1UL << (src & 0x1f); int i = src >> 5; unsigned long flags; @@ -152,7 +152,7 @@ static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) static unsigned int pmac_startup_irq(struct irq_data *d) { unsigned long flags; - unsigned int src = irq_map[d->irq].hwirq; + unsigned int src = irqd_to_hwirq(d); unsigned long bit = 1UL << (src & 0x1f); int i = src >> 5; @@ -169,7 +169,7 @@ static unsigned int pmac_startup_irq(struct irq_data *d) static void pmac_mask_irq(struct irq_data *d) { unsigned long flags; - unsigned int src = irq_map[d->irq].hwirq; + unsigned int src = irqd_to_hwirq(d); raw_spin_lock_irqsave(&pmac_pic_lock, flags); __clear_bit(src, ppc_cached_irq_mask); @@ -180,7 +180,7 @@ static void pmac_mask_irq(struct irq_data *d) static void pmac_unmask_irq(struct irq_data *d) { unsigned long flags; - unsigned int src = irq_map[d->irq].hwirq; + unsigned int src = irqd_to_hwirq(d); raw_spin_lock_irqsave(&pmac_pic_lock, flags); __set_bit(src, ppc_cached_irq_mask); @@ -193,7 +193,7 @@ static int pmac_retrigger(struct irq_data *d) unsigned long flags; raw_spin_lock_irqsave(&pmac_pic_lock, flags); - __pmac_retrigger(irq_map[d->irq].hwirq); + __pmac_retrigger(irqd_to_hwirq(d)); raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); return 1; } @@ -239,15 +239,12 @@ static unsigned int pmac_pic_get_irq(void) unsigned long bits = 0; unsigned long flags; -#ifdef CONFIG_SMP - void psurge_smp_message_recv(void); - - /* IPI's are a hack on the powersurge -- Cort */ - if ( smp_processor_id() != 0 ) { - psurge_smp_message_recv(); - return NO_IRQ_IGNORE; /* ignore, already handled */ +#ifdef CONFIG_PPC_PMAC32_PSURGE + /* IPI's are a hack on the powersurge -- Cort */ + if (smp_processor_id() != 0) { + return psurge_secondary_virq; } -#endif /* CONFIG_SMP */ +#endif /* CONFIG_PPC_PMAC32_PSURGE */ raw_spin_lock_irqsave(&pmac_pic_lock, flags); for (irq = max_real_irqs; (irq -= 32) >= 0; ) { int i = irq >> 5; diff --git a/arch/powerpc/platforms/powermac/pic.h b/arch/powerpc/platforms/powermac/pic.h deleted file mode 100644 index d622a8345aa..00000000000 --- a/arch/powerpc/platforms/powermac/pic.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __PPC_PLATFORMS_PMAC_PIC_H -#define __PPC_PLATFORMS_PMAC_PIC_H - -#include <linux/irq.h> - -extern struct irq_chip pmac_pic; - -extern void pmac_pic_init(void); -extern int pmac_get_irq(void); - -#endif /* __PPC_PLATFORMS_PMAC_PIC_H */ diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 20468f49aec..8327cce2bdb 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -33,6 +33,7 @@ extern void pmac_setup_pci_dma(void); extern void pmac_check_ht_link(void); extern void pmac_setup_smp(void); +extern int psurge_secondary_virq; extern void low_cpu_die(void) __attribute__((noreturn)); extern int pmac_nvram_init(void); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index bc5f0dc6ae1..db092d7c4c5 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -70,7 +70,7 @@ static void (*pmac_tb_freeze)(int freeze); static u64 timebase; static int tb_req; -#ifdef CONFIG_PPC32 +#ifdef CONFIG_PPC_PMAC32_PSURGE /* * Powersurge (old powermac SMP) support. @@ -124,6 +124,10 @@ static volatile u32 __iomem *psurge_start; /* what sort of powersurge board we have */ static int psurge_type = PSURGE_NONE; +/* irq for secondary cpus to report */ +static struct irq_host *psurge_host; +int psurge_secondary_virq; + /* * Set and clear IPIs for powersurge. */ @@ -156,51 +160,52 @@ static inline void psurge_clr_ipi(int cpu) /* * On powersurge (old SMP powermac architecture) we don't have * separate IPIs for separate messages like openpic does. Instead - * we have a bitmap for each processor, where a 1 bit means that - * the corresponding message is pending for that processor. - * Ideally each cpu's entry would be in a different cache line. + * use the generic demux helpers * -- paulus. */ -static unsigned long psurge_smp_message[NR_CPUS]; - -void psurge_smp_message_recv(void) +static irqreturn_t psurge_ipi_intr(int irq, void *d) { - int cpu = smp_processor_id(); - int msg; + psurge_clr_ipi(smp_processor_id()); + smp_ipi_demux(); - /* clear interrupt */ - psurge_clr_ipi(cpu); - - if (num_online_cpus() < 2) - return; + return IRQ_HANDLED; +} - /* make sure there is a message there */ - for (msg = 0; msg < 4; msg++) - if (test_and_clear_bit(msg, &psurge_smp_message[cpu])) - smp_message_recv(msg); +static void smp_psurge_cause_ipi(int cpu, unsigned long data) +{ + psurge_set_ipi(cpu); } -irqreturn_t psurge_primary_intr(int irq, void *d) +static int psurge_host_map(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw) { - psurge_smp_message_recv(); - return IRQ_HANDLED; + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_percpu_irq); + + return 0; } -static void smp_psurge_message_pass(int target, int msg) +struct irq_host_ops psurge_host_ops = { + .map = psurge_host_map, +}; + +static int psurge_secondary_ipi_init(void) { - int i; + int rc = -ENOMEM; - if (num_online_cpus() < 2) - return; + psurge_host = irq_alloc_host(NULL, IRQ_HOST_MAP_NOMAP, 0, + &psurge_host_ops, 0); - for_each_online_cpu(i) { - if (target == MSG_ALL - || (target == MSG_ALL_BUT_SELF && i != smp_processor_id()) - || target == i) { - set_bit(msg, &psurge_smp_message[i]); - psurge_set_ipi(i); - } - } + if (psurge_host) + psurge_secondary_virq = irq_create_direct_mapping(psurge_host); + + if (psurge_secondary_virq) + rc = request_irq(psurge_secondary_virq, psurge_ipi_intr, + IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL); + + if (rc) + pr_err("Failed to setup secondary cpu IPI\n"); + + return rc; } /* @@ -311,6 +316,9 @@ static int __init smp_psurge_probe(void) ncpus = 2; } + if (psurge_secondary_ipi_init()) + return 1; + psurge_start = ioremap(PSURGE_START, 4); psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4); @@ -329,7 +337,7 @@ static int __init smp_psurge_probe(void) return ncpus; } -static void __init smp_psurge_kick_cpu(int nr) +static int __init smp_psurge_kick_cpu(int nr) { unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8; unsigned long a, flags; @@ -394,11 +402,13 @@ static void __init smp_psurge_kick_cpu(int nr) psurge_set_ipi(1); if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354); + + return 0; } static struct irqaction psurge_irqaction = { - .handler = psurge_primary_intr, - .flags = IRQF_DISABLED, + .handler = psurge_ipi_intr, + .flags = IRQF_DISABLED|IRQF_PERCPU, .name = "primary IPI", }; @@ -437,14 +447,15 @@ void __init smp_psurge_give_timebase(void) /* PowerSurge-style Macs */ struct smp_ops_t psurge_smp_ops = { - .message_pass = smp_psurge_message_pass, + .message_pass = smp_muxed_ipi_message_pass, + .cause_ipi = smp_psurge_cause_ipi, .probe = smp_psurge_probe, .kick_cpu = smp_psurge_kick_cpu, .setup_cpu = smp_psurge_setup_cpu, .give_timebase = smp_psurge_give_timebase, .take_timebase = smp_psurge_take_timebase, }; -#endif /* CONFIG_PPC32 - actually powersurge support */ +#endif /* CONFIG_PPC_PMAC32_PSURGE */ /* * Core 99 and later support @@ -791,14 +802,14 @@ static int __init smp_core99_probe(void) return ncpus; } -static void __devinit smp_core99_kick_cpu(int nr) +static int __devinit smp_core99_kick_cpu(int nr) { unsigned int save_vector; unsigned long target, flags; unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100); if (nr < 0 || nr > 3) - return; + return -ENOENT; if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346); @@ -830,6 +841,8 @@ static void __devinit smp_core99_kick_cpu(int nr) local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); + + return 0; } static void __devinit smp_core99_setup_cpu(int cpu_nr) @@ -1002,7 +1015,7 @@ void __init pmac_setup_smp(void) of_node_put(np); smp_ops = &core99_smp_ops; } -#ifdef CONFIG_PPC32 +#ifdef CONFIG_PPC_PMAC32_PSURGE else { /* We have to set bits in cpu_possible_mask here since the * secondary CPU(s) aren't in the device tree. Various @@ -1015,7 +1028,7 @@ void __init pmac_setup_smp(void) set_cpu_possible(cpu, true); smp_ops = &psurge_smp_ops; } -#endif /* CONFIG_PPC32 */ +#endif /* CONFIG_PPC_PMAC32_PSURGE */ #ifdef CONFIG_HOTPLUG_CPU ppc_md.cpu_die = pmac_cpu_die; diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index f2f6413b81d..600ed2c0ed5 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -197,7 +197,7 @@ static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet, result = irq_set_chip_data(*virq, pd); if (result) { - pr_debug("%s:%d: set_irq_chip_data failed\n", + pr_debug("%s:%d: irq_set_chip_data failed\n", __func__, __LINE__); goto fail_set; } @@ -659,11 +659,6 @@ static void __maybe_unused _dump_mask(struct ps3_private *pd, static void dump_bmp(struct ps3_private* pd) {}; #endif /* defined(DEBUG) */ -static void ps3_host_unmap(struct irq_host *h, unsigned int virq) -{ - irq_set_chip_data(virq, NULL); -} - static int ps3_host_map(struct irq_host *h, unsigned int virq, irq_hw_number_t hwirq) { @@ -683,7 +678,6 @@ static int ps3_host_match(struct irq_host *h, struct device_node *np) static struct irq_host_ops ps3_host_ops = { .map = ps3_host_map, - .unmap = ps3_host_unmap, .match = ps3_host_match, }; diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c index 51ffde40af2..4c44794faac 100644 --- a/arch/powerpc/platforms/ps3/smp.c +++ b/arch/powerpc/platforms/ps3/smp.c @@ -39,7 +39,7 @@ #define MSG_COUNT 4 static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs); -static void do_message_pass(int target, int msg) +static void ps3_smp_message_pass(int cpu, int msg) { int result; unsigned int virq; @@ -49,28 +49,12 @@ static void do_message_pass(int target, int msg) return; } - virq = per_cpu(ps3_ipi_virqs, target)[msg]; + virq = per_cpu(ps3_ipi_virqs, cpu)[msg]; result = ps3_send_event_locally(virq); if (result) DBG("%s:%d: ps3_send_event_locally(%d, %d) failed" - " (%d)\n", __func__, __LINE__, target, msg, result); -} - -static void ps3_smp_message_pass(int target, int msg) -{ - int cpu; - - if (target < NR_CPUS) - do_message_pass(target, msg); - else if (target == MSG_ALL_BUT_SELF) { - for_each_online_cpu(cpu) - if (cpu != smp_processor_id()) - do_message_pass(cpu, msg); - } else { - for_each_online_cpu(cpu) - do_message_pass(cpu, msg); - } + " (%d)\n", __func__, __LINE__, cpu, msg, result); } static int ps3_smp_probe(void) diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 39a472e9e80..375a9f92158 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -197,7 +197,7 @@ static void spu_unmap(struct spu *spu) * The current HV requires the spu shadow regs to be mapped with the * PTE page protection bits set as read-only (PP=3). This implementation * uses the low level __ioremap() to bypass the page protection settings - * inforced by ioremap_flags() to get the needed PTE bits set for the + * inforced by ioremap_prot() to get the needed PTE bits set for the * shadow regs. */ @@ -214,7 +214,7 @@ static int __init setup_areas(struct spu *spu) goto fail_ioremap; } - spu->local_store = (__force void *)ioremap_flags(spu->local_store_phys, + spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys, LS_SIZE, _PAGE_NO_CACHE); if (!spu->local_store) { diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 5b3da4b4ea7..71af4c5d6c0 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -3,7 +3,10 @@ config PPC_PSERIES bool "IBM pSeries & new (POWER5-based) iSeries" select MPIC select PCI_MSI - select XICS + select PPC_XICS + select PPC_ICP_NATIVE + select PPC_ICP_HV + select PPC_ICS_RTAS select PPC_I8259 select PPC_RTAS select PPC_RTAS_DAEMON @@ -47,6 +50,24 @@ config SCANLOG tristate "Scanlog dump interface" depends on RTAS_PROC && PPC_PSERIES +config IO_EVENT_IRQ + bool "IO Event Interrupt support" + depends on PPC_PSERIES + default y + help + Select this option, if you want to enable support for IO Event + interrupts. IO event interrupt is a mechanism provided by RTAS + to return information about hardware error and non-error events + which may need OS attention. RTAS returns events for multiple + event types and scopes. Device drivers can register their handlers + to receive events. + + This option will only enable the IO event platform code. You + will still need to enable or compile the actual drivers + that use this infrastruture to handle IO event interrupts. + + Say Y if you are unsure. + config LPARCFG bool "LPAR Configuration Data" depends on PPC_PSERIES || PPC_ISERIES diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index fc5237810ec..3556e402cbf 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -5,7 +5,6 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_SCANLOG) += scanlog.o obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_KEXEC) += kexec.o @@ -22,6 +21,7 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DTL) += dtl.o +obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o ifeq ($(CONFIG_PPC_PSERIES),y) obj-$(CONFIG_SUSPEND) += suspend.o diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index c371bc06434..e9190073bb9 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -52,10 +52,10 @@ static u8 dtl_event_mask = 0x7; /* - * Size of per-cpu log buffers. Default is just under 16 pages worth. + * Size of per-cpu log buffers. Firmware requires that the buffer does + * not cross a 4k boundary. */ -static int dtl_buf_entries = (16 * 85); - +static int dtl_buf_entries = N_DISPATCH_LOG; #ifdef CONFIG_VIRT_CPU_ACCOUNTING struct dtl_ring { @@ -151,7 +151,7 @@ static int dtl_start(struct dtl *dtl) /* Register our dtl buffer with the hypervisor. The HV expects the * buffer size to be passed in the second word of the buffer */ - ((u32 *)dtl->buf)[1] = dtl->buf_entries * sizeof(struct dtl_entry); + ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES; hwcpu = get_hard_smp_processor_id(dtl->cpu); addr = __pa(dtl->buf); @@ -196,13 +196,15 @@ static int dtl_enable(struct dtl *dtl) long int rc; struct dtl_entry *buf = NULL; + if (!dtl_cache) + return -ENOMEM; + /* only allow one reader */ if (dtl->buf) return -EBUSY; n_entries = dtl_buf_entries; - buf = kmalloc_node(n_entries * sizeof(struct dtl_entry), - GFP_KERNEL, cpu_to_node(dtl->cpu)); + buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu)); if (!buf) { printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", __func__, dtl->cpu); @@ -223,7 +225,7 @@ static int dtl_enable(struct dtl *dtl) spin_unlock(&dtl->lock); if (rc) - kfree(buf); + kmem_cache_free(dtl_cache, buf); return rc; } @@ -231,7 +233,7 @@ static void dtl_disable(struct dtl *dtl) { spin_lock(&dtl->lock); dtl_stop(dtl); - kfree(dtl->buf); + kmem_cache_free(dtl_cache, dtl->buf); dtl->buf = NULL; dtl->buf_entries = 0; spin_unlock(&dtl->lock); @@ -365,7 +367,7 @@ static int dtl_init(void) event_mask_file = debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask); - buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0600, + buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries); if (!event_mask_file || !buf_entries_file) { diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 89649173d3a..46b55cf563e 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -93,6 +93,7 @@ static int ibm_slot_error_detail; static int ibm_get_config_addr_info; static int ibm_get_config_addr_info2; static int ibm_configure_bridge; +static int ibm_configure_pe; int eeh_subsystem_enabled; EXPORT_SYMBOL(eeh_subsystem_enabled); @@ -261,6 +262,8 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) pci_regs_buf[0] = 0; rtas_pci_enable(pdn, EEH_THAW_MMIO); + rtas_configure_bridge(pdn); + eeh_restore_bars(pdn); loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen); @@ -448,6 +451,39 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag) raw_spin_unlock_irqrestore(&confirm_error_lock, flags); } +void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) +{ + struct device_node *dn; + + for_each_child_of_node(parent, dn) { + if (PCI_DN(dn)) { + + struct pci_dev *dev = PCI_DN(dn)->pcidev; + + if (dev && dev->driver) + *freset |= dev->needs_freset; + + __eeh_set_pe_freset(dn, freset); + } + } +} + +void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) +{ + struct pci_dev *dev; + dn = find_device_pe(dn); + + /* Back up one, since config addrs might be shared */ + if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) + dn = dn->parent; + + dev = PCI_DN(dn)->pcidev; + if (dev) + *freset |= dev->needs_freset; + + __eeh_set_pe_freset(dn, freset); +} + /** * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze * @dn device node @@ -692,15 +728,24 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state) if (pdn->eeh_pe_config_addr) config_addr = pdn->eeh_pe_config_addr; - rc = rtas_call(ibm_set_slot_reset,4,1, NULL, + rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL, config_addr, BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid), state); - if (rc) - printk (KERN_WARNING "EEH: Unable to reset the failed slot," - " (%d) #RST=%d dn=%s\n", - rc, state, pdn->node->full_name); + + /* Fundamental-reset not supported on this PE, try hot-reset */ + if (rc == -8 && state == 3) { + rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, + BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), 1); + if (rc) + printk(KERN_WARNING + "EEH: Unable to reset the failed slot," + " #RST=%d dn=%s\n", + rc, pdn->node->full_name); + } } /** @@ -736,18 +781,21 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat /** * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second * @pdn: pci device node to be reset. - * - * Return 0 if success, else a non-zero value. */ static void __rtas_set_slot_reset(struct pci_dn *pdn) { - struct pci_dev *dev = pdn->pcidev; + unsigned int freset = 0; - /* Determine type of EEH reset required by device, - * default hot reset or fundamental reset - */ - if (dev && dev->needs_freset) + /* Determine type of EEH reset required for + * Partitionable Endpoint, a hot-reset (1) + * or a fundamental reset (3). + * A fundamental reset required by any device under + * Partitionable Endpoint trumps hot-reset. + */ + eeh_set_pe_freset(pdn->node, &freset); + + if (freset) rtas_pci_slot_reset(pdn, 3); else rtas_pci_slot_reset(pdn, 1); @@ -895,13 +943,20 @@ rtas_configure_bridge(struct pci_dn *pdn) { int config_addr; int rc; + int token; /* Use PE configuration address, if present */ config_addr = pdn->eeh_config_addr; if (pdn->eeh_pe_config_addr) config_addr = pdn->eeh_pe_config_addr; - rc = rtas_call(ibm_configure_bridge,3,1, NULL, + /* Use new configure-pe function, if supported */ + if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) + token = ibm_configure_pe; + else + token = ibm_configure_bridge; + + rc = rtas_call(token, 3, 1, NULL, config_addr, BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); @@ -1077,6 +1132,7 @@ void __init eeh_init(void) ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); + ibm_configure_pe = rtas_token("ibm,configure-pe"); if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) return; diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index b8d70f5d9aa..1b6cb10589e 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -328,7 +328,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; - const char *location, *pci_str, *drv_str; + const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; frozen_dn = find_device_pe(event->dn); if (!frozen_dn) { @@ -364,13 +364,8 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) frozen_pdn = PCI_DN(frozen_dn); frozen_pdn->eeh_freeze_count++; - if (frozen_pdn->pcidev) { - pci_str = pci_name (frozen_pdn->pcidev); - drv_str = pcid_name (frozen_pdn->pcidev); - } else { - pci_str = eeh_pci_name(event->dev); - drv_str = pcid_name (event->dev); - } + pci_str = eeh_pci_name(event->dev); + drv_str = pcid_name(event->dev); if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; @@ -378,8 +373,17 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) printk(KERN_WARNING "EEH: This PCI device has failed %d times in the last hour:\n", frozen_pdn->eeh_freeze_count); + + if (frozen_pdn->pcidev) { + bus_pci_str = pci_name(frozen_pdn->pcidev); + bus_drv_str = pcid_name(frozen_pdn->pcidev); + printk(KERN_WARNING + "EEH: Bus location=%s driver=%s pci addr=%s\n", + location, bus_drv_str, bus_pci_str); + } + printk(KERN_WARNING - "EEH: location=%s driver=%s pci addr=%s\n", + "EEH: Device location=%s driver=%s pci addr=%s\n", location, drv_str, pci_str); /* Walk the various device drivers attached to this slot through diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index ef8c45489e2..46f13a3c5d0 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -19,6 +19,7 @@ */ #include <linux/kernel.h> +#include <linux/interrupt.h> #include <linux/delay.h> #include <linux/cpu.h> #include <asm/system.h> @@ -28,7 +29,7 @@ #include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/pSeries_reconfig.h> -#include "xics.h" +#include <asm/xics.h> #include "plpar_wrappers.h" #include "offline_states.h" @@ -280,7 +281,7 @@ static int pseries_add_processor(struct device_node *np) } for_each_cpu(cpu, tmp) { - BUG_ON(cpumask_test_cpu(cpu, cpu_present_mask)); + BUG_ON(cpu_present(cpu)); set_cpu_present(cpu, true); set_hard_smp_processor_id(cpu, *intserv++); } diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c new file mode 100644 index 00000000000..c829e6067d5 --- /dev/null +++ b/arch/powerpc/platforms/pseries/io_event_irq.c @@ -0,0 +1,231 @@ +/* + * Copyright 2010 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/of.h> +#include <linux/list.h> +#include <linux/notifier.h> + +#include <asm/machdep.h> +#include <asm/rtas.h> +#include <asm/irq.h> +#include <asm/io_event_irq.h> + +#include "pseries.h" + +/* + * IO event interrupt is a mechanism provided by RTAS to return + * information about hardware error and non-error events. Device + * drivers can register their event handlers to receive events. + * Device drivers are expected to use atomic_notifier_chain_register() + * and atomic_notifier_chain_unregister() to register and unregister + * their event handlers. Since multiple IO event types and scopes + * share an IO event interrupt, the event handlers are called one + * by one until the IO event is claimed by one of the handlers. + * The event handlers are expected to return NOTIFY_OK if the + * event is handled by the event handler or NOTIFY_DONE if the + * event does not belong to the handler. + * + * Usage: + * + * Notifier function: + * #include <asm/io_event_irq.h> + * int event_handler(struct notifier_block *nb, unsigned long val, void *data) { + * p = (struct pseries_io_event_sect_data *) data; + * if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE; + * : + * : + * return NOTIFY_OK; + * } + * struct notifier_block event_nb = { + * .notifier_call = event_handler, + * } + * + * Registration: + * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb); + * + * Unregistration: + * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb); + */ + +ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list); +EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list); + +static int ioei_check_exception_token; + +/* pSeries event log format */ + +/* Two bytes ASCII section IDs */ +#define PSERIES_ELOG_SECT_ID_PRIV_HDR (('P' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_USER_HDR (('U' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_PRIMARY_SRC (('P' << 8) | 'S') +#define PSERIES_ELOG_SECT_ID_EXTENDED_UH (('E' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_FAILING_MTMS (('M' << 8) | 'T') +#define PSERIES_ELOG_SECT_ID_SECONDARY_SRC (('S' << 8) | 'S') +#define PSERIES_ELOG_SECT_ID_DUMP_LOCATOR (('D' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_FW_ERROR (('S' << 8) | 'W') +#define PSERIES_ELOG_SECT_ID_IMPACT_PART_ID (('L' << 8) | 'P') +#define PSERIES_ELOG_SECT_ID_LOGIC_RESOURCE_ID (('L' << 8) | 'R') +#define PSERIES_ELOG_SECT_ID_HMC_ID (('H' << 8) | 'M') +#define PSERIES_ELOG_SECT_ID_EPOW (('E' << 8) | 'P') +#define PSERIES_ELOG_SECT_ID_IO_EVENT (('I' << 8) | 'E') +#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO (('M' << 8) | 'I') +#define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D') + +/* Vendor specific Platform Event Log Format, Version 6, section header */ +struct pseries_elog_section { + uint16_t id; /* 0x00 2-byte ASCII section ID */ + uint16_t length; /* 0x02 Section length in bytes */ + uint8_t version; /* 0x04 Section version */ + uint8_t subtype; /* 0x05 Section subtype */ + uint16_t creator_component; /* 0x06 Creator component ID */ + uint8_t data[]; /* 0x08 Start of section data */ +}; + +static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; + +/** + * Find data portion of a specific section in RTAS extended event log. + * @elog: RTAS error/event log. + * @sect_id: secsion ID. + * + * Return: + * pointer to the section data of the specified section + * NULL if not found + */ +static struct pseries_elog_section *find_xelog_section(struct rtas_error_log *elog, + uint16_t sect_id) +{ + struct rtas_ext_event_log_v6 *xelog = + (struct rtas_ext_event_log_v6 *) elog->buffer; + struct pseries_elog_section *sect; + unsigned char *p, *log_end; + + /* Check that we understand the format */ + if (elog->extended_log_length < sizeof(struct rtas_ext_event_log_v6) || + xelog->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG || + xelog->company_id != RTAS_V6EXT_COMPANY_ID_IBM) + return NULL; + + log_end = elog->buffer + elog->extended_log_length; + p = xelog->vendor_log; + while (p < log_end) { + sect = (struct pseries_elog_section *)p; + if (sect->id == sect_id) + return sect; + p += sect->length; + } + return NULL; +} + +/** + * Find the data portion of an IO Event section from event log. + * @elog: RTAS error/event log. + * + * Return: + * pointer to a valid IO event section data. NULL if not found. + */ +static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog) +{ + struct pseries_elog_section *sect; + + /* We should only ever get called for io-event interrupts, but if + * we do get called for another type then something went wrong so + * make some noise about it. + * RTAS_TYPE_IO only exists in extended event log version 6 or later. + * No need to check event log version. + */ + if (unlikely(elog->type != RTAS_TYPE_IO)) { + printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d", + elog->type); + return NULL; + } + + sect = find_xelog_section(elog, PSERIES_ELOG_SECT_ID_IO_EVENT); + if (unlikely(!sect)) { + printk_once(KERN_WARNING "io_event_irq: RTAS extended event " + "log does not contain an IO Event section. " + "Could be a bug in system firmware!\n"); + return NULL; + } + return (struct pseries_io_event *) §->data; +} + +/* + * PAPR: + * - check-exception returns the first found error or event and clear that + * error or event so it is reported once. + * - Each interrupt returns one event. If a plateform chooses to report + * multiple events through a single interrupt, it must ensure that the + * interrupt remains asserted until check-exception has been used to + * process all out-standing events for that interrupt. + * + * Implementation notes: + * - Events must be processed in the order they are returned. Hence, + * sequential in nature. + * - The owner of an event is determined by combinations of scope, + * event type, and sub-type. There is no easy way to pre-sort clients + * by scope or event type alone. For example, Torrent ISR route change + * event is reported with scope 0x00 (Not Applicatable) rather than + * 0x3B (Torrent-hub). It is better to let the clients to identify + * who owns the the event. + */ + +static irqreturn_t ioei_interrupt(int irq, void *dev_id) +{ + struct pseries_io_event *event; + int rtas_rc; + + for (;;) { + rtas_rc = rtas_call(ioei_check_exception_token, 6, 1, NULL, + RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), + RTAS_IO_EVENTS, 1 /* Time Critical */, + __pa(ioei_rtas_buf), + RTAS_DATA_BUF_SIZE); + if (rtas_rc != 0) + break; + + event = ioei_find_event((struct rtas_error_log *)ioei_rtas_buf); + if (!event) + continue; + + atomic_notifier_call_chain(&pseries_ioei_notifier_list, + 0, event); + } + return IRQ_HANDLED; +} + +static int __init ioei_init(void) +{ + struct device_node *np; + + ioei_check_exception_token = rtas_token("check-exception"); + if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE) { + pr_warning("IO Event IRQ not supported on this system !\n"); + return -ENODEV; + } + np = of_find_node_by_path("/event-sources/ibm,io-events"); + if (np) { + request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT"); + of_node_put(np); + } else { + pr_err("io_event_irq: No ibm,io-events on system! " + "IO Event interrupt disabled.\n"); + return -ENODEV; + } + return 0; +} +machine_subsys_initcall(pseries, ioei_init); + diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 6d5412a18b2..01faab9456c 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -659,15 +659,18 @@ static void remove_ddw(struct device_node *np) { struct dynamic_dma_window_prop *dwp; struct property *win64; - const u32 *ddr_avail; + const u32 *ddw_avail; u64 liobn; int len, ret; - ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len); + ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); - if (!win64 || !ddr_avail || len < 3 * sizeof(u32)) + if (!win64) return; + if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp)) + goto delprop; + dwp = win64->value; liobn = (u64)be32_to_cpu(dwp->liobn); @@ -681,28 +684,29 @@ static void remove_ddw(struct device_node *np) pr_debug("%s successfully cleared tces in window.\n", np->full_name); - ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn); + ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); if (ret) pr_warning("%s: failed to remove direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np->full_name, ret, ddr_avail[2], liobn); + np->full_name, ret, ddw_avail[2], liobn); else pr_debug("%s: successfully removed direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np->full_name, ret, ddr_avail[2], liobn); -} + np->full_name, ret, ddw_avail[2], liobn); +delprop: + ret = prom_remove_property(np, win64); + if (ret) + pr_warning("%s: failed to remove direct window property: %d\n", + np->full_name, ret); +} -static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn) +static u64 find_existing_ddw(struct device_node *pdn) { - struct device_node *dn; - struct pci_dn *pcidn; struct direct_window *window; const struct dynamic_dma_window_prop *direct64; u64 dma_addr = 0; - dn = pci_device_to_OF_node(dev); - pcidn = PCI_DN(dn); spin_lock(&direct_window_list_lock); /* check if we already created a window and dupe that config if so */ list_for_each_entry(window, &direct_window_list, list) { @@ -717,36 +721,40 @@ static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node * return dma_addr; } -static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn) +static int find_existing_ddw_windows(void) { - struct device_node *dn; - struct pci_dn *pcidn; int len; + struct device_node *pdn; struct direct_window *window; const struct dynamic_dma_window_prop *direct64; - u64 dma_addr = 0; - dn = pci_device_to_OF_node(dev); - pcidn = PCI_DN(dn); - direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); - if (direct64) { + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + for_each_node_with_property(pdn, DIRECT64_PROPNAME) { + direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); + if (!direct64) + continue; + window = kzalloc(sizeof(*window), GFP_KERNEL); - if (!window) { + if (!window || len < sizeof(struct dynamic_dma_window_prop)) { + kfree(window); remove_ddw(pdn); - } else { - window->device = pdn; - window->prop = direct64; - spin_lock(&direct_window_list_lock); - list_add(&window->list, &direct_window_list); - spin_unlock(&direct_window_list_lock); - dma_addr = direct64->dma_base; + continue; } + + window->device = pdn; + window->prop = direct64; + spin_lock(&direct_window_list_lock); + list_add(&window->list, &direct_window_list); + spin_unlock(&direct_window_list_lock); } - return dma_addr; + return 0; } +machine_arch_initcall(pseries, find_existing_ddw_windows); -static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail, +static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, struct ddw_query_response *query) { struct device_node *dn; @@ -767,15 +775,15 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail, if (pcidn->eeh_pe_config_addr) cfg_addr = pcidn->eeh_pe_config_addr; buid = pcidn->phb->buid; - ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query, + ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, cfg_addr, BUID_HI(buid), BUID_LO(buid)); dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" - " returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid), + " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid), BUID_LO(buid), ret); return ret; } -static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail, +static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, struct ddw_create_response *create, int page_shift, int window_shift) { @@ -800,12 +808,12 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail, do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ - ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr, + ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, window_shift); } while (rtas_busy_delay(ret)); dev_info(&dev->dev, "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " - "(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1], + "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1], cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, window_shift, ret, create->liobn, create->addr_hi, create->addr_lo); @@ -831,18 +839,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) int page_shift; u64 dma_addr, max_addr; struct device_node *dn; - const u32 *uninitialized_var(ddr_avail); + const u32 *uninitialized_var(ddw_avail); struct direct_window *window; - struct property *uninitialized_var(win64); + struct property *win64; struct dynamic_dma_window_prop *ddwprop; mutex_lock(&direct_window_init_mutex); - dma_addr = dupe_ddw_if_already_created(dev, pdn); - if (dma_addr != 0) - goto out_unlock; - - dma_addr = dupe_ddw_if_kexec(dev, pdn); + dma_addr = find_existing_ddw(pdn); if (dma_addr != 0) goto out_unlock; @@ -854,8 +858,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * for the given node in that order. * the property is actually in the parent, not the PE */ - ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); - if (!ddr_avail || len < 3 * sizeof(u32)) + ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); + if (!ddw_avail || len < 3 * sizeof(u32)) goto out_unlock; /* @@ -865,7 +869,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * of page sizes: supported and supported for migrate-dma. */ dn = pci_device_to_OF_node(dev); - ret = query_ddw(dev, ddr_avail, &query); + ret = query_ddw(dev, ddw_avail, &query); if (ret != 0) goto out_unlock; @@ -907,13 +911,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) } win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL); win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL); + win64->length = sizeof(*ddwprop); if (!win64->name || !win64->value) { dev_info(&dev->dev, "couldn't allocate property name and value\n"); goto out_free_prop; } - ret = create_ddw(dev, ddr_avail, &create, page_shift, len); + ret = create_ddw(dev, ddw_avail, &create, page_shift, len); if (ret != 0) goto out_free_prop; @@ -1021,13 +1026,16 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) const void *dma_window = NULL; u64 dma_offset; - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + if (!dev->dma_mask) return -EIO; + if (!dev_is_pci(dev)) + goto check_mask; + + pdev = to_pci_dev(dev); + /* only attempt to use a new window if 64-bit DMA is requested */ if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) { - pdev = to_pci_dev(dev); - dn = pci_device_to_OF_node(pdev); dev_dbg(dev, "node is %s\n", dn->full_name); @@ -1054,12 +1062,17 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) } } - /* fall-through to iommu ops */ - if (!ddw_enabled) { - dev_info(dev, "Using 32-bit DMA via iommu\n"); + /* fall back on iommu ops, restore table pointer with ops */ + if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) { + dev_info(dev, "Restoring 32-bit DMA via iommu\n"); set_dma_ops(dev, &dma_iommu_ops); + pci_dma_dev_setup_pSeriesLP(pdev); } +check_mask: + if (!dma_supported(dev, dma_mask)) + return -EIO; + *dev->dma_mask = dma_mask; return 0; } diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 77d38a5e2ff..54cf3a4aa16 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -7,15 +7,18 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/kernel.h> +#include <linux/interrupt.h> + #include <asm/machdep.h> #include <asm/page.h> #include <asm/firmware.h> #include <asm/kexec.h> #include <asm/mpic.h> +#include <asm/xics.h> #include <asm/smp.h> #include "pseries.h" -#include "xics.h" #include "plpar_wrappers.h" static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index ca5d5898d32..39e6e0a7b2f 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -329,6 +329,8 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, /* Make pHyp happy */ if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU)) hpte_r &= ~_PAGE_COHERENT; + if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) + flags |= H_COALESCE_CAND; lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); if (unlikely(lpar_rc == H_PTEG_FULL)) { @@ -573,7 +575,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) unsigned long i, pix, rc; unsigned long flags = 0; struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; unsigned long va; unsigned long hash, index, shift, hidx, slot; @@ -771,3 +773,47 @@ out: local_irq_restore(flags); } #endif + +/** + * h_get_mpp + * H_GET_MPP hcall returns info in 7 parms + */ +int h_get_mpp(struct hvcall_mpp_data *mpp_data) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_MPP, retbuf); + + mpp_data->entitled_mem = retbuf[0]; + mpp_data->mapped_mem = retbuf[1]; + + mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + mpp_data->pool_num = retbuf[2] & 0xffff; + + mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; + mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; + mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; + + mpp_data->pool_size = retbuf[4]; + mpp_data->loan_request = retbuf[5]; + mpp_data->backing_mem = retbuf[6]; + + return rc; +} +EXPORT_SYMBOL(h_get_mpp); + +int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 }; + + rc = plpar_hcall9(H_GET_MPP_X, retbuf); + + mpp_x_data->coalesced_bytes = retbuf[0]; + mpp_x_data->pool_coalesced_bytes = retbuf[1]; + mpp_x_data->pool_purr_cycles = retbuf[2]; + mpp_x_data->pool_spurr_cycles = retbuf[3]; + + return rc; +} diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h index d9801117124..4bf21207d7d 100644 --- a/arch/powerpc/platforms/pseries/plpar_wrappers.h +++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h @@ -270,31 +270,4 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len, lbuf[1]); } -static inline long plpar_eoi(unsigned long xirr) -{ - return plpar_hcall_norets(H_EOI, xirr); -} - -static inline long plpar_cppr(unsigned long cppr) -{ - return plpar_hcall_norets(H_CPPR, cppr); -} - -static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr) -{ - return plpar_hcall_norets(H_IPI, servernum, mfrr); -} - -static inline long plpar_xirr(unsigned long *xirr_ret, unsigned char cppr) -{ - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - rc = plpar_hcall(H_XIRR, retbuf, cppr); - - *xirr_ret = retbuf[0]; - - return rc; -} - #endif /* _PSERIES_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index c55d7ad9c64..086d2ae4e06 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -122,7 +122,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) status = rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, - irq_map[irq].hwirq, + virq_to_hw(irq), RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, critical, __pa(&ras_log_buf), rtas_get_error_log_max()); @@ -157,7 +157,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) status = rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, - irq_map[irq].hwirq, + virq_to_hw(irq), RTAS_INTERNAL_ERROR, 1 /*Time Critical */, __pa(&ras_log_buf), rtas_get_error_log_max()); @@ -227,7 +227,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) struct rtas_error_log *h, *errhdr = NULL; if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { - printk(KERN_ERR "FWNMI: corrupt r3\n"); + printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); return NULL; } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 6c42cfde841..593acceeff9 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -53,9 +53,9 @@ #include <asm/irq.h> #include <asm/time.h> #include <asm/nvram.h> -#include "xics.h" #include <asm/pmc.h> #include <asm/mpic.h> +#include <asm/xics.h> #include <asm/ppc-pci.h> #include <asm/i8259.h> #include <asm/udbg.h> @@ -205,6 +205,9 @@ static void __init pseries_mpic_init_IRQ(void) mpic_assign_isu(mpic, n, isuaddr); } + /* Setup top-level get_irq */ + ppc_md.get_irq = mpic_get_irq; + /* All ISUs are setup, complete initialization */ mpic_init(mpic); @@ -214,7 +217,7 @@ static void __init pseries_mpic_init_IRQ(void) static void __init pseries_xics_init_IRQ(void) { - xics_init_IRQ(); + xics_init(); pseries_setup_i8259_cascade(); } @@ -238,7 +241,6 @@ static void __init pseries_discover_pic(void) if (strstr(typep, "open-pic")) { pSeries_mpic_node = of_node_get(np); ppc_md.init_IRQ = pseries_mpic_init_IRQ; - ppc_md.get_irq = mpic_get_irq; setup_kexec_cpu_down_mpic(); smp_init_pseries_mpic(); return; @@ -276,6 +278,8 @@ static struct notifier_block pci_dn_reconfig_nb = { .notifier_call = pci_dn_reconfig_notifier, }; +struct kmem_cache *dtl_cache; + #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Allocate space for the dispatch trace log for all possible cpus @@ -287,18 +291,12 @@ static int alloc_dispatch_logs(void) int cpu, ret; struct paca_struct *pp; struct dtl_entry *dtl; - struct kmem_cache *dtl_cache; if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return 0; - dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, - DISPATCH_LOG_BYTES, 0, NULL); - if (!dtl_cache) { - pr_warn("Failed to create dispatch trace log buffer cache\n"); - pr_warn("Stolen time statistics will be unreliable\n"); + if (!dtl_cache) return 0; - } for_each_possible_cpu(cpu) { pp = &paca[cpu]; @@ -332,10 +330,27 @@ static int alloc_dispatch_logs(void) return 0; } - -early_initcall(alloc_dispatch_logs); +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ +static inline int alloc_dispatch_logs(void) +{ + return 0; +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +static int alloc_dispatch_log_kmem_cache(void) +{ + dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, + DISPATCH_LOG_BYTES, 0, NULL); + if (!dtl_cache) { + pr_warn("Failed to create dispatch trace log buffer cache\n"); + pr_warn("Stolen time statistics will be unreliable\n"); + return 0; + } + + return alloc_dispatch_logs(); +} +early_initcall(alloc_dispatch_log_kmem_cache); + static void __init pSeries_setup_arch(void) { /* Discover PIC type and setup ppc_md accordingly */ @@ -403,6 +418,16 @@ static int pseries_set_xdabr(unsigned long dabr) #define CMO_CHARACTERISTICS_TOKEN 44 #define CMO_MAXLENGTH 1026 +void pSeries_coalesce_init(void) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data)) + powerpc_firmware_features |= FW_FEATURE_XCMO; + else + powerpc_firmware_features &= ~FW_FEATURE_XCMO; +} + /** * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, * handle that here. (Stolen from parse_system_parameter_string) @@ -472,6 +497,7 @@ void pSeries_cmo_feature_init(void) pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, CMO_SecPSP); powerpc_firmware_features |= FW_FEATURE_CMO; + pSeries_coalesce_init(); } else pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, CMO_SecPSP); diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index a509c5292a6..fbffd7e47ab 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -44,10 +44,11 @@ #include <asm/mpic.h> #include <asm/vdso_datapage.h> #include <asm/cputhreads.h> +#include <asm/mpic.h> +#include <asm/xics.h> #include "plpar_wrappers.h" #include "pseries.h" -#include "xics.h" #include "offline_states.h" @@ -136,7 +137,6 @@ out: return 1; } -#ifdef CONFIG_XICS static void __devinit smp_xics_setup_cpu(int cpu) { if (cpu != boot_cpuid) @@ -151,14 +151,13 @@ static void __devinit smp_xics_setup_cpu(int cpu) set_default_offline_state(cpu); #endif } -#endif /* CONFIG_XICS */ -static void __devinit smp_pSeries_kick_cpu(int nr) +static int __devinit smp_pSeries_kick_cpu(int nr) { BUG_ON(nr < 0 || nr >= NR_CPUS); if (!smp_startup_cpu(nr)) - return; + return -ENOENT; /* * The processor is currently spinning, waiting for the @@ -180,6 +179,8 @@ static void __devinit smp_pSeries_kick_cpu(int nr) "Ret= %ld\n", nr, rc); } #endif + + return 0; } static int smp_pSeries_cpu_bootable(unsigned int nr) @@ -197,23 +198,22 @@ static int smp_pSeries_cpu_bootable(unsigned int nr) return 1; } -#ifdef CONFIG_MPIC + static struct smp_ops_t pSeries_mpic_smp_ops = { .message_pass = smp_mpic_message_pass, .probe = smp_mpic_probe, .kick_cpu = smp_pSeries_kick_cpu, .setup_cpu = smp_mpic_setup_cpu, }; -#endif -#ifdef CONFIG_XICS + static struct smp_ops_t pSeries_xics_smp_ops = { - .message_pass = smp_xics_message_pass, - .probe = smp_xics_probe, + .message_pass = smp_muxed_ipi_message_pass, + .cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */ + .probe = xics_smp_probe, .kick_cpu = smp_pSeries_kick_cpu, .setup_cpu = smp_xics_setup_cpu, .cpu_bootable = smp_pSeries_cpu_bootable, }; -#endif /* This is called very early */ static void __init smp_init_pseries(void) @@ -245,14 +245,12 @@ static void __init smp_init_pseries(void) pr_debug(" <- smp_init_pSeries()\n"); } -#ifdef CONFIG_MPIC void __init smp_init_pseries_mpic(void) { smp_ops = &pSeries_mpic_smp_ops; smp_init_pseries(); } -#endif void __init smp_init_pseries_xics(void) { diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c deleted file mode 100644 index d6901334d66..00000000000 --- a/arch/powerpc/platforms/pseries/xics.c +++ /dev/null @@ -1,949 +0,0 @@ -/* - * arch/powerpc/platforms/pseries/xics.c - * - * Copyright 2000 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/types.h> -#include <linux/threads.h> -#include <linux/kernel.h> -#include <linux/irq.h> -#include <linux/smp.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/radix-tree.h> -#include <linux/cpu.h> -#include <linux/msi.h> -#include <linux/of.h> -#include <linux/percpu.h> - -#include <asm/firmware.h> -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/smp.h> -#include <asm/rtas.h> -#include <asm/hvcall.h> -#include <asm/machdep.h> - -#include "xics.h" -#include "plpar_wrappers.h" - -static struct irq_host *xics_host; - -#define XICS_IPI 2 -#define XICS_IRQ_SPURIOUS 0 - -/* Want a priority other than 0. Various HW issues require this. */ -#define DEFAULT_PRIORITY 5 - -/* - * Mark IPIs as higher priority so we can take them inside interrupts that - * arent marked IRQF_DISABLED - */ -#define IPI_PRIORITY 4 - -/* The least favored priority */ -#define LOWEST_PRIORITY 0xFF - -/* The number of priorities defined above */ -#define MAX_NUM_PRIORITIES 3 - -static unsigned int default_server = 0xFF; -static unsigned int default_distrib_server = 0; -static unsigned int interrupt_server_size = 8; - -/* RTAS service tokens */ -static int ibm_get_xive; -static int ibm_set_xive; -static int ibm_int_on; -static int ibm_int_off; - -struct xics_cppr { - unsigned char stack[MAX_NUM_PRIORITIES]; - int index; -}; - -static DEFINE_PER_CPU(struct xics_cppr, xics_cppr); - -/* Direct hardware low level accessors */ - -/* The part of the interrupt presentation layer that we care about */ -struct xics_ipl { - union { - u32 word; - u8 bytes[4]; - } xirr_poll; - union { - u32 word; - u8 bytes[4]; - } xirr; - u32 dummy; - union { - u32 word; - u8 bytes[4]; - } qirr; -}; - -static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS]; - -static inline unsigned int direct_xirr_info_get(void) -{ - int cpu = smp_processor_id(); - - return in_be32(&xics_per_cpu[cpu]->xirr.word); -} - -static inline void direct_xirr_info_set(unsigned int value) -{ - int cpu = smp_processor_id(); - - out_be32(&xics_per_cpu[cpu]->xirr.word, value); -} - -static inline void direct_cppr_info(u8 value) -{ - int cpu = smp_processor_id(); - - out_8(&xics_per_cpu[cpu]->xirr.bytes[0], value); -} - -static inline void direct_qirr_info(int n_cpu, u8 value) -{ - out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value); -} - - -/* LPAR low level accessors */ - -static inline unsigned int lpar_xirr_info_get(unsigned char cppr) -{ - unsigned long lpar_rc; - unsigned long return_value; - - lpar_rc = plpar_xirr(&return_value, cppr); - if (lpar_rc != H_SUCCESS) - panic(" bad return code xirr - rc = %lx\n", lpar_rc); - return (unsigned int)return_value; -} - -static inline void lpar_xirr_info_set(unsigned int value) -{ - unsigned long lpar_rc; - - lpar_rc = plpar_eoi(value); - if (lpar_rc != H_SUCCESS) - panic("bad return code EOI - rc = %ld, value=%x\n", lpar_rc, - value); -} - -static inline void lpar_cppr_info(u8 value) -{ - unsigned long lpar_rc; - - lpar_rc = plpar_cppr(value); - if (lpar_rc != H_SUCCESS) - panic("bad return code cppr - rc = %lx\n", lpar_rc); -} - -static inline void lpar_qirr_info(int n_cpu , u8 value) -{ - unsigned long lpar_rc; - - lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value); - if (lpar_rc != H_SUCCESS) - panic("bad return code qirr - rc = %lx\n", lpar_rc); -} - - -/* Interface to generic irq subsystem */ - -#ifdef CONFIG_SMP -/* - * For the moment we only implement delivery to all cpus or one cpu. - * - * If the requested affinity is cpu_all_mask, we set global affinity. - * If not we set it to the first cpu in the mask, even if multiple cpus - * are set. This is so things like irqbalance (which set core and package - * wide affinities) do the right thing. - */ -static int get_irq_server(unsigned int virq, const struct cpumask *cpumask, - unsigned int strict_check) -{ - - if (!distribute_irqs) - return default_server; - - if (!cpumask_subset(cpu_possible_mask, cpumask)) { - int server = cpumask_first_and(cpu_online_mask, cpumask); - - if (server < nr_cpu_ids) - return get_hard_smp_processor_id(server); - - if (strict_check) - return -1; - } - - /* - * Workaround issue with some versions of JS20 firmware that - * deliver interrupts to cpus which haven't been started. This - * happens when using the maxcpus= boot option. - */ - if (cpumask_equal(cpu_online_mask, cpu_present_mask)) - return default_distrib_server; - - return default_server; -} -#else -#define get_irq_server(virq, cpumask, strict_check) (default_server) -#endif - -static void xics_unmask_irq(struct irq_data *d) -{ - unsigned int hwirq; - int call_status; - int server; - - pr_devel("xics: unmask virq %d\n", d->irq); - - hwirq = (unsigned int)irq_map[d->irq].hwirq; - pr_devel(" -> map to hwirq 0x%x\n", hwirq); - if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS) - return; - - server = get_irq_server(d->irq, d->affinity, 0); - - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq, server, - DEFAULT_PRIORITY); - if (call_status != 0) { - printk(KERN_ERR - "%s: ibm_set_xive irq %u server %x returned %d\n", - __func__, hwirq, server, call_status); - return; - } - - /* Now unmask the interrupt (often a no-op) */ - call_status = rtas_call(ibm_int_on, 1, 1, NULL, hwirq); - if (call_status != 0) { - printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n", - __func__, hwirq, call_status); - return; - } -} - -static unsigned int xics_startup(struct irq_data *d) -{ - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - if (d->msi_desc) - unmask_msi_irq(d); - - /* unmask it */ - xics_unmask_irq(d); - return 0; -} - -static void xics_mask_real_irq(unsigned int hwirq) -{ - int call_status; - - if (hwirq == XICS_IPI) - return; - - call_status = rtas_call(ibm_int_off, 1, 1, NULL, hwirq); - if (call_status != 0) { - printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n", - __func__, hwirq, call_status); - return; - } - - /* Have to set XIVE to 0xff to be able to remove a slot */ - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq, - default_server, 0xff); - if (call_status != 0) { - printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n", - __func__, hwirq, call_status); - return; - } -} - -static void xics_mask_irq(struct irq_data *d) -{ - unsigned int hwirq; - - pr_devel("xics: mask virq %d\n", d->irq); - - hwirq = (unsigned int)irq_map[d->irq].hwirq; - if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS) - return; - xics_mask_real_irq(hwirq); -} - -static void xics_mask_unknown_vec(unsigned int vec) -{ - printk(KERN_ERR "Interrupt %u (real) is invalid, disabling it.\n", vec); - xics_mask_real_irq(vec); -} - -static inline unsigned int xics_xirr_vector(unsigned int xirr) -{ - /* - * The top byte is the old cppr, to be restored on EOI. - * The remaining 24 bits are the vector. - */ - return xirr & 0x00ffffff; -} - -static void push_cppr(unsigned int vec) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - - if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) - return; - - if (vec == XICS_IPI) - os_cppr->stack[++os_cppr->index] = IPI_PRIORITY; - else - os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY; -} - -static unsigned int xics_get_irq_direct(void) -{ - unsigned int xirr = direct_xirr_info_get(); - unsigned int vec = xics_xirr_vector(xirr); - unsigned int irq; - - if (vec == XICS_IRQ_SPURIOUS) - return NO_IRQ; - - irq = irq_radix_revmap_lookup(xics_host, vec); - if (likely(irq != NO_IRQ)) { - push_cppr(vec); - return irq; - } - - /* We don't have a linux mapping, so have rtas mask it. */ - xics_mask_unknown_vec(vec); - - /* We might learn about it later, so EOI it */ - direct_xirr_info_set(xirr); - return NO_IRQ; -} - -static unsigned int xics_get_irq_lpar(void) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - unsigned int xirr = lpar_xirr_info_get(os_cppr->stack[os_cppr->index]); - unsigned int vec = xics_xirr_vector(xirr); - unsigned int irq; - - if (vec == XICS_IRQ_SPURIOUS) - return NO_IRQ; - - irq = irq_radix_revmap_lookup(xics_host, vec); - if (likely(irq != NO_IRQ)) { - push_cppr(vec); - return irq; - } - - /* We don't have a linux mapping, so have RTAS mask it. */ - xics_mask_unknown_vec(vec); - - /* We might learn about it later, so EOI it */ - lpar_xirr_info_set(xirr); - return NO_IRQ; -} - -static unsigned char pop_cppr(void) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - - if (WARN_ON(os_cppr->index < 1)) - return LOWEST_PRIORITY; - - return os_cppr->stack[--os_cppr->index]; -} - -static void xics_eoi_direct(struct irq_data *d) -{ - unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq; - - iosync(); - direct_xirr_info_set((pop_cppr() << 24) | hwirq); -} - -static void xics_eoi_lpar(struct irq_data *d) -{ - unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq; - - iosync(); - lpar_xirr_info_set((pop_cppr() << 24) | hwirq); -} - -static int -xics_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force) -{ - unsigned int hwirq; - int status; - int xics_status[2]; - int irq_server; - - hwirq = (unsigned int)irq_map[d->irq].hwirq; - if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS) - return -1; - - status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq); - - if (status) { - printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", - __func__, hwirq, status); - return -1; - } - - irq_server = get_irq_server(d->irq, cpumask, 1); - if (irq_server == -1) { - char cpulist[128]; - cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); - printk(KERN_WARNING - "%s: No online cpus in the mask %s for irq %d\n", - __func__, cpulist, d->irq); - return -1; - } - - status = rtas_call(ibm_set_xive, 3, 1, NULL, - hwirq, irq_server, xics_status[1]); - - if (status) { - printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", - __func__, hwirq, status); - return -1; - } - - return 0; -} - -static struct irq_chip xics_pic_direct = { - .name = "XICS", - .irq_startup = xics_startup, - .irq_mask = xics_mask_irq, - .irq_unmask = xics_unmask_irq, - .irq_eoi = xics_eoi_direct, - .irq_set_affinity = xics_set_affinity -}; - -static struct irq_chip xics_pic_lpar = { - .name = "XICS", - .irq_startup = xics_startup, - .irq_mask = xics_mask_irq, - .irq_unmask = xics_unmask_irq, - .irq_eoi = xics_eoi_lpar, - .irq_set_affinity = xics_set_affinity -}; - - -/* Interface to arch irq controller subsystem layer */ - -/* Points to the irq_chip we're actually using */ -static struct irq_chip *xics_irq_chip; - -static int xics_host_match(struct irq_host *h, struct device_node *node) -{ - /* IBM machines have interrupt parents of various funky types for things - * like vdevices, events, etc... The trick we use here is to match - * everything here except the legacy 8259 which is compatible "chrp,iic" - */ - return !of_device_is_compatible(node, "chrp,iic"); -} - -static int xics_host_map(struct irq_host *h, unsigned int virq, - irq_hw_number_t hw) -{ - pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); - - /* Insert the interrupt mapping into the radix tree for fast lookup */ - irq_radix_revmap_insert(xics_host, virq, hw); - - irq_set_status_flags(virq, IRQ_LEVEL); - irq_set_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq); - return 0; -} - -static int xics_host_xlate(struct irq_host *h, struct device_node *ct, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_flags) - -{ - /* Current xics implementation translates everything - * to level. It is not technically right for MSIs but this - * is irrelevant at this point. We might get smarter in the future - */ - *out_hwirq = intspec[0]; - *out_flags = IRQ_TYPE_LEVEL_LOW; - - return 0; -} - -static struct irq_host_ops xics_host_ops = { - .match = xics_host_match, - .map = xics_host_map, - .xlate = xics_host_xlate, -}; - -static void __init xics_init_host(void) -{ - if (firmware_has_feature(FW_FEATURE_LPAR)) - xics_irq_chip = &xics_pic_lpar; - else - xics_irq_chip = &xics_pic_direct; - - xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops, - XICS_IRQ_SPURIOUS); - BUG_ON(xics_host == NULL); - irq_set_default_host(xics_host); -} - - -/* Inter-processor interrupt support */ - -#ifdef CONFIG_SMP -/* - * XICS only has a single IPI, so encode the messages per CPU - */ -static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message); - -static inline void smp_xics_do_message(int cpu, int msg) -{ - unsigned long *tgt = &per_cpu(xics_ipi_message, cpu); - - set_bit(msg, tgt); - mb(); - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_qirr_info(cpu, IPI_PRIORITY); - else - direct_qirr_info(cpu, IPI_PRIORITY); -} - -void smp_xics_message_pass(int target, int msg) -{ - unsigned int i; - - if (target < NR_CPUS) { - smp_xics_do_message(target, msg); - } else { - for_each_online_cpu(i) { - if (target == MSG_ALL_BUT_SELF - && i == smp_processor_id()) - continue; - smp_xics_do_message(i, msg); - } - } -} - -static irqreturn_t xics_ipi_dispatch(int cpu) -{ - unsigned long *tgt = &per_cpu(xics_ipi_message, cpu); - - mb(); /* order mmio clearing qirr */ - while (*tgt) { - if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) { - smp_message_recv(PPC_MSG_CALL_FUNCTION); - } - if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) { - smp_message_recv(PPC_MSG_RESCHEDULE); - } - if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) { - smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE); - } -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) - if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) { - smp_message_recv(PPC_MSG_DEBUGGER_BREAK); - } -#endif - } - return IRQ_HANDLED; -} - -static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id) -{ - int cpu = smp_processor_id(); - - direct_qirr_info(cpu, 0xff); - - return xics_ipi_dispatch(cpu); -} - -static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id) -{ - int cpu = smp_processor_id(); - - lpar_qirr_info(cpu, 0xff); - - return xics_ipi_dispatch(cpu); -} - -static void xics_request_ipi(void) -{ - unsigned int ipi; - int rc; - - ipi = irq_create_mapping(xics_host, XICS_IPI); - BUG_ON(ipi == NO_IRQ); - - /* - * IPIs are marked IRQF_DISABLED as they must run with irqs - * disabled - */ - irq_set_handler(ipi, handle_percpu_irq); - if (firmware_has_feature(FW_FEATURE_LPAR)) - rc = request_irq(ipi, xics_ipi_action_lpar, - IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL); - else - rc = request_irq(ipi, xics_ipi_action_direct, - IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL); - BUG_ON(rc); -} - -int __init smp_xics_probe(void) -{ - xics_request_ipi(); - - return cpumask_weight(cpu_possible_mask); -} - -#endif /* CONFIG_SMP */ - - -/* Initialization */ - -static void xics_update_irq_servers(void) -{ - int i, j; - struct device_node *np; - u32 ilen; - const u32 *ireg; - u32 hcpuid; - - /* Find the server numbers for the boot cpu. */ - np = of_get_cpu_node(boot_cpuid, NULL); - BUG_ON(!np); - - ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen); - if (!ireg) { - of_node_put(np); - return; - } - - i = ilen / sizeof(int); - hcpuid = get_hard_smp_processor_id(boot_cpuid); - - /* Global interrupt distribution server is specified in the last - * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last - * entry fom this property for current boot cpu id and use it as - * default distribution server - */ - for (j = 0; j < i; j += 2) { - if (ireg[j] == hcpuid) { - default_server = hcpuid; - default_distrib_server = ireg[j+1]; - } - } - - of_node_put(np); -} - -static void __init xics_map_one_cpu(int hw_id, unsigned long addr, - unsigned long size) -{ - int i; - - /* This may look gross but it's good enough for now, we don't quite - * have a hard -> linux processor id matching. - */ - for_each_possible_cpu(i) { - if (!cpu_present(i)) - continue; - if (hw_id == get_hard_smp_processor_id(i)) { - xics_per_cpu[i] = ioremap(addr, size); - return; - } - } -} - -static void __init xics_init_one_node(struct device_node *np, - unsigned int *indx) -{ - unsigned int ilen; - const u32 *ireg; - - /* This code does the theorically broken assumption that the interrupt - * server numbers are the same as the hard CPU numbers. - * This happens to be the case so far but we are playing with fire... - * should be fixed one of these days. -BenH. - */ - ireg = of_get_property(np, "ibm,interrupt-server-ranges", NULL); - - /* Do that ever happen ? we'll know soon enough... but even good'old - * f80 does have that property .. - */ - WARN_ON(ireg == NULL); - if (ireg) { - /* - * set node starting index for this node - */ - *indx = *ireg; - } - ireg = of_get_property(np, "reg", &ilen); - if (!ireg) - panic("xics_init_IRQ: can't find interrupt reg property"); - - while (ilen >= (4 * sizeof(u32))) { - unsigned long addr, size; - - /* XXX Use proper OF parsing code here !!! */ - addr = (unsigned long)*ireg++ << 32; - ilen -= sizeof(u32); - addr |= *ireg++; - ilen -= sizeof(u32); - size = (unsigned long)*ireg++ << 32; - ilen -= sizeof(u32); - size |= *ireg++; - ilen -= sizeof(u32); - xics_map_one_cpu(*indx, addr, size); - (*indx)++; - } -} - -void __init xics_init_IRQ(void) -{ - struct device_node *np; - u32 indx = 0; - int found = 0; - const u32 *isize; - - ppc64_boot_msg(0x20, "XICS Init"); - - ibm_get_xive = rtas_token("ibm,get-xive"); - ibm_set_xive = rtas_token("ibm,set-xive"); - ibm_int_on = rtas_token("ibm,int-on"); - ibm_int_off = rtas_token("ibm,int-off"); - - for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") { - found = 1; - if (firmware_has_feature(FW_FEATURE_LPAR)) { - of_node_put(np); - break; - } - xics_init_one_node(np, &indx); - } - if (found == 0) - return; - - /* get the bit size of server numbers */ - found = 0; - - for_each_compatible_node(np, NULL, "ibm,ppc-xics") { - isize = of_get_property(np, "ibm,interrupt-server#-size", NULL); - - if (!isize) - continue; - - if (!found) { - interrupt_server_size = *isize; - found = 1; - } else if (*isize != interrupt_server_size) { - printk(KERN_WARNING "XICS: " - "mismatched ibm,interrupt-server#-size\n"); - interrupt_server_size = max(*isize, - interrupt_server_size); - } - } - - xics_update_irq_servers(); - xics_init_host(); - - if (firmware_has_feature(FW_FEATURE_LPAR)) - ppc_md.get_irq = xics_get_irq_lpar; - else - ppc_md.get_irq = xics_get_irq_direct; - - xics_setup_cpu(); - - ppc64_boot_msg(0x21, "XICS Done"); -} - -/* Cpu startup, shutdown, and hotplug */ - -static void xics_set_cpu_priority(unsigned char cppr) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - - /* - * we only really want to set the priority when there's - * just one cppr value on the stack - */ - WARN_ON(os_cppr->index != 0); - - os_cppr->stack[0] = cppr; - - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_cppr_info(cppr); - else - direct_cppr_info(cppr); - iosync(); -} - -/* Have the calling processor join or leave the specified global queue */ -static void xics_set_cpu_giq(unsigned int gserver, unsigned int join) -{ - int index; - int status; - - if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL)) - return; - - index = (1UL << interrupt_server_size) - 1 - gserver; - - status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join); - - WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n", - GLOBAL_INTERRUPT_QUEUE, index, join, status); -} - -void xics_setup_cpu(void) -{ - xics_set_cpu_priority(LOWEST_PRIORITY); - - xics_set_cpu_giq(default_distrib_server, 1); -} - -void xics_teardown_cpu(void) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - int cpu = smp_processor_id(); - - /* - * we have to reset the cppr index to 0 because we're - * not going to return from the IPI - */ - os_cppr->index = 0; - xics_set_cpu_priority(0); - - /* Clear any pending IPI request */ - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_qirr_info(cpu, 0xff); - else - direct_qirr_info(cpu, 0xff); -} - -void xics_kexec_teardown_cpu(int secondary) -{ - xics_teardown_cpu(); - - /* - * we take the ipi irq but and never return so we - * need to EOI the IPI, but want to leave our priority 0 - * - * should we check all the other interrupts too? - * should we be flagging idle loop instead? - * or creating some task to be scheduled? - */ - - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_xirr_info_set((0x00 << 24) | XICS_IPI); - else - direct_xirr_info_set((0x00 << 24) | XICS_IPI); - - /* - * Some machines need to have at least one cpu in the GIQ, - * so leave the master cpu in the group. - */ - if (secondary) - xics_set_cpu_giq(default_distrib_server, 0); -} - -#ifdef CONFIG_HOTPLUG_CPU - -/* Interrupts are disabled. */ -void xics_migrate_irqs_away(void) -{ - int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id(); - int virq; - - /* If we used to be the default server, move to the new "boot_cpuid" */ - if (hw_cpu == default_server) - xics_update_irq_servers(); - - /* Reject any interrupt that was queued to us... */ - xics_set_cpu_priority(0); - - /* Remove ourselves from the global interrupt queue */ - xics_set_cpu_giq(default_distrib_server, 0); - - /* Allow IPIs again... */ - xics_set_cpu_priority(DEFAULT_PRIORITY); - - for_each_irq(virq) { - struct irq_desc *desc; - struct irq_chip *chip; - unsigned int hwirq; - int xics_status[2]; - int status; - unsigned long flags; - - /* We can't set affinity on ISA interrupts */ - if (virq < NUM_ISA_INTERRUPTS) - continue; - if (irq_map[virq].host != xics_host) - continue; - hwirq = (unsigned int)irq_map[virq].hwirq; - /* We need to get IPIs still. */ - if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS) - continue; - - desc = irq_to_desc(virq); - - /* We only need to migrate enabled IRQS */ - if (desc == NULL || desc->action == NULL) - continue; - - chip = irq_desc_get_chip(desc); - if (chip == NULL || chip->irq_set_affinity == NULL) - continue; - - raw_spin_lock_irqsave(&desc->lock, flags); - - status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq); - if (status) { - printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", - __func__, hwirq, status); - goto unlock; - } - - /* - * We only support delivery to all cpus or to one cpu. - * The irq has to be migrated only in the single cpu - * case. - */ - if (xics_status[0] != hw_cpu) - goto unlock; - - /* This is expected during cpu offline. */ - if (cpu_online(cpu)) - printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n", - virq, cpu); - - /* Reset affinity to all cpus */ - cpumask_setall(desc->irq_data.affinity); - chip->irq_set_affinity(&desc->irq_data, cpu_all_mask, true); -unlock: - raw_spin_unlock_irqrestore(&desc->lock, flags); - } -} -#endif diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h deleted file mode 100644 index d1d5a83039a..00000000000 --- a/arch/powerpc/platforms/pseries/xics.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * arch/powerpc/platforms/pseries/xics.h - * - * Copyright 2000 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _POWERPC_KERNEL_XICS_H -#define _POWERPC_KERNEL_XICS_H - -extern void xics_init_IRQ(void); -extern void xics_setup_cpu(void); -extern void xics_teardown_cpu(void); -extern void xics_kexec_teardown_cpu(int secondary); -extern void xics_migrate_irqs_away(void); -extern int smp_xics_probe(void); -extern void smp_xics_message_pass(int target, int msg); - -#endif /* _POWERPC_KERNEL_XICS_H */ diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig new file mode 100644 index 00000000000..c3c48eb62cc --- /dev/null +++ b/arch/powerpc/platforms/wsp/Kconfig @@ -0,0 +1,28 @@ +config PPC_WSP + bool + default n + +menu "WSP platform selection" + depends on PPC_BOOK3E_64 + +config PPC_PSR2 + bool "PSR-2 platform" + select PPC_A2 + select GENERIC_TBSYNC + select PPC_SCOM + select EPAPR_BOOT + select PPC_WSP + select PPC_XICS + select PPC_ICP_NATIVE + default y + +endmenu + +config PPC_A2_DD2 + bool "Support for DD2 based A2/WSP systems" + depends on PPC_A2 + +config WORKAROUND_ERRATUM_463 + depends on PPC_A2_DD2 + bool "Workaround erratum 463" + default y diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile new file mode 100644 index 00000000000..095be73d6cd --- /dev/null +++ b/arch/powerpc/platforms/wsp/Makefile @@ -0,0 +1,6 @@ +ccflags-y += -mno-minimal-toc + +obj-y += setup.o ics.o +obj-$(CONFIG_PPC_PSR2) += psr2.o opb_pic.o +obj-$(CONFIG_PPC_WSP) += scom_wsp.o +obj-$(CONFIG_SMP) += smp.o scom_smp.o diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c new file mode 100644 index 00000000000..e53bd9e7b12 --- /dev/null +++ b/arch/powerpc/platforms/wsp/ics.c @@ -0,0 +1,712 @@ +/* + * Copyright 2008-2011 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/msi.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/xics.h> + +#include "wsp.h" +#include "ics.h" + + +/* WSP ICS */ + +struct wsp_ics { + struct ics ics; + struct device_node *dn; + void __iomem *regs; + spinlock_t lock; + unsigned long *bitmap; + u32 chip_id; + u32 lsi_base; + u32 lsi_count; + u64 hwirq_start; + u64 count; +#ifdef CONFIG_SMP + int *hwirq_cpu_map; +#endif +}; + +#define to_wsp_ics(ics) container_of(ics, struct wsp_ics, ics) + +#define INT_SRC_LAYER_BUID_REG(base) ((base) + 0x00) +#define IODA_TBL_ADDR_REG(base) ((base) + 0x18) +#define IODA_TBL_DATA_REG(base) ((base) + 0x20) +#define XIVE_UPDATE_REG(base) ((base) + 0x28) +#define ICS_INT_CAPS_REG(base) ((base) + 0x30) + +#define TBL_AUTO_INCREMENT ((1UL << 63) | (1UL << 15)) +#define TBL_SELECT_XIST (1UL << 48) +#define TBL_SELECT_XIVT (1UL << 49) + +#define IODA_IRQ(irq) ((irq) & (0x7FFULL)) /* HRM 5.1.3.4 */ + +#define XIST_REQUIRED 0x8 +#define XIST_REJECTED 0x4 +#define XIST_PRESENTED 0x2 +#define XIST_PENDING 0x1 + +#define XIVE_SERVER_SHIFT 42 +#define XIVE_SERVER_MASK 0xFFFFULL +#define XIVE_PRIORITY_MASK 0xFFULL +#define XIVE_PRIORITY_SHIFT 32 +#define XIVE_WRITE_ENABLE (1ULL << 63) + +/* + * The docs refer to a 6 bit field called ChipID, which consists of a + * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero + * so we ignore it, and every where we use "chip id" in this code we + * mean the NodeID. + */ +#define WSP_ICS_CHIP_SHIFT 17 + + +static struct wsp_ics *ics_list; +static int num_ics; + +/* ICS Source controller accessors */ + +static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq) +{ + unsigned long flags; + u64 xive; + + spin_lock_irqsave(&ics->lock, flags); + out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq)); + xive = in_be64(IODA_TBL_DATA_REG(ics->regs)); + spin_unlock_irqrestore(&ics->lock, flags); + + return xive; +} + +static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive) +{ + xive &= ~XIVE_ADDR_MASK; + xive |= (irq & XIVE_ADDR_MASK); + xive |= XIVE_WRITE_ENABLE; + + out_be64(XIVE_UPDATE_REG(ics->regs), xive); +} + +static u64 xive_set_server(u64 xive, unsigned int server) +{ + u64 mask = ~(XIVE_SERVER_MASK << XIVE_SERVER_SHIFT); + + xive &= mask; + xive |= (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT; + + return xive; +} + +static u64 xive_set_priority(u64 xive, unsigned int priority) +{ + u64 mask = ~(XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT); + + xive &= mask; + xive |= (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT; + + return xive; +} + + +#ifdef CONFIG_SMP +/* Find logical CPUs within mask on a given chip and store result in ret */ +void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret) +{ + int cpu, chip; + struct device_node *cpu_dn, *dn; + const u32 *prop; + + cpumask_clear(ret); + for_each_cpu(cpu, mask) { + cpu_dn = of_get_cpu_node(cpu, NULL); + if (!cpu_dn) + continue; + + prop = of_get_property(cpu_dn, "at-node", NULL); + if (!prop) { + of_node_put(cpu_dn); + continue; + } + + dn = of_find_node_by_phandle(*prop); + of_node_put(cpu_dn); + + chip = wsp_get_chip_id(dn); + if (chip == chip_id) + cpumask_set_cpu(cpu, ret); + + of_node_put(dn); + } +} + +/* Store a suitable CPU to handle a hwirq in the ics->hwirq_cpu_map cache */ +static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq, + const cpumask_t *affinity) +{ + cpumask_var_t avail, newmask; + int ret = -ENOMEM, cpu, cpu_rover = 0, target; + int index = hwirq - ics->hwirq_start; + unsigned int nodeid; + + BUG_ON(index < 0 || index >= ics->count); + + if (!ics->hwirq_cpu_map) + return -ENOMEM; + + if (!distribute_irqs) { + ics->hwirq_cpu_map[hwirq - ics->hwirq_start] = xics_default_server; + return 0; + } + + /* Allocate needed CPU masks */ + if (!alloc_cpumask_var(&avail, GFP_KERNEL)) + goto ret; + if (!alloc_cpumask_var(&newmask, GFP_KERNEL)) + goto freeavail; + + /* Find PBus attached to the source of this IRQ */ + nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */ + + /* Find CPUs that could handle this IRQ */ + if (affinity) + cpumask_and(avail, cpu_online_mask, affinity); + else + cpumask_copy(avail, cpu_online_mask); + + /* Narrow selection down to logical CPUs on the same chip */ + cpus_on_chip(nodeid, avail, newmask); + + /* Ensure we haven't narrowed it down to 0 */ + if (unlikely(cpumask_empty(newmask))) { + if (unlikely(cpumask_empty(avail))) { + ret = -1; + goto out; + } + cpumask_copy(newmask, avail); + } + + /* Choose a CPU out of those we narrowed it down to in round robin */ + target = hwirq % cpumask_weight(newmask); + for_each_cpu(cpu, newmask) { + if (cpu_rover++ >= target) { + ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu); + ret = 0; + goto out; + } + } + + /* Shouldn't happen */ + WARN_ON(1); + +out: + free_cpumask_var(newmask); +freeavail: + free_cpumask_var(avail); +ret: + if (ret < 0) { + ics->hwirq_cpu_map[index] = cpumask_first(cpu_online_mask); + pr_warning("Error, falling hwirq 0x%x routing back to CPU %i\n", + hwirq, ics->hwirq_cpu_map[index]); + } + return ret; +} + +static void alloc_irq_map(struct wsp_ics *ics) +{ + int i; + + ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL); + if (!ics->hwirq_cpu_map) { + pr_warning("Allocate hwirq_cpu_map failed, " + "IRQ balancing disabled\n"); + return; + } + + for (i=0; i < ics->count; i++) + ics->hwirq_cpu_map[i] = xics_default_server; +} + +static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq) +{ + int index = hwirq - ics->hwirq_start; + + BUG_ON(index < 0 || index >= ics->count); + + if (!ics->hwirq_cpu_map) + return xics_default_server; + + return ics->hwirq_cpu_map[index]; +} +#else /* !CONFIG_SMP */ +static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq, + const cpumask_t *affinity) +{ + return 0; +} + +static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq) +{ + return xics_default_server; +} + +static void alloc_irq_map(struct wsp_ics *ics) { } +#endif + +static void wsp_chip_unmask_irq(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + struct wsp_ics *ics; + int server; + u64 xive; + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return; + + ics = d->chip_data; + if (WARN_ON(!ics)) + return; + + server = get_irq_server(ics, hw_irq); + + xive = wsp_ics_get_xive(ics, hw_irq); + xive = xive_set_server(xive, server); + xive = xive_set_priority(xive, DEFAULT_PRIORITY); + wsp_ics_set_xive(ics, hw_irq, xive); +} + +static unsigned int wsp_chip_startup(struct irq_data *d) +{ + /* unmask it */ + wsp_chip_unmask_irq(d); + return 0; +} + +static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics) +{ + u64 xive; + + if (hw_irq == XICS_IPI) + return; + + if (WARN_ON(!ics)) + return; + xive = wsp_ics_get_xive(ics, hw_irq); + xive = xive_set_server(xive, xics_default_server); + xive = xive_set_priority(xive, LOWEST_PRIORITY); + wsp_ics_set_xive(ics, hw_irq, xive); +} + +static void wsp_chip_mask_irq(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + struct wsp_ics *ics = d->chip_data; + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return; + + wsp_mask_real_irq(hw_irq, ics); +} + +static int wsp_chip_set_affinity(struct irq_data *d, + const struct cpumask *cpumask, bool force) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + struct wsp_ics *ics; + int ret; + u64 xive; + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return -1; + + ics = d->chip_data; + if (WARN_ON(!ics)) + return -1; + xive = wsp_ics_get_xive(ics, hw_irq); + + /* + * For the moment only implement delivery to all cpus or one cpu. + * Get current irq_server for the given irq + */ + ret = cache_hwirq_map(ics, d->irq, cpumask); + if (ret == -1) { + char cpulist[128]; + cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); + pr_warning("%s: No online cpus in the mask %s for irq %d\n", + __func__, cpulist, d->irq); + return -1; + } else if (ret == -ENOMEM) { + pr_warning("%s: Out of memory\n", __func__); + return -1; + } + + xive = xive_set_server(xive, get_irq_server(ics, hw_irq)); + wsp_ics_set_xive(ics, hw_irq, xive); + + return 0; +} + +static struct irq_chip wsp_irq_chip = { + .name = "WSP ICS", + .irq_startup = wsp_chip_startup, + .irq_mask = wsp_chip_mask_irq, + .irq_unmask = wsp_chip_unmask_irq, + .irq_set_affinity = wsp_chip_set_affinity +}; + +static int wsp_ics_host_match(struct ics *ics, struct device_node *dn) +{ + /* All ICSs in the system implement a global irq number space, + * so match against them all. */ + return of_device_is_compatible(dn, "ibm,ppc-xics"); +} + +static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq) +{ + if (hwirq >= wsp_ics->hwirq_start && + hwirq < wsp_ics->hwirq_start + wsp_ics->count) + return 1; + + return 0; +} + +static int wsp_ics_map(struct ics *ics, unsigned int virq) +{ + struct wsp_ics *wsp_ics = to_wsp_ics(ics); + unsigned int hw_irq = virq_to_hw(virq); + unsigned long flags; + + if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) + return -ENOENT; + + irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq); + + irq_set_chip_data(virq, wsp_ics); + + spin_lock_irqsave(&wsp_ics->lock, flags); + bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0); + spin_unlock_irqrestore(&wsp_ics->lock, flags); + + return 0; +} + +static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq) +{ + struct wsp_ics *wsp_ics = to_wsp_ics(ics); + + if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) + return; + + pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq); + wsp_mask_real_irq(hw_irq, wsp_ics); +} + +static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq) +{ + struct wsp_ics *wsp_ics = to_wsp_ics(ics); + + if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) + return -ENOENT; + + return get_irq_server(wsp_ics, hw_irq); +} + +/* HW Number allocation API */ + +static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn) +{ + struct device_node *iparent; + int i; + + iparent = of_irq_find_parent(dn); + if (!iparent) { + pr_err("wsp_ics: Failed to find interrupt parent!\n"); + return NULL; + } + + for(i = 0; i < num_ics; i++) { + if(ics_list[i].dn == iparent) + break; + } + + if (i >= num_ics) { + pr_err("wsp_ics: Unable to find parent bitmap!\n"); + return NULL; + } + + return &ics_list[i]; +} + +int wsp_ics_alloc_irq(struct device_node *dn, int num) +{ + struct wsp_ics *ics; + int order, offset; + + ics = wsp_ics_find_dn_ics(dn); + if (!ics) + return -ENODEV; + + /* Fast, but overly strict if num isn't a power of two */ + order = get_count_order(num); + + spin_lock_irq(&ics->lock); + offset = bitmap_find_free_region(ics->bitmap, ics->count, order); + spin_unlock_irq(&ics->lock); + + if (offset < 0) + return offset; + + return offset + ics->hwirq_start; +} + +void wsp_ics_free_irq(struct device_node *dn, unsigned int irq) +{ + struct wsp_ics *ics; + + ics = wsp_ics_find_dn_ics(dn); + if (WARN_ON(!ics)) + return; + + spin_lock_irq(&ics->lock); + bitmap_release_region(ics->bitmap, irq, 0); + spin_unlock_irq(&ics->lock); +} + +/* Initialisation */ + +static int __init wsp_ics_bitmap_setup(struct wsp_ics *ics, + struct device_node *dn) +{ + int len, i, j, size; + u32 start, count; + const u32 *p; + + size = BITS_TO_LONGS(ics->count) * sizeof(long); + ics->bitmap = kzalloc(size, GFP_KERNEL); + if (!ics->bitmap) { + pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n"); + return -ENOMEM; + } + + spin_lock_init(&ics->lock); + + p = of_get_property(dn, "available-ranges", &len); + if (!p || !len) { + /* FIXME this should be a WARN() once mambo is updated */ + pr_err("wsp_ics: No available-ranges defined for %s\n", + dn->full_name); + return 0; + } + + if (len % (2 * sizeof(u32)) != 0) { + /* FIXME this should be a WARN() once mambo is updated */ + pr_err("wsp_ics: Invalid available-ranges for %s\n", + dn->full_name); + return 0; + } + + bitmap_fill(ics->bitmap, ics->count); + + for (i = 0; i < len / sizeof(u32); i += 2) { + start = of_read_number(p + i, 1); + count = of_read_number(p + i + 1, 1); + + pr_devel("%s: start: %d count: %d\n", __func__, start, count); + + if ((start + count) > (ics->hwirq_start + ics->count) || + start < ics->hwirq_start) { + pr_err("wsp_ics: Invalid range! -> %d to %d\n", + start, start + count); + break; + } + + for (j = 0; j < count; j++) + bitmap_release_region(ics->bitmap, + (start + j) - ics->hwirq_start, 0); + } + + /* Ensure LSIs are not available for allocation */ + bitmap_allocate_region(ics->bitmap, ics->lsi_base, + get_count_order(ics->lsi_count)); + + return 0; +} + +static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn) +{ + u32 lsi_buid, msi_buid, msi_base, msi_count; + void __iomem *regs; + const u32 *p; + int rc, len, i; + u64 caps, buid; + + p = of_get_property(dn, "interrupt-ranges", &len); + if (!p || len < (2 * sizeof(u32))) { + pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n", + dn->full_name); + return -ENOENT; + } + + if (len > (2 * sizeof(u32))) { + pr_err("wsp_ics: Multiple ics ranges not supported.\n"); + return -EINVAL; + } + + regs = of_iomap(dn, 0); + if (!regs) { + pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name); + return -ENXIO; + } + + ics->hwirq_start = of_read_number(p, 1); + ics->count = of_read_number(p + 1, 1); + ics->regs = regs; + + ics->chip_id = wsp_get_chip_id(dn); + if (WARN_ON(ics->chip_id < 0)) + ics->chip_id = 0; + + /* Get some informations about the critter */ + caps = in_be64(ICS_INT_CAPS_REG(ics->regs)); + buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs)); + ics->lsi_count = caps >> 56; + msi_count = (caps >> 44) & 0x7ff; + + /* Note: LSI BUID is 9 bits, but really only 3 are BUID and the + * rest is mixed in the interrupt number. We store the whole + * thing though + */ + lsi_buid = (buid >> 48) & 0x1ff; + ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5; + msi_buid = (buid >> 37) & 0x7; + msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11; + + pr_info("wsp_ics: Found %s\n", dn->full_name); + pr_info("wsp_ics: irq range : 0x%06llx..0x%06llx\n", + ics->hwirq_start, ics->hwirq_start + ics->count - 1); + pr_info("wsp_ics: %4d LSIs : 0x%06x..0x%06x\n", + ics->lsi_count, ics->lsi_base, + ics->lsi_base + ics->lsi_count - 1); + pr_info("wsp_ics: %4d MSIs : 0x%06x..0x%06x\n", + msi_count, msi_base, + msi_base + msi_count - 1); + + /* Let's check the HW config is sane */ + if (ics->lsi_base < ics->hwirq_start || + (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count)) + pr_warning("wsp_ics: WARNING ! LSIs out of interrupt-ranges !\n"); + if (msi_base < ics->hwirq_start || + (msi_base + msi_count) > (ics->hwirq_start + ics->count)) + pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n"); + + /* We don't check for overlap between LSI and MSI, which will happen + * if we use the same BUID, I'm not sure yet how legit that is. + */ + + rc = wsp_ics_bitmap_setup(ics, dn); + if (rc) { + iounmap(regs); + return rc; + } + + ics->dn = of_node_get(dn); + alloc_irq_map(ics); + + for(i = 0; i < ics->count; i++) + wsp_mask_real_irq(ics->hwirq_start + i, ics); + + ics->ics.map = wsp_ics_map; + ics->ics.mask_unknown = wsp_ics_mask_unknown; + ics->ics.get_server = wsp_ics_get_server; + ics->ics.host_match = wsp_ics_host_match; + + xics_register_ics(&ics->ics); + + return 0; +} + +static void __init wsp_ics_set_default_server(void) +{ + struct device_node *np; + u32 hwid; + + /* Find the server number for the boot cpu. */ + np = of_get_cpu_node(boot_cpuid, NULL); + BUG_ON(!np); + + hwid = get_hard_smp_processor_id(boot_cpuid); + + pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name); + xics_default_server = hwid; + + of_node_put(np); +} + +static int __init wsp_ics_init(void) +{ + struct device_node *dn; + struct wsp_ics *ics; + int rc, found; + + wsp_ics_set_default_server(); + + found = 0; + for_each_compatible_node(dn, NULL, "ibm,ppc-xics") + found++; + + if (found == 0) { + pr_err("wsp_ics: No ICS's found!\n"); + return -ENODEV; + } + + ics_list = kmalloc(sizeof(*ics) * found, GFP_KERNEL); + if (!ics_list) { + pr_err("wsp_ics: No memory for structs.\n"); + return -ENOMEM; + } + + num_ics = 0; + ics = ics_list; + for_each_compatible_node(dn, NULL, "ibm,wsp-xics") { + rc = wsp_ics_setup(ics, dn); + if (rc == 0) { + ics++; + num_ics++; + } + } + + if (found != num_ics) { + pr_err("wsp_ics: Failed setting up %d ICS's\n", + found - num_ics); + return -1; + } + + return 0; +} + +void __init wsp_init_irq(void) +{ + wsp_ics_init(); + xics_init(); + + /* We need to patch our irq chip's EOI to point to the right ICP */ + wsp_irq_chip.irq_eoi = icp_ops->eoi; +} diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h new file mode 100644 index 00000000000..e34d5310264 --- /dev/null +++ b/arch/powerpc/platforms/wsp/ics.h @@ -0,0 +1,20 @@ +/* + * Copyright 2009 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ICS_H +#define __ICS_H + +#define XIVE_ADDR_MASK 0x7FFULL + +extern void wsp_init_irq(void); + +extern int wsp_ics_alloc_irq(struct device_node *dn, int num); +extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq); + +#endif /* __ICS_H */ diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c new file mode 100644 index 00000000000..be05631a3c1 --- /dev/null +++ b/arch/powerpc/platforms/wsp/opb_pic.c @@ -0,0 +1,332 @@ +/* + * IBM Onboard Peripheral Bus Interrupt Controller + * + * Copyright 2010 Jack Miller, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/time.h> + +#include <asm/reg_a2.h> +#include <asm/irq.h> + +#define OPB_NR_IRQS 32 + +#define OPB_MLSASIER 0x04 /* MLS Accumulated Status IER */ +#define OPB_MLSIR 0x50 /* MLS Interrupt Register */ +#define OPB_MLSIER 0x54 /* MLS Interrupt Enable Register */ +#define OPB_MLSIPR 0x58 /* MLS Interrupt Polarity Register */ +#define OPB_MLSIIR 0x5c /* MLS Interrupt Inputs Register */ + +static int opb_index = 0; + +struct opb_pic { + struct irq_host *host; + void *regs; + int index; + spinlock_t lock; +}; + +static u32 opb_in(struct opb_pic *opb, int offset) +{ + return in_be32(opb->regs + offset); +} + +static void opb_out(struct opb_pic *opb, int offset, u32 val) +{ + out_be32(opb->regs + offset, val); +} + +static void opb_unmask_irq(struct irq_data *d) +{ + struct opb_pic *opb; + unsigned long flags; + u32 ier, bitset; + + opb = d->chip_data; + bitset = (1 << (31 - irqd_to_hwirq(d))); + + spin_lock_irqsave(&opb->lock, flags); + + ier = opb_in(opb, OPB_MLSIER); + opb_out(opb, OPB_MLSIER, ier | bitset); + ier = opb_in(opb, OPB_MLSIER); + + spin_unlock_irqrestore(&opb->lock, flags); +} + +static void opb_mask_irq(struct irq_data *d) +{ + struct opb_pic *opb; + unsigned long flags; + u32 ier, mask; + + opb = d->chip_data; + mask = ~(1 << (31 - irqd_to_hwirq(d))); + + spin_lock_irqsave(&opb->lock, flags); + + ier = opb_in(opb, OPB_MLSIER); + opb_out(opb, OPB_MLSIER, ier & mask); + ier = opb_in(opb, OPB_MLSIER); // Flush posted writes + + spin_unlock_irqrestore(&opb->lock, flags); +} + +static void opb_ack_irq(struct irq_data *d) +{ + struct opb_pic *opb; + unsigned long flags; + u32 bitset; + + opb = d->chip_data; + bitset = (1 << (31 - irqd_to_hwirq(d))); + + spin_lock_irqsave(&opb->lock, flags); + + opb_out(opb, OPB_MLSIR, bitset); + opb_in(opb, OPB_MLSIR); // Flush posted writes + + spin_unlock_irqrestore(&opb->lock, flags); +} + +static void opb_mask_ack_irq(struct irq_data *d) +{ + struct opb_pic *opb; + unsigned long flags; + u32 bitset; + u32 ier, ir; + + opb = d->chip_data; + bitset = (1 << (31 - irqd_to_hwirq(d))); + + spin_lock_irqsave(&opb->lock, flags); + + ier = opb_in(opb, OPB_MLSIER); + opb_out(opb, OPB_MLSIER, ier & ~bitset); + ier = opb_in(opb, OPB_MLSIER); // Flush posted writes + + opb_out(opb, OPB_MLSIR, bitset); + ir = opb_in(opb, OPB_MLSIR); // Flush posted writes + + spin_unlock_irqrestore(&opb->lock, flags); +} + +static int opb_set_irq_type(struct irq_data *d, unsigned int flow) +{ + struct opb_pic *opb; + unsigned long flags; + int invert, ipr, mask, bit; + + opb = d->chip_data; + + /* The only information we're interested in in the type is whether it's + * a high or low trigger. For high triggered interrupts, the polarity + * set for it in the MLS Interrupt Polarity Register is 0, for low + * interrupts it's 1 so that the proper input in the MLS Interrupt Input + * Register is interrupted as asserting the interrupt. */ + + switch (flow) { + case IRQ_TYPE_NONE: + opb_mask_irq(d); + return 0; + + case IRQ_TYPE_LEVEL_HIGH: + invert = 0; + break; + + case IRQ_TYPE_LEVEL_LOW: + invert = 1; + break; + + default: + return -EINVAL; + } + + bit = (1 << (31 - irqd_to_hwirq(d))); + mask = ~bit; + + spin_lock_irqsave(&opb->lock, flags); + + ipr = opb_in(opb, OPB_MLSIPR); + ipr = (ipr & mask) | (invert ? bit : 0); + opb_out(opb, OPB_MLSIPR, ipr); + ipr = opb_in(opb, OPB_MLSIPR); // Flush posted writes + + spin_unlock_irqrestore(&opb->lock, flags); + + /* Record the type in the interrupt descriptor */ + irqd_set_trigger_type(d, flow); + + return 0; +} + +static struct irq_chip opb_irq_chip = { + .name = "OPB", + .irq_mask = opb_mask_irq, + .irq_unmask = opb_unmask_irq, + .irq_mask_ack = opb_mask_ack_irq, + .irq_ack = opb_ack_irq, + .irq_set_type = opb_set_irq_type +}; + +static int opb_host_map(struct irq_host *host, unsigned int virq, + irq_hw_number_t hwirq) +{ + struct opb_pic *opb; + + opb = host->host_data; + + /* Most of the important stuff is handled by the generic host code, like + * the lookup, so just attach some info to the virtual irq */ + + irq_set_chip_data(virq, opb); + irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq); + irq_set_irq_type(virq, IRQ_TYPE_NONE); + + return 0; +} + +static int opb_host_xlate(struct irq_host *host, struct device_node *dn, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type) +{ + /* Interrupt size must == 2 */ + BUG_ON(intsize != 2); + *out_hwirq = intspec[0]; + *out_type = intspec[1]; + return 0; +} + +static struct irq_host_ops opb_host_ops = { + .map = opb_host_map, + .xlate = opb_host_xlate, +}; + +irqreturn_t opb_irq_handler(int irq, void *private) +{ + struct opb_pic *opb; + u32 ir, src, subvirq; + + opb = (struct opb_pic *) private; + + /* Read the OPB MLS Interrupt Register for + * asserted interrupts */ + ir = opb_in(opb, OPB_MLSIR); + if (!ir) + return IRQ_NONE; + + do { + /* Get 1 - 32 source, *NOT* bit */ + src = 32 - ffs(ir); + + /* Translate from the OPB's conception of interrupt number to + * Linux's virtual IRQ */ + + subvirq = irq_linear_revmap(opb->host, src); + + generic_handle_irq(subvirq); + } while ((ir = opb_in(opb, OPB_MLSIR))); + + return IRQ_HANDLED; +} + +struct opb_pic *opb_pic_init_one(struct device_node *dn) +{ + struct opb_pic *opb; + struct resource res; + + if (of_address_to_resource(dn, 0, &res)) { + printk(KERN_ERR "opb: Couldn't translate resource\n"); + return NULL; + } + + opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL); + if (!opb) { + printk(KERN_ERR "opb: Failed to allocate opb struct!\n"); + return NULL; + } + + /* Get access to the OPB MMIO registers */ + opb->regs = ioremap(res.start + 0x10000, 0x1000); + if (!opb->regs) { + printk(KERN_ERR "opb: Failed to allocate register space!\n"); + goto free_opb; + } + + /* Allocate an irq host so that Linux knows that despite only + * having one interrupt to issue, we're the controller for multiple + * hardware IRQs, so later we can lookup their virtual IRQs. */ + + opb->host = irq_alloc_host(dn, IRQ_HOST_MAP_LINEAR, + OPB_NR_IRQS, &opb_host_ops, -1); + + if (!opb->host) { + printk(KERN_ERR "opb: Failed to allocate IRQ host!\n"); + goto free_regs; + } + + opb->index = opb_index++; + spin_lock_init(&opb->lock); + opb->host->host_data = opb; + + /* Disable all interrupts by default */ + opb_out(opb, OPB_MLSASIER, 0); + opb_out(opb, OPB_MLSIER, 0); + + /* ACK any interrupts left by FW */ + opb_out(opb, OPB_MLSIR, 0xFFFFFFFF); + + return opb; + +free_regs: + iounmap(opb->regs); +free_opb: + kfree(opb); + return NULL; +} + +void __init opb_pic_init(void) +{ + struct device_node *dn; + struct opb_pic *opb; + int virq; + int rc; + + /* Call init_one for each OPB device */ + for_each_compatible_node(dn, NULL, "ibm,opb") { + + /* Fill in an OPB struct */ + opb = opb_pic_init_one(dn); + if (!opb) { + printk(KERN_WARNING "opb: Failed to init node, skipped!\n"); + continue; + } + + /* Map / get opb's hardware virtual irq */ + virq = irq_of_parse_and_map(dn, 0); + if (virq <= 0) { + printk("opb: irq_op_parse_and_map failed!\n"); + continue; + } + + /* Attach opb interrupt handler to new virtual IRQ */ + rc = request_irq(virq, opb_irq_handler, 0, "OPB LS Cascade", opb); + if (rc) { + printk("opb: request_irq failed: %d\n", rc); + continue; + } + + printk("OPB%d init with %d IRQs at %p\n", opb->index, + OPB_NR_IRQS, opb->regs); + } +} diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c new file mode 100644 index 00000000000..40f28916ff6 --- /dev/null +++ b/arch/powerpc/platforms/wsp/psr2.c @@ -0,0 +1,95 @@ +/* + * Copyright 2008-2011, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/of.h> +#include <linux/smp.h> + +#include <asm/machdep.h> +#include <asm/system.h> +#include <asm/time.h> +#include <asm/udbg.h> + +#include "ics.h" +#include "wsp.h" + + +static void psr2_spin(void) +{ + hard_irq_disable(); + for (;;) ; +} + +static void psr2_restart(char *cmd) +{ + psr2_spin(); +} + +static int psr2_probe_devices(void) +{ + struct device_node *np; + + /* Our RTC is a ds1500. It seems to be programatically compatible + * with the ds1511 for which we have a driver so let's use that + */ + np = of_find_compatible_node(NULL, NULL, "dallas,ds1500"); + if (np != NULL) { + struct resource res; + if (of_address_to_resource(np, 0, &res) == 0) + platform_device_register_simple("ds1511", 0, &res, 1); + } + return 0; +} +machine_arch_initcall(psr2_md, psr2_probe_devices); + +static void __init psr2_setup_arch(void) +{ + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + scom_init_wsp(); + + /* Setup SMP callback */ +#ifdef CONFIG_SMP + a2_setup_smp(); +#endif +} + +static int __init psr2_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "ibm,psr2")) + return 0; + + return 1; +} + +static void __init psr2_init_irq(void) +{ + wsp_init_irq(); + opb_pic_init(); +} + +define_machine(psr2_md) { + .name = "PSR2 A2", + .probe = psr2_probe, + .setup_arch = psr2_setup_arch, + .restart = psr2_restart, + .power_off = psr2_spin, + .halt = psr2_spin, + .calibrate_decr = generic_calibrate_decr, + .init_IRQ = psr2_init_irq, + .progress = udbg_progress, + .power_save = book3e_idle, +}; diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c new file mode 100644 index 00000000000..141e7803209 --- /dev/null +++ b/arch/powerpc/platforms/wsp/scom_smp.c @@ -0,0 +1,427 @@ +/* + * SCOM support for A2 platforms + * + * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson, + * Michael Ellerman, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/cpumask.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <asm/cputhreads.h> +#include <asm/reg_a2.h> +#include <asm/scom.h> +#include <asm/udbg.h> + +#include "wsp.h" + +#define SCOM_RAMC 0x2a /* Ram Command */ +#define SCOM_RAMC_TGT1_EXT 0x80000000 +#define SCOM_RAMC_SRC1_EXT 0x40000000 +#define SCOM_RAMC_SRC2_EXT 0x20000000 +#define SCOM_RAMC_SRC3_EXT 0x10000000 +#define SCOM_RAMC_ENABLE 0x00080000 +#define SCOM_RAMC_THREADSEL 0x00060000 +#define SCOM_RAMC_EXECUTE 0x00010000 +#define SCOM_RAMC_MSR_OVERRIDE 0x00008000 +#define SCOM_RAMC_MSR_PR 0x00004000 +#define SCOM_RAMC_MSR_GS 0x00002000 +#define SCOM_RAMC_FORCE 0x00001000 +#define SCOM_RAMC_FLUSH 0x00000800 +#define SCOM_RAMC_INTERRUPT 0x00000004 +#define SCOM_RAMC_ERROR 0x00000002 +#define SCOM_RAMC_DONE 0x00000001 +#define SCOM_RAMI 0x29 /* Ram Instruction */ +#define SCOM_RAMIC 0x28 /* Ram Instruction and Command */ +#define SCOM_RAMIC_INSN 0xffffffff00000000 +#define SCOM_RAMD 0x2d /* Ram Data */ +#define SCOM_RAMDH 0x2e /* Ram Data High */ +#define SCOM_RAMDL 0x2f /* Ram Data Low */ +#define SCOM_PCCR0 0x33 /* PC Configuration Register 0 */ +#define SCOM_PCCR0_ENABLE_DEBUG 0x80000000 +#define SCOM_PCCR0_ENABLE_RAM 0x40000000 +#define SCOM_THRCTL 0x30 /* Thread Control and Status */ +#define SCOM_THRCTL_T0_STOP 0x80000000 +#define SCOM_THRCTL_T1_STOP 0x40000000 +#define SCOM_THRCTL_T2_STOP 0x20000000 +#define SCOM_THRCTL_T3_STOP 0x10000000 +#define SCOM_THRCTL_T0_STEP 0x08000000 +#define SCOM_THRCTL_T1_STEP 0x04000000 +#define SCOM_THRCTL_T2_STEP 0x02000000 +#define SCOM_THRCTL_T3_STEP 0x01000000 +#define SCOM_THRCTL_T0_RUN 0x00800000 +#define SCOM_THRCTL_T1_RUN 0x00400000 +#define SCOM_THRCTL_T2_RUN 0x00200000 +#define SCOM_THRCTL_T3_RUN 0x00100000 +#define SCOM_THRCTL_T0_PM 0x00080000 +#define SCOM_THRCTL_T1_PM 0x00040000 +#define SCOM_THRCTL_T2_PM 0x00020000 +#define SCOM_THRCTL_T3_PM 0x00010000 +#define SCOM_THRCTL_T0_UDE 0x00008000 +#define SCOM_THRCTL_T1_UDE 0x00004000 +#define SCOM_THRCTL_T2_UDE 0x00002000 +#define SCOM_THRCTL_T3_UDE 0x00001000 +#define SCOM_THRCTL_ASYNC_DIS 0x00000800 +#define SCOM_THRCTL_TB_DIS 0x00000400 +#define SCOM_THRCTL_DEC_DIS 0x00000200 +#define SCOM_THRCTL_AND 0x31 /* Thread Control and Status */ +#define SCOM_THRCTL_OR 0x32 /* Thread Control and Status */ + + +static DEFINE_PER_CPU(scom_map_t, scom_ptrs); + +static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread) +{ + scom_map_t scom = per_cpu(scom_ptrs, cpu); + int tcpu; + + if (scom_map_ok(scom)) { + *first_thread = 0; + return scom; + } + + *first_thread = 1; + + scom = scom_map_device(np, 0); + + for (tcpu = cpu_first_thread_sibling(cpu); + tcpu <= cpu_last_thread_sibling(cpu); tcpu++) + per_cpu(scom_ptrs, tcpu) = scom; + + /* Hack: for the boot core, this will actually get called on + * the second thread up, not the first so our test above will + * set first_thread incorrectly. */ + if (cpu_first_thread_sibling(cpu) == 0) + *first_thread = 0; + + return scom; +} + +static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask) +{ + u64 cmd, mask, val; + int n = 0; + + cmd = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28) + | ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE; + mask = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR; + + scom_write(scom, SCOM_RAMIC, cmd); + + while (!((val = scom_read(scom, SCOM_RAMC)) & mask)) { + pr_devel("Waiting on RAMC = 0x%llx\n", val); + if (++n == 3) { + pr_err("RAMC timeout on instruction 0x%08x, thread %d\n", + insn, thread); + return -1; + } + } + + if (val & SCOM_RAMC_INTERRUPT) { + pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n", + insn, thread); + return -SCOM_RAMC_INTERRUPT; + } + + if (val & SCOM_RAMC_ERROR) { + pr_err("RAMC error on instruction 0x%08x, thread %d\n", + insn, thread); + return -SCOM_RAMC_ERROR; + } + + return 0; +} + +static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt, + u64 *out_gpr) +{ + int rc; + + /* or rN, rN, rN */ + u32 insn = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11); + rc = a2_scom_ram(scom, thread, insn, alt ? 0xf : 0x0); + if (rc) + return rc; + + *out_gpr = scom_read(scom, SCOM_RAMD); + + return 0; +} + +static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr) +{ + int rc, sprhi, sprlo; + u32 insn; + + sprhi = spr >> 5; + sprlo = spr & 0x1f; + insn = 0x7c2002a6 | (sprlo << 16) | (sprhi << 11); /* mfspr r1,spr */ + + if (spr == 0x0ff0) + insn = 0x7c2000a6; /* mfmsr r1 */ + + rc = a2_scom_ram(scom, thread, insn, 0xf); + if (rc) + return rc; + return a2_scom_getgpr(scom, thread, 1, 1, out_spr); +} + +static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr, + int alt, u64 val) +{ + u32 lis = 0x3c000000 | (gpr << 21); + u32 li = 0x38000000 | (gpr << 21); + u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16); + u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16); + u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16); + u32 highest = val >> 48; + u32 higher = (val >> 32) & 0xffff; + u32 high = (val >> 16) & 0xffff; + u32 low = val & 0xffff; + int lext = alt ? 0x8 : 0x0; + int oext = alt ? 0xf : 0x0; + int rc = 0; + + if (highest) + rc |= a2_scom_ram(scom, thread, lis | highest, lext); + + if (higher) { + if (highest) + rc |= a2_scom_ram(scom, thread, oris | higher, oext); + else + rc |= a2_scom_ram(scom, thread, li | higher, lext); + } + + if (highest || higher) + rc |= a2_scom_ram(scom, thread, rldicr32, oext); + + if (high) { + if (highest || higher) + rc |= a2_scom_ram(scom, thread, oris | high, oext); + else + rc |= a2_scom_ram(scom, thread, lis | high, lext); + } + + if (highest || higher || high) + rc |= a2_scom_ram(scom, thread, ori | low, oext); + else + rc |= a2_scom_ram(scom, thread, li | low, lext); + + return rc; +} + +static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val) +{ + int sprhi = spr >> 5; + int sprlo = spr & 0x1f; + /* mtspr spr, r1 */ + u32 insn = 0x7c2003a6 | (sprlo << 16) | (sprhi << 11); + + if (spr == 0x0ff0) + insn = 0x7c200124; /* mtmsr r1 */ + + if (a2_scom_setgpr(scom, thread, 1, 1, val)) + return -1; + + return a2_scom_ram(scom, thread, insn, 0xf); +} + +static int a2_scom_initial_tlb(scom_map_t scom, int thread) +{ + extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[]; + extern u32 a2_tlbinit_after_iprot_flush[]; + extern u32 a2_tlbinit_after_linear_map[]; + u32 assoc, entries, i; + u64 epn, tlbcfg; + u32 *p; + int rc; + + /* Invalidate all entries (including iprot) */ + + rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg); + if (rc) + goto scom_fail; + entries = tlbcfg & TLBnCFG_N_ENTRY; + assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24; + epn = 0; + + /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ + a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531); + /* Set MMUCR3 to write all thids bit to the TLB */ + a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f); + + /* Set MAS1 for 1G page size, and MAS2 to our initial EPN */ + a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB)); + a2_scom_setspr(scom, thread, SPRN_MAS2, epn); + for (i = 0; i < entries; i++) { + + a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc)); + + /* tlbwe */ + rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0); + if (rc) + goto scom_fail; + + /* Next entry is new address? */ + if((i + 1) % assoc == 0) { + epn += (1 << 30); + a2_scom_setspr(scom, thread, SPRN_MAS2, epn); + } + } + + /* Setup args for linear mapping */ + rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0)); + if (rc) + goto scom_fail; + + /* Linear mapping */ + for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) { + rc = a2_scom_ram(scom, thread, *p, 0); + if (rc) + goto scom_fail; + } + + /* + * For the boot thread, between the linear mapping and the debug + * mappings there is a loop to flush iprot mappings. Ramming doesn't do + * branches, but the secondary threads don't need to be nearly as smart + * (i.e. we don't need to worry about invalidating the mapping we're + * standing on). + */ + + /* Debug mappings. Expects r11 = MAS0 from linear map (set above) */ + for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) { + rc = a2_scom_ram(scom, thread, *p, 0); + if (rc) + goto scom_fail; + } + +scom_fail: + if (rc) + pr_err("Setting up initial TLB failed, err %d\n", rc); + + if (rc == -SCOM_RAMC_INTERRUPT) { + /* Interrupt, dump some status */ + int rc[10]; + u64 iar, srr0, srr1, esr, mas0, mas1, mas2, mas7_3, mas8, ccr2; + rc[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar); + rc[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0); + rc[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1); + rc[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr); + rc[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0); + rc[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1); + rc[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2); + rc[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3); + rc[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8); + rc[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2); + pr_err(" -> retreived IAR =0x%llx (err %d)\n", iar, rc[0]); + pr_err(" retreived SRR0=0x%llx (err %d)\n", srr0, rc[1]); + pr_err(" retreived SRR1=0x%llx (err %d)\n", srr1, rc[2]); + pr_err(" retreived ESR =0x%llx (err %d)\n", esr, rc[3]); + pr_err(" retreived MAS0=0x%llx (err %d)\n", mas0, rc[4]); + pr_err(" retreived MAS1=0x%llx (err %d)\n", mas1, rc[5]); + pr_err(" retreived MAS2=0x%llx (err %d)\n", mas2, rc[6]); + pr_err(" retreived MS73=0x%llx (err %d)\n", mas7_3, rc[7]); + pr_err(" retreived MAS8=0x%llx (err %d)\n", mas8, rc[8]); + pr_err(" retreived CCR2=0x%llx (err %d)\n", ccr2, rc[9]); + } + + return rc; +} + +int __devinit a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, + struct device_node *np) +{ + u64 init_iar, init_msr, init_ccr2; + unsigned long start_here; + int rc, core_setup; + scom_map_t scom; + u64 pccr0; + + scom = get_scom(lcpu, np, &core_setup); + if (!scom) { + printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu); + return -1; + } + + pr_devel("Bringing up CPU%d using SCOM...\n", lcpu); + + pccr0 = scom_read(scom, SCOM_PCCR0); + scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG | + SCOM_PCCR0_ENABLE_RAM); + + /* Stop the thead with THRCTL. If we are setting up the TLB we stop all + * threads. We also disable asynchronous interrupts while RAMing. + */ + if (core_setup) + scom_write(scom, SCOM_THRCTL_OR, + SCOM_THRCTL_T0_STOP | + SCOM_THRCTL_T1_STOP | + SCOM_THRCTL_T2_STOP | + SCOM_THRCTL_T3_STOP | + SCOM_THRCTL_ASYNC_DIS); + else + scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx); + + /* Flush its pipeline just in case */ + scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) | + SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE); + + a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar); + a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr); + a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2); + + /* Set MSR to MSR_CM (0x0ff0 is magic value for MSR_CM) */ + rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM); + if (rc) { + pr_err("Failed to set MSR ! err %d\n", rc); + return rc; + } + + /* RAM in an sync/isync for the sake of it */ + a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0); + a2_scom_ram(scom, thr_idx, 0x4c00012c, 0); + + if (core_setup) { + pr_devel("CPU%d is first thread in core, initializing TLB...\n", + lcpu); + rc = a2_scom_initial_tlb(scom, thr_idx); + if (rc) + goto fail; + } + + start_here = *(unsigned long *)(core_setup ? generic_secondary_smp_init + : generic_secondary_thread_init); + pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here); + + rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here); + rc |= a2_scom_setgpr(scom, thr_idx, 3, 0, + get_hard_smp_processor_id(lcpu)); + /* + * Tell book3e_secondary_core_init not to set up the TLB, we've + * already done that. + */ + rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1); + + rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx); + + scom_write(scom, SCOM_RAMC, 0); + scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx)); + scom_write(scom, SCOM_PCCR0, pccr0); +fail: + pr_devel(" SCOM initialization %s\n", rc ? "failed" : "succeeded"); + if (rc) { + pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n", + init_iar, init_msr, init_ccr2); + } + + return rc; +} diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c new file mode 100644 index 00000000000..4052e2259f3 --- /dev/null +++ b/arch/powerpc/platforms/wsp/scom_wsp.c @@ -0,0 +1,77 @@ +/* + * SCOM backend for WSP + * + * Copyright 2010 Benjamin Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/cpumask.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <asm/cputhreads.h> +#include <asm/reg_a2.h> +#include <asm/scom.h> +#include <asm/udbg.h> + +#include "wsp.h" + + +static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count) +{ + struct resource r; + u64 xscom_addr; + + if (!of_get_property(dev, "scom-controller", NULL)) { + pr_err("%s: device %s is not a SCOM controller\n", + __func__, dev->full_name); + return SCOM_MAP_INVALID; + } + + if (of_address_to_resource(dev, 0, &r)) { + pr_debug("Failed to find SCOM controller address\n"); + return 0; + } + + /* Transform the SCOM address into an XSCOM offset */ + xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3); + + return (scom_map_t)ioremap(r.start + xscom_addr, count << 3); +} + +static void wsp_scom_unmap(scom_map_t map) +{ + iounmap((void *)map); +} + +static u64 wsp_scom_read(scom_map_t map, u32 reg) +{ + u64 __iomem *addr = (u64 __iomem *)map; + + return in_be64(addr + reg); +} + +static void wsp_scom_write(scom_map_t map, u32 reg, u64 value) +{ + u64 __iomem *addr = (u64 __iomem *)map; + + return out_be64(addr + reg, value); +} + +static const struct scom_controller wsp_scom_controller = { + .map = wsp_scom_map, + .unmap = wsp_scom_unmap, + .read = wsp_scom_read, + .write = wsp_scom_write +}; + +void scom_init_wsp(void) +{ + scom_init(&wsp_scom_controller); +} diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c new file mode 100644 index 00000000000..11ac2f05e01 --- /dev/null +++ b/arch/powerpc/platforms/wsp/setup.c @@ -0,0 +1,36 @@ +/* + * Copyright 2010 Michael Ellerman, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/of_platform.h> + +#include "wsp.h" + +/* + * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property. + * Won't work for nodes that are not a descendant of a wsp node. + */ +int wsp_get_chip_id(struct device_node *dn) +{ + const u32 *p; + int rc; + + /* Start looking at the specified node, not its parent */ + dn = of_node_get(dn); + while (dn && !(p = of_get_property(dn, "ibm,wsp-chip-id", NULL))) + dn = of_get_next_parent(dn); + + if (!dn) + return -1; + + rc = *p; + of_node_put(dn); + + return rc; +} diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c new file mode 100644 index 00000000000..9d20fa9d371 --- /dev/null +++ b/arch/powerpc/platforms/wsp/smp.c @@ -0,0 +1,88 @@ +/* + * SMP Support for A2 platforms + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/smp.h> + +#include <asm/dbell.h> +#include <asm/machdep.h> +#include <asm/xics.h> + +#include "ics.h" +#include "wsp.h" + +static void __devinit smp_a2_setup_cpu(int cpu) +{ + doorbell_setup_this_cpu(); + + if (cpu != boot_cpuid) + xics_setup_cpu(); +} + +int __devinit smp_a2_kick_cpu(int nr) +{ + const char *enable_method; + struct device_node *np; + int thr_idx; + + if (nr < 0 || nr >= NR_CPUS) + return -ENOENT; + + np = of_get_cpu_node(nr, &thr_idx); + if (!np) + return -ENODEV; + + enable_method = of_get_property(np, "enable-method", NULL); + pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method); + + if (!enable_method) { + printk(KERN_ERR "CPU%d has no enable-method\n", nr); + return -ENOENT; + } else if (strcmp(enable_method, "ibm,a2-scom") == 0) { + if (a2_scom_startup_cpu(nr, thr_idx, np)) + return -1; + } else { + printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n", + nr, enable_method); + return -EINVAL; + } + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + paca[nr].cpu_start = 1; + + return 0; +} + +static int __init smp_a2_probe(void) +{ + return cpus_weight(cpu_possible_map); +} + +static struct smp_ops_t a2_smp_ops = { + .message_pass = smp_muxed_ipi_message_pass, + .cause_ipi = doorbell_cause_ipi, + .probe = smp_a2_probe, + .kick_cpu = smp_a2_kick_cpu, + .setup_cpu = smp_a2_setup_cpu, +}; + +void __init a2_setup_smp(void) +{ + smp_ops = &a2_smp_ops; +} diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h new file mode 100644 index 00000000000..7c3e087fd2f --- /dev/null +++ b/arch/powerpc/platforms/wsp/wsp.h @@ -0,0 +1,17 @@ +#ifndef __WSP_H +#define __WSP_H + +#include <asm/wsp.h> + +extern void wsp_setup_pci(void); +extern void scom_init_wsp(void); + +extern void a2_setup_smp(void); +extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, + struct device_node *np); +int smp_a2_cpu_bootable(unsigned int nr); +int __devinit smp_a2_kick_cpu(int nr); + +void opb_pic_init(void); + +#endif /* __WSP_H */ diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig index 396582835cb..d775fd148d1 100644 --- a/arch/powerpc/sysdev/Kconfig +++ b/arch/powerpc/sysdev/Kconfig @@ -12,3 +12,13 @@ config PPC_MSI_BITMAP depends on PCI_MSI default y if MPIC default y if FSL_PCI + +source "arch/powerpc/sysdev/xics/Kconfig" + +config PPC_SCOM + bool + +config SCOM_DEBUGFS + bool "Expose SCOM controllers via debugfs" + depends on PPC_SCOM + default n diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 1e0c933ef77..6076e0074a8 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -57,3 +57,9 @@ obj-$(CONFIG_PPC_MPC52xx) += mpc5xxx_clocks.o ifeq ($(CONFIG_SUSPEND),y) obj-$(CONFIG_6xx) += 6xx-suspend.o endif + +obj-$(CONFIG_PPC_SCOM) += scom.o + +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror + +obj-$(CONFIG_PPC_XICS) += xics/ diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 1636dd89670..bd0d54060b9 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -216,7 +216,7 @@ static int axon_ram_probe(struct platform_device *device) AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20); bank->ph_addr = resource.start; - bank->io_addr = (unsigned long) ioremap_flags( + bank->io_addr = (unsigned long) ioremap_prot( bank->ph_addr, bank->size, _PAGE_NO_CACHE); if (bank->io_addr == 0) { dev_err(&device->dev, "ioremap() failed\n"); diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index e0bc944eb23..350787c83e2 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c @@ -58,21 +58,21 @@ static struct irq_host *cpm_pic_host; static void cpm_mask_irq(struct irq_data *d) { - unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq; + unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d); clrbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec)); } static void cpm_unmask_irq(struct irq_data *d) { - unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq; + unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d); setbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec)); } static void cpm_end_irq(struct irq_data *d) { - unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq; + unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d); out_be32(&cpic_reg->cpic_cisr, (1 << cpm_vec)); } @@ -157,7 +157,7 @@ unsigned int cpm_pic_init(void) goto end; /* Initialize the CPM interrupt controller. */ - hwirq = (unsigned int)irq_map[sirq].hwirq; + hwirq = (unsigned int)virq_to_hw(sirq); out_be32(&cpic_reg->cpic_cicr, (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) | ((hwirq/2) << 13) | CICR_HP_MASK); diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index 5495c1be472..bcab50e2a9e 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -81,7 +81,7 @@ static const u_char irq_to_siubit[] = { static void cpm2_mask_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = virq_to_hw(d->irq); + unsigned int irq_nr = irqd_to_hwirq(d); bit = irq_to_siubit[irq_nr]; word = irq_to_siureg[irq_nr]; @@ -93,7 +93,7 @@ static void cpm2_mask_irq(struct irq_data *d) static void cpm2_unmask_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = virq_to_hw(d->irq); + unsigned int irq_nr = irqd_to_hwirq(d); bit = irq_to_siubit[irq_nr]; word = irq_to_siureg[irq_nr]; @@ -105,7 +105,7 @@ static void cpm2_unmask_irq(struct irq_data *d) static void cpm2_ack(struct irq_data *d) { int bit, word; - unsigned int irq_nr = virq_to_hw(d->irq); + unsigned int irq_nr = irqd_to_hwirq(d); bit = irq_to_siubit[irq_nr]; word = irq_to_siureg[irq_nr]; @@ -116,7 +116,7 @@ static void cpm2_ack(struct irq_data *d) static void cpm2_end_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = virq_to_hw(d->irq); + unsigned int irq_nr = irqd_to_hwirq(d); bit = irq_to_siubit[irq_nr]; word = irq_to_siureg[irq_nr]; @@ -133,7 +133,7 @@ static void cpm2_end_irq(struct irq_data *d) static int cpm2_set_irq_type(struct irq_data *d, unsigned int flow_type) { - unsigned int src = virq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned int vold, vnew, edibit; /* Port C interrupts are either IRQ_TYPE_EDGE_FALLING or diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c index 54fb1922fe3..11641589917 100644 --- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c +++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c @@ -106,10 +106,10 @@ int __init instantiate_cache_sram(struct platform_device *dev, goto out_free; } - cache_sram->base_virt = ioremap_flags(cache_sram->base_phys, + cache_sram->base_virt = ioremap_prot(cache_sram->base_phys, cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL); if (!cache_sram->base_virt) { - dev_err(&dev->dev, "%s: ioremap_flags failed\n", + dev_err(&dev->dev, "%s: ioremap_prot failed\n", dev->dev.of_node->full_name); ret = -ENOMEM; goto out_release; diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 01cd2f08951..92e78333c47 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -110,7 +110,7 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; - msi_data = irq_get_handler_data(entry->irq); + msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); msi_bitmap_free_hwirqs(&msi_data->bitmap, virq_to_hw(entry->irq), 1); @@ -168,7 +168,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) rc = -ENOSPC; goto out_free; } - irq_set_handler_data(virq, msi_data); + /* chip_data is msi_data via host->hostdata in host->map() */ irq_set_msi_desc(virq, entry); fsl_compose_msi_msg(pdev, hwirq, &msg, msi_data); @@ -193,7 +193,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) u32 have_shift = 0; struct fsl_msi_cascade_data *cascade_data; - cascade_data = (struct fsl_msi_cascade_data *)irq_get_handler_data(irq); + cascade_data = irq_get_handler_data(irq); msi_data = cascade_data->msi_data; raw_spin_lock(&desc->lock); @@ -253,7 +253,7 @@ unlock: static int fsl_of_msi_remove(struct platform_device *ofdev) { - struct fsl_msi *msi = ofdev->dev.platform_data; + struct fsl_msi *msi = platform_get_drvdata(ofdev); int virq, i; struct fsl_msi_cascade_data *cascade_data; @@ -330,7 +330,7 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev) dev_err(&dev->dev, "No memory for MSI structure\n"); return -ENOMEM; } - dev->dev.platform_data = msi; + platform_set_drvdata(dev, msi); msi->irqhost = irq_alloc_host(dev->dev.of_node, IRQ_HOST_MAP_LINEAR, NR_MSI_IRQS, &fsl_msi_host_ops, 0); diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 142770cb84b..d18bb27e4df 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -185,18 +185,6 @@ static int i8259_host_map(struct irq_host *h, unsigned int virq, return 0; } -static void i8259_host_unmap(struct irq_host *h, unsigned int virq) -{ - /* Make sure irq is masked in hardware */ - i8259_mask_irq(irq_get_irq_data(virq)); - - /* remove chip and handler */ - irq_set_chip_and_handler(virq, NULL, NULL); - - /* Make sure it's completed */ - synchronize_irq(virq); -} - static int i8259_host_xlate(struct irq_host *h, struct device_node *ct, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_flags) @@ -220,7 +208,6 @@ static int i8259_host_xlate(struct irq_host *h, struct device_node *ct, static struct irq_host_ops i8259_host_ops = { .match = i8259_host_match, .map = i8259_host_map, - .unmap = i8259_host_unmap, .xlate = i8259_host_xlate, }; diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 596554a8725..7367d17364c 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -521,12 +521,10 @@ static inline struct ipic * ipic_from_irq(unsigned int virq) return primary_ipic; } -#define ipic_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq) - static void ipic_unmask_irq(struct irq_data *d) { struct ipic *ipic = ipic_from_irq(d->irq); - unsigned int src = ipic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; @@ -542,7 +540,7 @@ static void ipic_unmask_irq(struct irq_data *d) static void ipic_mask_irq(struct irq_data *d) { struct ipic *ipic = ipic_from_irq(d->irq); - unsigned int src = ipic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; @@ -562,7 +560,7 @@ static void ipic_mask_irq(struct irq_data *d) static void ipic_ack_irq(struct irq_data *d) { struct ipic *ipic = ipic_from_irq(d->irq); - unsigned int src = ipic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; @@ -581,7 +579,7 @@ static void ipic_ack_irq(struct irq_data *d) static void ipic_mask_irq_and_ack(struct irq_data *d) { struct ipic *ipic = ipic_from_irq(d->irq); - unsigned int src = ipic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; @@ -604,7 +602,7 @@ static void ipic_mask_irq_and_ack(struct irq_data *d) static int ipic_set_irq_type(struct irq_data *d, unsigned int flow_type) { struct ipic *ipic = ipic_from_irq(d->irq); - unsigned int src = ipic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned int vold, vnew, edibit; if (flow_type == IRQ_TYPE_NONE) @@ -793,7 +791,7 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags) int ipic_set_priority(unsigned int virq, unsigned int priority) { struct ipic *ipic = ipic_from_irq(virq); - unsigned int src = ipic_irq_to_hw(virq); + unsigned int src = virq_to_hw(virq); u32 temp; if (priority > 7) @@ -821,7 +819,7 @@ int ipic_set_priority(unsigned int virq, unsigned int priority) void ipic_set_highest_priority(unsigned int virq) { struct ipic *ipic = ipic_from_irq(virq); - unsigned int src = ipic_irq_to_hw(virq); + unsigned int src = virq_to_hw(virq); u32 temp; temp = ipic_read(ipic->regs, IPIC_SICFR); diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c index 20732420906..ddc877a3a23 100644 --- a/arch/powerpc/sysdev/mmio_nvram.c +++ b/arch/powerpc/sysdev/mmio_nvram.c @@ -115,6 +115,8 @@ int __init mmio_nvram_init(void) int ret; nvram_node = of_find_node_by_type(NULL, "nvram"); + if (!nvram_node) + nvram_node = of_find_compatible_node(NULL, NULL, "nvram"); if (!nvram_node) { printk(KERN_WARNING "nvram: no node found in device-tree\n"); return -ENODEV; diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c index a88800ff4d0..20924f2246f 100644 --- a/arch/powerpc/sysdev/mpc8xx_pic.c +++ b/arch/powerpc/sysdev/mpc8xx_pic.c @@ -28,7 +28,7 @@ int cpm_get_irq(struct pt_regs *regs); static void mpc8xx_unmask_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq; + unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d); bit = irq_nr & 0x1f; word = irq_nr >> 5; @@ -40,7 +40,7 @@ static void mpc8xx_unmask_irq(struct irq_data *d) static void mpc8xx_mask_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq; + unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d); bit = irq_nr & 0x1f; word = irq_nr >> 5; @@ -52,7 +52,7 @@ static void mpc8xx_mask_irq(struct irq_data *d) static void mpc8xx_ack(struct irq_data *d) { int bit; - unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq; + unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d); bit = irq_nr & 0x1f; out_be32(&siu_reg->sc_sipend, 1 << (31-bit)); @@ -61,7 +61,7 @@ static void mpc8xx_ack(struct irq_data *d) static void mpc8xx_end_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq; + unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d); bit = irq_nr & 0x1f; word = irq_nr >> 5; @@ -73,7 +73,7 @@ static void mpc8xx_end_irq(struct irq_data *d) static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type) { if (flow_type & IRQ_TYPE_EDGE_FALLING) { - irq_hw_number_t hw = (unsigned int)irq_map[d->irq].hwirq; + irq_hw_number_t hw = (unsigned int)irqd_to_hwirq(d); unsigned int siel = in_be32(&siu_reg->sc_siel); /* only external IRQ senses are programmable */ diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c index 0892a2841c2..fb4963abdf5 100644 --- a/arch/powerpc/sysdev/mpc8xxx_gpio.c +++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c @@ -163,7 +163,7 @@ static void mpc8xxx_irq_unmask(struct irq_data *d) spin_lock_irqsave(&mpc8xxx_gc->lock, flags); - setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq))); + setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d))); spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags); } @@ -176,7 +176,7 @@ static void mpc8xxx_irq_mask(struct irq_data *d) spin_lock_irqsave(&mpc8xxx_gc->lock, flags); - clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq))); + clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d))); spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags); } @@ -186,7 +186,7 @@ static void mpc8xxx_irq_ack(struct irq_data *d) struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d); struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc; - out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(virq_to_hw(d->irq))); + out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(irqd_to_hwirq(d))); } static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type) @@ -199,14 +199,14 @@ static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type) case IRQ_TYPE_EDGE_FALLING: spin_lock_irqsave(&mpc8xxx_gc->lock, flags); setbits32(mm->regs + GPIO_ICR, - mpc8xxx_gpio2mask(virq_to_hw(d->irq))); + mpc8xxx_gpio2mask(irqd_to_hwirq(d))); spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags); break; case IRQ_TYPE_EDGE_BOTH: spin_lock_irqsave(&mpc8xxx_gc->lock, flags); clrbits32(mm->regs + GPIO_ICR, - mpc8xxx_gpio2mask(virq_to_hw(d->irq))); + mpc8xxx_gpio2mask(irqd_to_hwirq(d))); spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags); break; @@ -221,7 +221,7 @@ static int mpc512x_irq_set_type(struct irq_data *d, unsigned int flow_type) { struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d); struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc; - unsigned long gpio = virq_to_hw(d->irq); + unsigned long gpio = irqd_to_hwirq(d); void __iomem *reg; unsigned int shift; unsigned long flags; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 7e5dc8f4984..3a8de5bb628 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -6,6 +6,7 @@ * with various broken implementations of this HW. * * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * Copyright 2010-2011 Freescale Semiconductor, Inc. * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive @@ -219,6 +220,28 @@ static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 valu _mpic_write(mpic->reg_type, &mpic->gregs, offset, value); } +static inline u32 _mpic_tm_read(struct mpic *mpic, unsigned int tm) +{ + unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) + + ((tm & 3) * MPIC_INFO(TIMER_STRIDE)); + + if (tm >= 4) + offset += 0x1000 / 4; + + return _mpic_read(mpic->reg_type, &mpic->tmregs, offset); +} + +static inline void _mpic_tm_write(struct mpic *mpic, unsigned int tm, u32 value) +{ + unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) + + ((tm & 3) * MPIC_INFO(TIMER_STRIDE)); + + if (tm >= 4) + offset += 0x1000 / 4; + + _mpic_write(mpic->reg_type, &mpic->tmregs, offset, value); +} + static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg) { unsigned int cpu = mpic_processor_id(mpic); @@ -269,6 +292,8 @@ static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no, #define mpic_write(b,r,v) _mpic_write(mpic->reg_type,&(b),(r),(v)) #define mpic_ipi_read(i) _mpic_ipi_read(mpic,(i)) #define mpic_ipi_write(i,v) _mpic_ipi_write(mpic,(i),(v)) +#define mpic_tm_read(i) _mpic_tm_read(mpic,(i)) +#define mpic_tm_write(i,v) _mpic_tm_write(mpic,(i),(v)) #define mpic_cpu_read(i) _mpic_cpu_read(mpic,(i)) #define mpic_cpu_write(i,v) _mpic_cpu_write(mpic,(i),(v)) #define mpic_irq_read(s,r) _mpic_irq_read(mpic,(s),(r)) @@ -608,8 +633,6 @@ static int irq_choose_cpu(const struct cpumask *mask) } #endif -#define mpic_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq) - /* Find an mpic associated with a given linux interrupt */ static struct mpic *mpic_find(unsigned int irq) { @@ -622,11 +645,18 @@ static struct mpic *mpic_find(unsigned int irq) /* Determine if the linux irq is an IPI */ static unsigned int mpic_is_ipi(struct mpic *mpic, unsigned int irq) { - unsigned int src = mpic_irq_to_hw(irq); + unsigned int src = virq_to_hw(irq); return (src >= mpic->ipi_vecs[0] && src <= mpic->ipi_vecs[3]); } +/* Determine if the linux irq is a timer */ +static unsigned int mpic_is_tm(struct mpic *mpic, unsigned int irq) +{ + unsigned int src = virq_to_hw(irq); + + return (src >= mpic->timer_vecs[0] && src <= mpic->timer_vecs[7]); +} /* Convert a cpu mask from logical to physical cpu numbers. */ static inline u32 mpic_physmask(u32 cpumask) @@ -634,7 +664,7 @@ static inline u32 mpic_physmask(u32 cpumask) int i; u32 mask = 0; - for (i = 0; i < NR_CPUS; ++i, cpumask >>= 1) + for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1) mask |= (cpumask & 1) << get_hard_smp_processor_id(i); return mask; } @@ -675,7 +705,7 @@ void mpic_unmask_irq(struct irq_data *d) { unsigned int loops = 100000; struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, d->irq, src); @@ -696,7 +726,7 @@ void mpic_mask_irq(struct irq_data *d) { unsigned int loops = 100000; struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); DBG("%s: disable_irq: %d (src %d)\n", mpic->name, d->irq, src); @@ -734,7 +764,7 @@ void mpic_end_irq(struct irq_data *d) static void mpic_unmask_ht_irq(struct irq_data *d) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); mpic_unmask_irq(d); @@ -745,7 +775,7 @@ static void mpic_unmask_ht_irq(struct irq_data *d) static unsigned int mpic_startup_ht_irq(struct irq_data *d) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); mpic_unmask_irq(d); mpic_startup_ht_interrupt(mpic, src, irqd_is_level_type(d)); @@ -756,7 +786,7 @@ static unsigned int mpic_startup_ht_irq(struct irq_data *d) static void mpic_shutdown_ht_irq(struct irq_data *d) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); mpic_shutdown_ht_interrupt(mpic, src); mpic_mask_irq(d); @@ -765,7 +795,7 @@ static void mpic_shutdown_ht_irq(struct irq_data *d) static void mpic_end_ht_irq(struct irq_data *d) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); #ifdef DEBUG_IRQ DBG("%s: end_irq: %d\n", mpic->name, d->irq); @@ -786,7 +816,7 @@ static void mpic_end_ht_irq(struct irq_data *d) static void mpic_unmask_ipi(struct irq_data *d) { struct mpic *mpic = mpic_from_ipi(d); - unsigned int src = mpic_irq_to_hw(d->irq) - mpic->ipi_vecs[0]; + unsigned int src = virq_to_hw(d->irq) - mpic->ipi_vecs[0]; DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, d->irq, src); mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK); @@ -813,27 +843,42 @@ static void mpic_end_ipi(struct irq_data *d) #endif /* CONFIG_SMP */ +static void mpic_unmask_tm(struct irq_data *d) +{ + struct mpic *mpic = mpic_from_irq_data(d); + unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0]; + + DBG("%s: enable_tm: %d (tm %d)\n", mpic->name, irq, src); + mpic_tm_write(src, mpic_tm_read(src) & ~MPIC_VECPRI_MASK); + mpic_tm_read(src); +} + +static void mpic_mask_tm(struct irq_data *d) +{ + struct mpic *mpic = mpic_from_irq_data(d); + unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0]; + + mpic_tm_write(src, mpic_tm_read(src) | MPIC_VECPRI_MASK); + mpic_tm_read(src); +} + int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); if (mpic->flags & MPIC_SINGLE_DEST_CPU) { int cpuid = irq_choose_cpu(cpumask); mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1 << cpuid); } else { - cpumask_var_t tmp; - - alloc_cpumask_var(&tmp, GFP_KERNEL); + u32 mask = cpumask_bits(cpumask)[0]; - cpumask_and(tmp, cpumask, cpu_online_mask); + mask &= cpumask_bits(cpu_online_mask)[0]; mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), - mpic_physmask(cpumask_bits(tmp)[0])); - - free_cpumask_var(tmp); + mpic_physmask(mask)); } return 0; @@ -863,7 +908,7 @@ static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type) int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned int vecpri, vold, vnew; DBG("mpic: set_irq_type(mpic:@%p,virq:%d,src:0x%x,type:0x%x)\n", @@ -899,7 +944,7 @@ int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type) void mpic_set_vector(unsigned int virq, unsigned int vector) { struct mpic *mpic = mpic_from_irq(virq); - unsigned int src = mpic_irq_to_hw(virq); + unsigned int src = virq_to_hw(virq); unsigned int vecpri; DBG("mpic: set_vector(mpic:@%p,virq:%d,src:%d,vector:0x%x)\n", @@ -917,7 +962,7 @@ void mpic_set_vector(unsigned int virq, unsigned int vector) void mpic_set_destination(unsigned int virq, unsigned int cpuid) { struct mpic *mpic = mpic_from_irq(virq); - unsigned int src = mpic_irq_to_hw(virq); + unsigned int src = virq_to_hw(virq); DBG("mpic: set_destination(mpic:@%p,virq:%d,src:%d,cpuid:0x%x)\n", mpic, virq, src, cpuid); @@ -943,6 +988,12 @@ static struct irq_chip mpic_ipi_chip = { }; #endif /* CONFIG_SMP */ +static struct irq_chip mpic_tm_chip = { + .irq_mask = mpic_mask_tm, + .irq_unmask = mpic_unmask_tm, + .irq_eoi = mpic_end_irq, +}; + #ifdef CONFIG_MPIC_U3_HT_IRQS static struct irq_chip mpic_irq_ht_chip = { .irq_startup = mpic_startup_ht_irq, @@ -986,6 +1037,16 @@ static int mpic_host_map(struct irq_host *h, unsigned int virq, } #endif /* CONFIG_SMP */ + if (hw >= mpic->timer_vecs[0] && hw <= mpic->timer_vecs[7]) { + WARN_ON(!(mpic->flags & MPIC_PRIMARY)); + + DBG("mpic: mapping as timer\n"); + irq_set_chip_data(virq, mpic); + irq_set_chip_and_handler(virq, &mpic->hc_tm, + handle_fasteoi_irq); + return 0; + } + if (hw >= mpic->irq_count) return -EINVAL; @@ -1026,6 +1087,7 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct, irq_hw_number_t *out_hwirq, unsigned int *out_flags) { + struct mpic *mpic = h->host_data; static unsigned char map_mpic_senses[4] = { IRQ_TYPE_EDGE_RISING, IRQ_TYPE_LEVEL_LOW, @@ -1034,7 +1096,38 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct, }; *out_hwirq = intspec[0]; - if (intsize > 1) { + if (intsize >= 4 && (mpic->flags & MPIC_FSL)) { + /* + * Freescale MPIC with extended intspec: + * First two cells are as usual. Third specifies + * an "interrupt type". Fourth is type-specific data. + * + * See Documentation/devicetree/bindings/powerpc/fsl/mpic.txt + */ + switch (intspec[2]) { + case 0: + case 1: /* no EISR/EIMR support for now, treat as shared IRQ */ + break; + case 2: + if (intspec[0] >= ARRAY_SIZE(mpic->ipi_vecs)) + return -EINVAL; + + *out_hwirq = mpic->ipi_vecs[intspec[0]]; + break; + case 3: + if (intspec[0] >= ARRAY_SIZE(mpic->timer_vecs)) + return -EINVAL; + + *out_hwirq = mpic->timer_vecs[intspec[0]]; + break; + default: + pr_debug("%s: unknown irq type %u\n", + __func__, intspec[2]); + return -EINVAL; + } + + *out_flags = map_mpic_senses[intspec[1] & 3]; + } else if (intsize > 1) { u32 mask = 0x3; /* Apple invented a new race of encoding on machines with @@ -1110,6 +1203,9 @@ struct mpic * __init mpic_alloc(struct device_node *node, mpic->hc_ipi.name = name; #endif /* CONFIG_SMP */ + mpic->hc_tm = mpic_tm_chip; + mpic->hc_tm.name = name; + mpic->flags = flags; mpic->isu_size = isu_size; mpic->irq_count = irq_count; @@ -1120,10 +1216,14 @@ struct mpic * __init mpic_alloc(struct device_node *node, else intvec_top = 255; - mpic->timer_vecs[0] = intvec_top - 8; - mpic->timer_vecs[1] = intvec_top - 7; - mpic->timer_vecs[2] = intvec_top - 6; - mpic->timer_vecs[3] = intvec_top - 5; + mpic->timer_vecs[0] = intvec_top - 12; + mpic->timer_vecs[1] = intvec_top - 11; + mpic->timer_vecs[2] = intvec_top - 10; + mpic->timer_vecs[3] = intvec_top - 9; + mpic->timer_vecs[4] = intvec_top - 8; + mpic->timer_vecs[5] = intvec_top - 7; + mpic->timer_vecs[6] = intvec_top - 6; + mpic->timer_vecs[7] = intvec_top - 5; mpic->ipi_vecs[0] = intvec_top - 4; mpic->ipi_vecs[1] = intvec_top - 3; mpic->ipi_vecs[2] = intvec_top - 2; @@ -1133,6 +1233,8 @@ struct mpic * __init mpic_alloc(struct device_node *node, /* Check for "big-endian" in device-tree */ if (node && of_get_property(node, "big-endian", NULL) != NULL) mpic->flags |= MPIC_BIG_ENDIAN; + if (node && of_device_is_compatible(node, "fsl,mpic")) + mpic->flags |= MPIC_FSL; /* Look for protected sources */ if (node) { @@ -1324,15 +1426,17 @@ void __init mpic_init(struct mpic *mpic) /* Set current processor priority to max */ mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf); - /* Initialize timers: just disable them all */ + /* Initialize timers to our reserved vectors and mask them for now */ for (i = 0; i < 4; i++) { mpic_write(mpic->tmregs, i * MPIC_INFO(TIMER_STRIDE) + - MPIC_INFO(TIMER_DESTINATION), 0); + MPIC_INFO(TIMER_DESTINATION), + 1 << hard_smp_processor_id()); mpic_write(mpic->tmregs, i * MPIC_INFO(TIMER_STRIDE) + MPIC_INFO(TIMER_VECTOR_PRI), MPIC_VECPRI_MASK | + (9 << MPIC_VECPRI_PRIORITY_SHIFT) | (mpic->timer_vecs[0] + i)); } @@ -1428,7 +1532,7 @@ void __init mpic_set_serial_int(struct mpic *mpic, int enable) void mpic_irq_set_priority(unsigned int irq, unsigned int pri) { struct mpic *mpic = mpic_find(irq); - unsigned int src = mpic_irq_to_hw(irq); + unsigned int src = virq_to_hw(irq); unsigned long flags; u32 reg; @@ -1441,6 +1545,11 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri) ~MPIC_VECPRI_PRIORITY_MASK; mpic_ipi_write(src - mpic->ipi_vecs[0], reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); + } else if (mpic_is_tm(mpic, irq)) { + reg = mpic_tm_read(src - mpic->timer_vecs[0]) & + ~MPIC_VECPRI_PRIORITY_MASK; + mpic_tm_write(src - mpic->timer_vecs[0], + reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); } else { reg = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & ~MPIC_VECPRI_PRIORITY_MASK; @@ -1620,46 +1729,28 @@ void mpic_request_ipis(void) } } -static void mpic_send_ipi(unsigned int ipi_no, const struct cpumask *cpu_mask) +void smp_mpic_message_pass(int cpu, int msg) { struct mpic *mpic = mpic_primary; + u32 physmask; BUG_ON(mpic == NULL); -#ifdef DEBUG_IPI - DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no); -#endif - - mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) + - ipi_no * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE), - mpic_physmask(cpumask_bits(cpu_mask)[0])); -} - -void smp_mpic_message_pass(int target, int msg) -{ - cpumask_var_t tmp; - /* make sure we're sending something that translates to an IPI */ if ((unsigned int)msg > 3) { printk("SMP %d: smp_message_pass: unknown msg %d\n", smp_processor_id(), msg); return; } - switch (target) { - case MSG_ALL: - mpic_send_ipi(msg, cpu_online_mask); - break; - case MSG_ALL_BUT_SELF: - alloc_cpumask_var(&tmp, GFP_NOWAIT); - cpumask_andnot(tmp, cpu_online_mask, - cpumask_of(smp_processor_id())); - mpic_send_ipi(msg, tmp); - free_cpumask_var(tmp); - break; - default: - mpic_send_ipi(msg, cpumask_of(target)); - break; - } + +#ifdef DEBUG_IPI + DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, msg); +#endif + + physmask = 1 << get_hard_smp_processor_id(cpu); + + mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) + + msg * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE), physmask); } int __init smp_mpic_probe(void) diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c index e9c633c7c08..14d130268e7 100644 --- a/arch/powerpc/sysdev/mv64x60_pic.c +++ b/arch/powerpc/sysdev/mv64x60_pic.c @@ -78,7 +78,7 @@ static struct irq_host *mv64x60_irq_host; static void mv64x60_mask_low(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -91,7 +91,7 @@ static void mv64x60_mask_low(struct irq_data *d) static void mv64x60_unmask_low(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -115,7 +115,7 @@ static struct irq_chip mv64x60_chip_low = { static void mv64x60_mask_high(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -128,7 +128,7 @@ static void mv64x60_mask_high(struct irq_data *d) static void mv64x60_unmask_high(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -152,7 +152,7 @@ static struct irq_chip mv64x60_chip_high = { static void mv64x60_mask_gpp(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -165,7 +165,7 @@ static void mv64x60_mask_gpp(struct irq_data *d) static void mv64x60_mask_ack_gpp(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -180,7 +180,7 @@ static void mv64x60_mask_ack_gpp(struct irq_data *d) static void mv64x60_unmask_gpp(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index 832d6924ad1..b2acda07220 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -197,12 +197,10 @@ static inline struct qe_ic *qe_ic_from_irq_data(struct irq_data *d) return irq_data_get_irq_chip_data(d); } -#define virq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq) - static void qe_ic_unmask_irq(struct irq_data *d) { struct qe_ic *qe_ic = qe_ic_from_irq_data(d); - unsigned int src = virq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; @@ -218,7 +216,7 @@ static void qe_ic_unmask_irq(struct irq_data *d) static void qe_ic_mask_irq(struct irq_data *d) { struct qe_ic *qe_ic = qe_ic_from_irq_data(d); - unsigned int src = virq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c new file mode 100644 index 00000000000..b2593ce30c9 --- /dev/null +++ b/arch/powerpc/sysdev/scom.c @@ -0,0 +1,192 @@ +/* + * Copyright 2010 Benjamin Herrenschmidt, IBM Corp + * <benh@kernel.crashing.org> + * and David Gibson, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/debugfs.h> +#include <linux/slab.h> +#include <asm/prom.h> +#include <asm/scom.h> + +const struct scom_controller *scom_controller; +EXPORT_SYMBOL_GPL(scom_controller); + +struct device_node *scom_find_parent(struct device_node *node) +{ + struct device_node *par, *tmp; + const u32 *p; + + for (par = of_node_get(node); par;) { + if (of_get_property(par, "scom-controller", NULL)) + break; + p = of_get_property(par, "scom-parent", NULL); + tmp = par; + if (p == NULL) + par = of_get_parent(par); + else + par = of_find_node_by_phandle(*p); + of_node_put(tmp); + } + return par; +} +EXPORT_SYMBOL_GPL(scom_find_parent); + +scom_map_t scom_map_device(struct device_node *dev, int index) +{ + struct device_node *parent; + unsigned int cells, size; + const u32 *prop; + u64 reg, cnt; + scom_map_t ret; + + parent = scom_find_parent(dev); + + if (parent == NULL) + return 0; + + prop = of_get_property(parent, "#scom-cells", NULL); + cells = prop ? *prop : 1; + + prop = of_get_property(dev, "scom-reg", &size); + if (!prop) + return 0; + size >>= 2; + + if (index >= (size / (2*cells))) + return 0; + + reg = of_read_number(&prop[index * cells * 2], cells); + cnt = of_read_number(&prop[index * cells * 2 + cells], cells); + + ret = scom_map(parent, reg, cnt); + of_node_put(parent); + + return ret; +} +EXPORT_SYMBOL_GPL(scom_map_device); + +#ifdef CONFIG_SCOM_DEBUGFS +struct scom_debug_entry { + struct device_node *dn; + unsigned long addr; + scom_map_t map; + spinlock_t lock; + char name[8]; + struct debugfs_blob_wrapper blob; +}; + +static int scom_addr_set(void *data, u64 val) +{ + struct scom_debug_entry *ent = data; + + ent->addr = 0; + scom_unmap(ent->map); + + ent->map = scom_map(ent->dn, val, 1); + if (scom_map_ok(ent->map)) + ent->addr = val; + else + return -EFAULT; + + return 0; +} + +static int scom_addr_get(void *data, u64 *val) +{ + struct scom_debug_entry *ent = data; + *val = ent->addr; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(scom_addr_fops, scom_addr_get, scom_addr_set, + "0x%llx\n"); + +static int scom_val_set(void *data, u64 val) +{ + struct scom_debug_entry *ent = data; + + if (!scom_map_ok(ent->map)) + return -EFAULT; + + scom_write(ent->map, 0, val); + + return 0; +} + +static int scom_val_get(void *data, u64 *val) +{ + struct scom_debug_entry *ent = data; + + if (!scom_map_ok(ent->map)) + return -EFAULT; + + *val = scom_read(ent->map, 0); + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(scom_val_fops, scom_val_get, scom_val_set, + "0x%llx\n"); + +static int scom_debug_init_one(struct dentry *root, struct device_node *dn, + int i) +{ + struct scom_debug_entry *ent; + struct dentry *dir; + + ent = kzalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return -ENOMEM; + + ent->dn = of_node_get(dn); + ent->map = SCOM_MAP_INVALID; + spin_lock_init(&ent->lock); + snprintf(ent->name, 8, "scom%d", i); + ent->blob.data = dn->full_name; + ent->blob.size = strlen(dn->full_name); + + dir = debugfs_create_dir(ent->name, root); + if (!dir) { + of_node_put(dn); + kfree(ent); + return -1; + } + + debugfs_create_file("addr", 0600, dir, ent, &scom_addr_fops); + debugfs_create_file("value", 0600, dir, ent, &scom_val_fops); + debugfs_create_blob("path", 0400, dir, &ent->blob); + + return 0; +} + +static int scom_debug_init(void) +{ + struct device_node *dn; + struct dentry *root; + int i, rc; + + root = debugfs_create_dir("scom", powerpc_debugfs_root); + if (!root) + return -1; + + i = rc = 0; + for_each_node_with_property(dn, "scom-controller") + rc |= scom_debug_init_one(root, dn, i++); + + return rc; +} +device_initcall(scom_debug_init); +#endif /* CONFIG_SCOM_DEBUGFS */ diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 5d913851662..984cd202915 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -41,8 +41,6 @@ #define UIC_VR 0x7 #define UIC_VCR 0x8 -#define uic_irq_to_hw(virq) (irq_map[virq].hwirq) - struct uic *primary_uic; struct uic { @@ -58,7 +56,7 @@ struct uic { static void uic_unmask_irq(struct irq_data *d) { struct uic *uic = irq_data_get_irq_chip_data(d); - unsigned int src = uic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 er, sr; @@ -76,7 +74,7 @@ static void uic_unmask_irq(struct irq_data *d) static void uic_mask_irq(struct irq_data *d) { struct uic *uic = irq_data_get_irq_chip_data(d); - unsigned int src = uic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 er; @@ -90,7 +88,7 @@ static void uic_mask_irq(struct irq_data *d) static void uic_ack_irq(struct irq_data *d) { struct uic *uic = irq_data_get_irq_chip_data(d); - unsigned int src = uic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; spin_lock_irqsave(&uic->lock, flags); @@ -101,7 +99,7 @@ static void uic_ack_irq(struct irq_data *d) static void uic_mask_ack_irq(struct irq_data *d) { struct uic *uic = irq_data_get_irq_chip_data(d); - unsigned int src = uic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 er, sr; @@ -126,7 +124,7 @@ static void uic_mask_ack_irq(struct irq_data *d) static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type) { struct uic *uic = irq_data_get_irq_chip_data(d); - unsigned int src = uic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; int trigger, polarity; u32 tr, pr, mask; diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig new file mode 100644 index 00000000000..0031eda320c --- /dev/null +++ b/arch/powerpc/sysdev/xics/Kconfig @@ -0,0 +1,13 @@ +config PPC_XICS + def_bool n + select PPC_SMP_MUXED_IPI + +config PPC_ICP_NATIVE + def_bool n + +config PPC_ICP_HV + def_bool n + +config PPC_ICS_RTAS + def_bool n + diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile new file mode 100644 index 00000000000..b75a6059337 --- /dev/null +++ b/arch/powerpc/sysdev/xics/Makefile @@ -0,0 +1,6 @@ +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror + +obj-y += xics-common.o +obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o +obj-$(CONFIG_PPC_ICP_HV) += icp-hv.o +obj-$(CONFIG_PPC_ICS_RTAS) += ics-rtas.o diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c new file mode 100644 index 00000000000..9518d367a64 --- /dev/null +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -0,0 +1,164 @@ +/* + * Copyright 2011 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/of.h> + +#include <asm/smp.h> +#include <asm/irq.h> +#include <asm/errno.h> +#include <asm/xics.h> +#include <asm/io.h> +#include <asm/hvcall.h> + +static inline unsigned int icp_hv_get_xirr(unsigned char cppr) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_XIRR, retbuf, cppr); + if (rc != H_SUCCESS) + panic(" bad return code xirr - rc = %lx\n", rc); + return (unsigned int)retbuf[0]; +} + +static inline void icp_hv_set_xirr(unsigned int value) +{ + long rc = plpar_hcall_norets(H_EOI, value); + if (rc != H_SUCCESS) + panic("bad return code EOI - rc = %ld, value=%x\n", rc, value); +} + +static inline void icp_hv_set_cppr(u8 value) +{ + long rc = plpar_hcall_norets(H_CPPR, value); + if (rc != H_SUCCESS) + panic("bad return code cppr - rc = %lx\n", rc); +} + +static inline void icp_hv_set_qirr(int n_cpu , u8 value) +{ + long rc = plpar_hcall_norets(H_IPI, get_hard_smp_processor_id(n_cpu), + value); + if (rc != H_SUCCESS) + panic("bad return code qirr - rc = %lx\n", rc); +} + +static void icp_hv_eoi(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + + iosync(); + icp_hv_set_xirr((xics_pop_cppr() << 24) | hw_irq); +} + +static void icp_hv_teardown_cpu(void) +{ + int cpu = smp_processor_id(); + + /* Clear any pending IPI */ + icp_hv_set_qirr(cpu, 0xff); +} + +static void icp_hv_flush_ipi(void) +{ + /* We take the ipi irq but and never return so we + * need to EOI the IPI, but want to leave our priority 0 + * + * should we check all the other interrupts too? + * should we be flagging idle loop instead? + * or creating some task to be scheduled? + */ + + icp_hv_set_xirr((0x00 << 24) | XICS_IPI); +} + +static unsigned int icp_hv_get_irq(void) +{ + unsigned int xirr = icp_hv_get_xirr(xics_cppr_top()); + unsigned int vec = xirr & 0x00ffffff; + unsigned int irq; + + if (vec == XICS_IRQ_SPURIOUS) + return NO_IRQ; + + irq = irq_radix_revmap_lookup(xics_host, vec); + if (likely(irq != NO_IRQ)) { + xics_push_cppr(vec); + return irq; + } + + /* We don't have a linux mapping, so have rtas mask it. */ + xics_mask_unknown_vec(vec); + + /* We might learn about it later, so EOI it */ + icp_hv_set_xirr(xirr); + + return NO_IRQ; +} + +static void icp_hv_set_cpu_priority(unsigned char cppr) +{ + xics_set_base_cppr(cppr); + icp_hv_set_cppr(cppr); + iosync(); +} + +#ifdef CONFIG_SMP + +static void icp_hv_cause_ipi(int cpu, unsigned long data) +{ + icp_hv_set_qirr(cpu, IPI_PRIORITY); +} + +static irqreturn_t icp_hv_ipi_action(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + + icp_hv_set_qirr(cpu, 0xff); + + return smp_ipi_demux(); +} + +#endif /* CONFIG_SMP */ + +static const struct icp_ops icp_hv_ops = { + .get_irq = icp_hv_get_irq, + .eoi = icp_hv_eoi, + .set_priority = icp_hv_set_cpu_priority, + .teardown_cpu = icp_hv_teardown_cpu, + .flush_ipi = icp_hv_flush_ipi, +#ifdef CONFIG_SMP + .ipi_action = icp_hv_ipi_action, + .cause_ipi = icp_hv_cause_ipi, +#endif +}; + +int icp_hv_init(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xicp"); + if (!np) + np = of_find_node_by_type(NULL, + "PowerPC-External-Interrupt-Presentation"); + if (!np) + return -ENODEV; + + icp_ops = &icp_hv_ops; + + return 0; +} + diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c new file mode 100644 index 00000000000..1f15ad43614 --- /dev/null +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -0,0 +1,293 @@ +/* + * Copyright 2011 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/of.h> +#include <linux/spinlock.h> + +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/irq.h> +#include <asm/errno.h> +#include <asm/xics.h> + +struct icp_ipl { + union { + u32 word; + u8 bytes[4]; + } xirr_poll; + union { + u32 word; + u8 bytes[4]; + } xirr; + u32 dummy; + union { + u32 word; + u8 bytes[4]; + } qirr; + u32 link_a; + u32 link_b; + u32 link_c; +}; + +static struct icp_ipl __iomem *icp_native_regs[NR_CPUS]; + +static inline unsigned int icp_native_get_xirr(void) +{ + int cpu = smp_processor_id(); + + return in_be32(&icp_native_regs[cpu]->xirr.word); +} + +static inline void icp_native_set_xirr(unsigned int value) +{ + int cpu = smp_processor_id(); + + out_be32(&icp_native_regs[cpu]->xirr.word, value); +} + +static inline void icp_native_set_cppr(u8 value) +{ + int cpu = smp_processor_id(); + + out_8(&icp_native_regs[cpu]->xirr.bytes[0], value); +} + +static inline void icp_native_set_qirr(int n_cpu, u8 value) +{ + out_8(&icp_native_regs[n_cpu]->qirr.bytes[0], value); +} + +static void icp_native_set_cpu_priority(unsigned char cppr) +{ + xics_set_base_cppr(cppr); + icp_native_set_cppr(cppr); + iosync(); +} + +static void icp_native_eoi(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + + iosync(); + icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq); +} + +static void icp_native_teardown_cpu(void) +{ + int cpu = smp_processor_id(); + + /* Clear any pending IPI */ + icp_native_set_qirr(cpu, 0xff); +} + +static void icp_native_flush_ipi(void) +{ + /* We take the ipi irq but and never return so we + * need to EOI the IPI, but want to leave our priority 0 + * + * should we check all the other interrupts too? + * should we be flagging idle loop instead? + * or creating some task to be scheduled? + */ + + icp_native_set_xirr((0x00 << 24) | XICS_IPI); +} + +static unsigned int icp_native_get_irq(void) +{ + unsigned int xirr = icp_native_get_xirr(); + unsigned int vec = xirr & 0x00ffffff; + unsigned int irq; + + if (vec == XICS_IRQ_SPURIOUS) + return NO_IRQ; + + irq = irq_radix_revmap_lookup(xics_host, vec); + if (likely(irq != NO_IRQ)) { + xics_push_cppr(vec); + return irq; + } + + /* We don't have a linux mapping, so have rtas mask it. */ + xics_mask_unknown_vec(vec); + + /* We might learn about it later, so EOI it */ + icp_native_set_xirr(xirr); + + return NO_IRQ; +} + +#ifdef CONFIG_SMP + +static void icp_native_cause_ipi(int cpu, unsigned long data) +{ + icp_native_set_qirr(cpu, IPI_PRIORITY); +} + +static irqreturn_t icp_native_ipi_action(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + + icp_native_set_qirr(cpu, 0xff); + + return smp_ipi_demux(); +} + +#endif /* CONFIG_SMP */ + +static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr, + unsigned long size) +{ + char *rname; + int i, cpu = -1; + + /* This may look gross but it's good enough for now, we don't quite + * have a hard -> linux processor id matching. + */ + for_each_possible_cpu(i) { + if (!cpu_present(i)) + continue; + if (hw_id == get_hard_smp_processor_id(i)) { + cpu = i; + break; + } + } + + /* Fail, skip that CPU. Don't print, it's normal, some XICS come up + * with way more entries in there than you have CPUs + */ + if (cpu == -1) + return 0; + + rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation", + cpu, hw_id); + + if (!request_mem_region(addr, size, rname)) { + pr_warning("icp_native: Could not reserve ICP MMIO" + " for CPU %d, interrupt server #0x%x\n", + cpu, hw_id); + return -EBUSY; + } + + icp_native_regs[cpu] = ioremap(addr, size); + if (!icp_native_regs[cpu]) { + pr_warning("icp_native: Failed ioremap for CPU %d, " + "interrupt server #0x%x, addr %#lx\n", + cpu, hw_id, addr); + release_mem_region(addr, size); + return -ENOMEM; + } + return 0; +} + +static int __init icp_native_init_one_node(struct device_node *np, + unsigned int *indx) +{ + unsigned int ilen; + const u32 *ireg; + int i; + int reg_tuple_size; + int num_servers = 0; + + /* This code does the theorically broken assumption that the interrupt + * server numbers are the same as the hard CPU numbers. + * This happens to be the case so far but we are playing with fire... + * should be fixed one of these days. -BenH. + */ + ireg = of_get_property(np, "ibm,interrupt-server-ranges", &ilen); + + /* Do that ever happen ? we'll know soon enough... but even good'old + * f80 does have that property .. + */ + WARN_ON((ireg == NULL) || (ilen != 2*sizeof(u32))); + + if (ireg) { + *indx = of_read_number(ireg, 1); + if (ilen >= 2*sizeof(u32)) + num_servers = of_read_number(ireg + 1, 1); + } + + ireg = of_get_property(np, "reg", &ilen); + if (!ireg) { + pr_err("icp_native: Can't find interrupt reg property"); + return -1; + } + + reg_tuple_size = (of_n_addr_cells(np) + of_n_size_cells(np)) * 4; + if (((ilen % reg_tuple_size) != 0) + || (num_servers && (num_servers != (ilen / reg_tuple_size)))) { + pr_err("icp_native: ICP reg len (%d) != num servers (%d)", + ilen / reg_tuple_size, num_servers); + return -1; + } + + for (i = 0; i < (ilen / reg_tuple_size); i++) { + struct resource r; + int err; + + err = of_address_to_resource(np, i, &r); + if (err) { + pr_err("icp_native: Could not translate ICP MMIO" + " for interrupt server 0x%x (%d)\n", *indx, err); + return -1; + } + + if (icp_native_map_one_cpu(*indx, r.start, r.end - r.start)) + return -1; + + (*indx)++; + } + return 0; +} + +static const struct icp_ops icp_native_ops = { + .get_irq = icp_native_get_irq, + .eoi = icp_native_eoi, + .set_priority = icp_native_set_cpu_priority, + .teardown_cpu = icp_native_teardown_cpu, + .flush_ipi = icp_native_flush_ipi, +#ifdef CONFIG_SMP + .ipi_action = icp_native_ipi_action, + .cause_ipi = icp_native_cause_ipi, +#endif +}; + +int icp_native_init(void) +{ + struct device_node *np; + u32 indx = 0; + int found = 0; + + for_each_compatible_node(np, NULL, "ibm,ppc-xicp") + if (icp_native_init_one_node(np, &indx) == 0) + found = 1; + if (!found) { + for_each_node_by_type(np, + "PowerPC-External-Interrupt-Presentation") { + if (icp_native_init_one_node(np, &indx) == 0) + found = 1; + } + } + + if (found == 0) + return -ENODEV; + + icp_ops = &icp_native_ops; + + return 0; +} diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c new file mode 100644 index 00000000000..c782f85cf7e --- /dev/null +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -0,0 +1,240 @@ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/of.h> +#include <linux/spinlock.h> +#include <linux/msi.h> + +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/irq.h> +#include <asm/errno.h> +#include <asm/xics.h> +#include <asm/rtas.h> + +/* RTAS service tokens */ +static int ibm_get_xive; +static int ibm_set_xive; +static int ibm_int_on; +static int ibm_int_off; + +static int ics_rtas_map(struct ics *ics, unsigned int virq); +static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec); +static long ics_rtas_get_server(struct ics *ics, unsigned long vec); +static int ics_rtas_host_match(struct ics *ics, struct device_node *node); + +/* Only one global & state struct ics */ +static struct ics ics_rtas = { + .map = ics_rtas_map, + .mask_unknown = ics_rtas_mask_unknown, + .get_server = ics_rtas_get_server, + .host_match = ics_rtas_host_match, +}; + +static void ics_rtas_unmask_irq(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + int call_status; + int server; + + pr_devel("xics: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq); + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return; + + server = xics_get_irq_server(d->irq, d->affinity, 0); + + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server, + DEFAULT_PRIORITY); + if (call_status != 0) { + printk(KERN_ERR + "%s: ibm_set_xive irq %u server %x returned %d\n", + __func__, hw_irq, server, call_status); + return; + } + + /* Now unmask the interrupt (often a no-op) */ + call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq); + if (call_status != 0) { + printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n", + __func__, hw_irq, call_status); + return; + } +} + +static unsigned int ics_rtas_startup(struct irq_data *d) +{ +#ifdef CONFIG_PCI_MSI + /* + * The generic MSI code returns with the interrupt disabled on the + * card, using the MSI mask bits. Firmware doesn't appear to unmask + * at that level, so we do it here by hand. + */ + if (d->msi_desc) + unmask_msi_irq(d); +#endif + /* unmask it */ + ics_rtas_unmask_irq(d); + return 0; +} + +static void ics_rtas_mask_real_irq(unsigned int hw_irq) +{ + int call_status; + + if (hw_irq == XICS_IPI) + return; + + call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq); + if (call_status != 0) { + printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n", + __func__, hw_irq, call_status); + return; + } + + /* Have to set XIVE to 0xff to be able to remove a slot */ + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, + xics_default_server, 0xff); + if (call_status != 0) { + printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n", + __func__, hw_irq, call_status); + return; + } +} + +static void ics_rtas_mask_irq(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + + pr_devel("xics: mask virq %d [hw 0x%x]\n", d->irq, hw_irq); + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return; + ics_rtas_mask_real_irq(hw_irq); +} + +static int ics_rtas_set_affinity(struct irq_data *d, + const struct cpumask *cpumask, + bool force) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + int status; + int xics_status[2]; + int irq_server; + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return -1; + + status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq); + + if (status) { + printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", + __func__, hw_irq, status); + return -1; + } + + irq_server = xics_get_irq_server(d->irq, cpumask, 1); + if (irq_server == -1) { + char cpulist[128]; + cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); + printk(KERN_WARNING + "%s: No online cpus in the mask %s for irq %d\n", + __func__, cpulist, d->irq); + return -1; + } + + status = rtas_call(ibm_set_xive, 3, 1, NULL, + hw_irq, irq_server, xics_status[1]); + + if (status) { + printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", + __func__, hw_irq, status); + return -1; + } + + return IRQ_SET_MASK_OK; +} + +static struct irq_chip ics_rtas_irq_chip = { + .name = "XICS", + .irq_startup = ics_rtas_startup, + .irq_mask = ics_rtas_mask_irq, + .irq_unmask = ics_rtas_unmask_irq, + .irq_eoi = NULL, /* Patched at init time */ + .irq_set_affinity = ics_rtas_set_affinity +}; + +static int ics_rtas_map(struct ics *ics, unsigned int virq) +{ + unsigned int hw_irq = (unsigned int)virq_to_hw(virq); + int status[2]; + int rc; + + if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)) + return -EINVAL; + + /* Check if RTAS knows about this interrupt */ + rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq); + if (rc) + return -ENXIO; + + irq_set_chip_and_handler(virq, &ics_rtas_irq_chip, handle_fasteoi_irq); + irq_set_chip_data(virq, &ics_rtas); + + return 0; +} + +static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec) +{ + ics_rtas_mask_real_irq(vec); +} + +static long ics_rtas_get_server(struct ics *ics, unsigned long vec) +{ + int rc, status[2]; + + rc = rtas_call(ibm_get_xive, 1, 3, status, vec); + if (rc) + return -1; + return status[0]; +} + +static int ics_rtas_host_match(struct ics *ics, struct device_node *node) +{ + /* IBM machines have interrupt parents of various funky types for things + * like vdevices, events, etc... The trick we use here is to match + * everything here except the legacy 8259 which is compatible "chrp,iic" + */ + return !of_device_is_compatible(node, "chrp,iic"); +} + +int ics_rtas_init(void) +{ + ibm_get_xive = rtas_token("ibm,get-xive"); + ibm_set_xive = rtas_token("ibm,set-xive"); + ibm_int_on = rtas_token("ibm,int-on"); + ibm_int_off = rtas_token("ibm,int-off"); + + /* We enable the RTAS "ICS" if RTAS is present with the + * appropriate tokens + */ + if (ibm_get_xive == RTAS_UNKNOWN_SERVICE || + ibm_set_xive == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + /* We need to patch our irq chip's EOI to point to the + * right ICP + */ + ics_rtas_irq_chip.irq_eoi = icp_ops->eoi; + + /* Register ourselves */ + xics_register_ics(&ics_rtas); + + return 0; +} + diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c new file mode 100644 index 00000000000..445c5a01b76 --- /dev/null +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -0,0 +1,443 @@ +/* + * Copyright 2011 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include <linux/types.h> +#include <linux/threads.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/debugfs.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/irq.h> +#include <asm/errno.h> +#include <asm/rtas.h> +#include <asm/xics.h> +#include <asm/firmware.h> + +/* Globals common to all ICP/ICS implementations */ +const struct icp_ops *icp_ops; + +unsigned int xics_default_server = 0xff; +unsigned int xics_default_distrib_server = 0; +unsigned int xics_interrupt_server_size = 8; + +DEFINE_PER_CPU(struct xics_cppr, xics_cppr); + +struct irq_host *xics_host; + +static LIST_HEAD(ics_list); + +void xics_update_irq_servers(void) +{ + int i, j; + struct device_node *np; + u32 ilen; + const u32 *ireg; + u32 hcpuid; + + /* Find the server numbers for the boot cpu. */ + np = of_get_cpu_node(boot_cpuid, NULL); + BUG_ON(!np); + + hcpuid = get_hard_smp_processor_id(boot_cpuid); + xics_default_server = xics_default_distrib_server = hcpuid; + + pr_devel("xics: xics_default_server = 0x%x\n", xics_default_server); + + ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen); + if (!ireg) { + of_node_put(np); + return; + } + + i = ilen / sizeof(int); + + /* Global interrupt distribution server is specified in the last + * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last + * entry fom this property for current boot cpu id and use it as + * default distribution server + */ + for (j = 0; j < i; j += 2) { + if (ireg[j] == hcpuid) { + xics_default_distrib_server = ireg[j+1]; + break; + } + } + pr_devel("xics: xics_default_distrib_server = 0x%x\n", + xics_default_distrib_server); + of_node_put(np); +} + +/* GIQ stuff, currently only supported on RTAS setups, will have + * to be sorted properly for bare metal + */ +void xics_set_cpu_giq(unsigned int gserver, unsigned int join) +{ +#ifdef CONFIG_PPC_RTAS + int index; + int status; + + if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL)) + return; + + index = (1UL << xics_interrupt_server_size) - 1 - gserver; + + status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join); + + WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n", + GLOBAL_INTERRUPT_QUEUE, index, join, status); +#endif +} + +void xics_setup_cpu(void) +{ + icp_ops->set_priority(LOWEST_PRIORITY); + + xics_set_cpu_giq(xics_default_distrib_server, 1); +} + +void xics_mask_unknown_vec(unsigned int vec) +{ + struct ics *ics; + + pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec); + + list_for_each_entry(ics, &ics_list, link) + ics->mask_unknown(ics, vec); +} + + +#ifdef CONFIG_SMP + +static void xics_request_ipi(void) +{ + unsigned int ipi; + + ipi = irq_create_mapping(xics_host, XICS_IPI); + BUG_ON(ipi == NO_IRQ); + + /* + * IPIs are marked IRQF_DISABLED as they must run with irqs + * disabled, and PERCPU. The handler was set in map. + */ + BUG_ON(request_irq(ipi, icp_ops->ipi_action, + IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL)); +} + +int __init xics_smp_probe(void) +{ + /* Setup cause_ipi callback based on which ICP is used */ + smp_ops->cause_ipi = icp_ops->cause_ipi; + + /* Register all the IPIs */ + xics_request_ipi(); + + return cpumask_weight(cpu_possible_mask); +} + +#endif /* CONFIG_SMP */ + +void xics_teardown_cpu(void) +{ + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + + /* + * we have to reset the cppr index to 0 because we're + * not going to return from the IPI + */ + os_cppr->index = 0; + icp_ops->set_priority(0); + icp_ops->teardown_cpu(); +} + +void xics_kexec_teardown_cpu(int secondary) +{ + xics_teardown_cpu(); + + icp_ops->flush_ipi(); + + /* + * Some machines need to have at least one cpu in the GIQ, + * so leave the master cpu in the group. + */ + if (secondary) + xics_set_cpu_giq(xics_default_distrib_server, 0); +} + + +#ifdef CONFIG_HOTPLUG_CPU + +/* Interrupts are disabled. */ +void xics_migrate_irqs_away(void) +{ + int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id(); + unsigned int irq, virq; + + /* If we used to be the default server, move to the new "boot_cpuid" */ + if (hw_cpu == xics_default_server) + xics_update_irq_servers(); + + /* Reject any interrupt that was queued to us... */ + icp_ops->set_priority(0); + + /* Remove ourselves from the global interrupt queue */ + xics_set_cpu_giq(xics_default_distrib_server, 0); + + /* Allow IPIs again... */ + icp_ops->set_priority(DEFAULT_PRIORITY); + + for_each_irq(virq) { + struct irq_desc *desc; + struct irq_chip *chip; + long server; + unsigned long flags; + struct ics *ics; + + /* We can't set affinity on ISA interrupts */ + if (virq < NUM_ISA_INTERRUPTS) + continue; + if (!virq_is_host(virq, xics_host)) + continue; + irq = (unsigned int)virq_to_hw(virq); + /* We need to get IPIs still. */ + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) + continue; + desc = irq_to_desc(virq); + /* We only need to migrate enabled IRQS */ + if (!desc || !desc->action) + continue; + chip = irq_desc_get_chip(desc); + if (!chip || !chip->irq_set_affinity) + continue; + + raw_spin_lock_irqsave(&desc->lock, flags); + + /* Locate interrupt server */ + server = -1; + ics = irq_get_chip_data(virq); + if (ics) + server = ics->get_server(ics, irq); + if (server < 0) { + printk(KERN_ERR "%s: Can't find server for irq %d\n", + __func__, irq); + goto unlock; + } + + /* We only support delivery to all cpus or to one cpu. + * The irq has to be migrated only in the single cpu + * case. + */ + if (server != hw_cpu) + goto unlock; + + /* This is expected during cpu offline. */ + if (cpu_online(cpu)) + pr_warning("IRQ %u affinity broken off cpu %u\n", + virq, cpu); + + /* Reset affinity to all cpus */ + raw_spin_unlock_irqrestore(&desc->lock, flags); + irq_set_affinity(virq, cpu_all_mask); + continue; +unlock: + raw_spin_unlock_irqrestore(&desc->lock, flags); + } +} +#endif /* CONFIG_HOTPLUG_CPU */ + +#ifdef CONFIG_SMP +/* + * For the moment we only implement delivery to all cpus or one cpu. + * + * If the requested affinity is cpu_all_mask, we set global affinity. + * If not we set it to the first cpu in the mask, even if multiple cpus + * are set. This is so things like irqbalance (which set core and package + * wide affinities) do the right thing. + * + * We need to fix this to implement support for the links + */ +int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask, + unsigned int strict_check) +{ + + if (!distribute_irqs) + return xics_default_server; + + if (!cpumask_subset(cpu_possible_mask, cpumask)) { + int server = cpumask_first_and(cpu_online_mask, cpumask); + + if (server < nr_cpu_ids) + return get_hard_smp_processor_id(server); + + if (strict_check) + return -1; + } + + /* + * Workaround issue with some versions of JS20 firmware that + * deliver interrupts to cpus which haven't been started. This + * happens when using the maxcpus= boot option. + */ + if (cpumask_equal(cpu_online_mask, cpu_present_mask)) + return xics_default_distrib_server; + + return xics_default_server; +} +#endif /* CONFIG_SMP */ + +static int xics_host_match(struct irq_host *h, struct device_node *node) +{ + struct ics *ics; + + list_for_each_entry(ics, &ics_list, link) + if (ics->host_match(ics, node)) + return 1; + + return 0; +} + +/* Dummies */ +static void xics_ipi_unmask(struct irq_data *d) { } +static void xics_ipi_mask(struct irq_data *d) { } + +static struct irq_chip xics_ipi_chip = { + .name = "XICS", + .irq_eoi = NULL, /* Patched at init time */ + .irq_mask = xics_ipi_mask, + .irq_unmask = xics_ipi_unmask, +}; + +static int xics_host_map(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct ics *ics; + + pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); + + /* Insert the interrupt mapping into the radix tree for fast lookup */ + irq_radix_revmap_insert(xics_host, virq, hw); + + /* They aren't all level sensitive but we just don't really know */ + irq_set_status_flags(virq, IRQ_LEVEL); + + /* Don't call into ICS for IPIs */ + if (hw == XICS_IPI) { + irq_set_chip_and_handler(virq, &xics_ipi_chip, + handle_percpu_irq); + return 0; + } + + /* Let the ICS setup the chip data */ + list_for_each_entry(ics, &ics_list, link) + if (ics->map(ics, virq) == 0) + return 0; + + return -EINVAL; +} + +static int xics_host_xlate(struct irq_host *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + +{ + /* Current xics implementation translates everything + * to level. It is not technically right for MSIs but this + * is irrelevant at this point. We might get smarter in the future + */ + *out_hwirq = intspec[0]; + *out_flags = IRQ_TYPE_LEVEL_LOW; + + return 0; +} + +static struct irq_host_ops xics_host_ops = { + .match = xics_host_match, + .map = xics_host_map, + .xlate = xics_host_xlate, +}; + +static void __init xics_init_host(void) +{ + xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops, + XICS_IRQ_SPURIOUS); + BUG_ON(xics_host == NULL); + irq_set_default_host(xics_host); +} + +void __init xics_register_ics(struct ics *ics) +{ + list_add(&ics->link, &ics_list); +} + +static void __init xics_get_server_size(void) +{ + struct device_node *np; + const u32 *isize; + + /* We fetch the interrupt server size from the first ICS node + * we find if any + */ + np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xics"); + if (!np) + return; + isize = of_get_property(np, "ibm,interrupt-server#-size", NULL); + if (!isize) + return; + xics_interrupt_server_size = *isize; + of_node_put(np); +} + +void __init xics_init(void) +{ + int rc = -1; + + /* Fist locate ICP */ +#ifdef CONFIG_PPC_ICP_HV + if (firmware_has_feature(FW_FEATURE_LPAR)) + rc = icp_hv_init(); +#endif +#ifdef CONFIG_PPC_ICP_NATIVE + if (rc < 0) + rc = icp_native_init(); +#endif + if (rc < 0) { + pr_warning("XICS: Cannot find a Presentation Controller !\n"); + return; + } + + /* Copy get_irq callback over to ppc_md */ + ppc_md.get_irq = icp_ops->get_irq; + + /* Patch up IPI chip EOI */ + xics_ipi_chip.irq_eoi = icp_ops->eoi; + + /* Now locate ICS */ +#ifdef CONFIG_PPC_ICS_RTAS + rc = ics_rtas_init(); +#endif + if (rc < 0) + pr_warning("XICS: Cannot find a Source Controller !\n"); + + /* Initialize common bits */ + xics_get_server_size(); + xics_update_irq_servers(); + xics_init_host(); + xics_setup_cpu(); +} diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 0a13fc19e28..6183799754a 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -71,7 +71,7 @@ static unsigned char xilinx_intc_map_senses[] = { */ static void xilinx_intc_mask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void * regs = irq_data_get_irq_chip_data(d); pr_debug("mask: %d\n", irq); out_be32(regs + XINTC_CIE, 1 << irq); @@ -87,7 +87,7 @@ static int xilinx_intc_set_type(struct irq_data *d, unsigned int flow_type) */ static void xilinx_intc_level_unmask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void * regs = irq_data_get_irq_chip_data(d); pr_debug("unmask: %d\n", irq); out_be32(regs + XINTC_SIE, 1 << irq); @@ -112,7 +112,7 @@ static struct irq_chip xilinx_intc_level_irqchip = { */ static void xilinx_intc_edge_unmask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void *regs = irq_data_get_irq_chip_data(d); pr_debug("unmask: %d\n", irq); out_be32(regs + XINTC_SIE, 1 << irq); @@ -120,7 +120,7 @@ static void xilinx_intc_edge_unmask(struct irq_data *d) static void xilinx_intc_edge_ack(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void * regs = irq_data_get_irq_chip_data(d); pr_debug("ack: %d\n", irq); out_be32(regs + XINTC_IAR, 1 << irq); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 33794c1d92c..42541bbcc7f 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -334,7 +334,7 @@ static void release_output_lock(void) int cpus_are_in_xmon(void) { - return !cpus_empty(cpus_in_xmon); + return !cpumask_empty(&cpus_in_xmon); } #endif @@ -373,7 +373,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) #ifdef CONFIG_SMP cpu = smp_processor_id(); - if (cpu_isset(cpu, cpus_in_xmon)) { + if (cpumask_test_cpu(cpu, &cpus_in_xmon)) { get_output_lock(); excprint(regs); printf("cpu 0x%x: Exception %lx %s in xmon, " @@ -396,10 +396,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi) } xmon_fault_jmp[cpu] = recurse_jmp; - cpu_set(cpu, cpus_in_xmon); + cpumask_set_cpu(cpu, &cpus_in_xmon); bp = NULL; - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) bp = at_breakpoint(regs->nip); if (bp || unrecoverable_excp(regs)) fromipi = 0; @@ -437,10 +437,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi) xmon_owner = cpu; mb(); if (ncpus > 1) { - smp_send_debugger_break(MSG_ALL_BUT_SELF); + smp_send_debugger_break(); /* wait for other cpus to come in */ for (timeout = 100000000; timeout != 0; --timeout) { - if (cpus_weight(cpus_in_xmon) >= ncpus) + if (cpumask_weight(&cpus_in_xmon) >= ncpus) break; barrier(); } @@ -484,7 +484,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) } } leave: - cpu_clear(cpu, cpus_in_xmon); + cpumask_clear_cpu(cpu, &cpus_in_xmon); xmon_fault_jmp[cpu] = NULL; #else /* UP is simple... */ @@ -529,7 +529,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) } } #else - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) { + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) { bp = at_breakpoint(regs->nip); if (bp != NULL) { int stepped = emulate_step(regs, bp->instr[0]); @@ -578,7 +578,7 @@ static int xmon_bpt(struct pt_regs *regs) struct bpt *bp; unsigned long offset; - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF)) + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT)) return 0; /* Are we at the trap at bp->instr[1] for some bp? */ @@ -609,7 +609,7 @@ static int xmon_sstep(struct pt_regs *regs) static int xmon_dabr_match(struct pt_regs *regs) { - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF)) + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT)) return 0; if (dabr.enabled == 0) return 0; @@ -619,7 +619,7 @@ static int xmon_dabr_match(struct pt_regs *regs) static int xmon_iabr_match(struct pt_regs *regs) { - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF)) + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT)) return 0; if (iabr == NULL) return 0; @@ -630,7 +630,7 @@ static int xmon_iabr_match(struct pt_regs *regs) static int xmon_ipi(struct pt_regs *regs) { #ifdef CONFIG_SMP - if (in_xmon && !cpu_isset(smp_processor_id(), cpus_in_xmon)) + if (in_xmon && !cpumask_test_cpu(smp_processor_id(), &cpus_in_xmon)) xmon_core(regs, 1); #endif return 0; @@ -644,7 +644,7 @@ static int xmon_fault_handler(struct pt_regs *regs) if (in_xmon && catch_memory_errors) handle_fault(regs); /* doesn't return */ - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) { + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) { bp = in_breakpoint_table(regs->nip, &offset); if (bp != NULL) { regs->nip = bp->address + offset; @@ -929,7 +929,7 @@ static int do_step(struct pt_regs *regs) int stepped; /* check we are in 64-bit kernel mode, translation enabled */ - if ((regs->msr & (MSR_SF|MSR_PR|MSR_IR)) == (MSR_SF|MSR_IR)) { + if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) { if (mread(regs->nip, &instr, 4) == 4) { stepped = emulate_step(regs, instr); if (stepped < 0) { @@ -976,7 +976,7 @@ static int cpu_cmd(void) printf("cpus stopped:"); count = 0; for (cpu = 0; cpu < NR_CPUS; ++cpu) { - if (cpu_isset(cpu, cpus_in_xmon)) { + if (cpumask_test_cpu(cpu, &cpus_in_xmon)) { if (count == 0) printf(" %x", cpu); ++count; @@ -992,7 +992,7 @@ static int cpu_cmd(void) return 0; } /* try to switch to cpu specified */ - if (!cpu_isset(cpu, cpus_in_xmon)) { + if (!cpumask_test_cpu(cpu, &cpus_in_xmon)) { printf("cpu 0x%x isn't in xmon\n", cpu); return 0; } @@ -1497,6 +1497,10 @@ static void prregs(struct pt_regs *fp) #endif printf("pc = "); xmon_print_symbol(fp->nip, " ", "\n"); + if (TRAP(fp) != 0xc00 && cpu_has_feature(CPU_FTR_CFAR)) { + printf("cfar= "); + xmon_print_symbol(fp->orig_gpr3, " ", "\n"); + } printf("lr = "); xmon_print_symbol(fp->link, " ", "\n"); printf("msr = "REG" cr = %.8lx\n", fp->msr, fp->ccr); @@ -2663,7 +2667,7 @@ static void dump_stab(void) void dump_segments(void) { - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) dump_slb(); else dump_stab(); diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 1cf81d77c5a..7f0b7cda625 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o +obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 58f46734465..a9ce135893f 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -31,7 +31,8 @@ #define AES_KEYLEN_192 2 #define AES_KEYLEN_256 4 -static char keylen_flag = 0; +static u8 *ctrblk; +static char keylen_flag; struct s390_aes_ctx { u8 iv[AES_BLOCK_SIZE]; @@ -45,6 +46,24 @@ struct s390_aes_ctx { } fallback; }; +struct pcc_param { + u8 key[32]; + u8 tweak[16]; + u8 block[16]; + u8 bit[16]; + u8 xts[16]; +}; + +struct s390_xts_ctx { + u8 key[32]; + u8 xts_param[16]; + struct pcc_param pcc; + long enc; + long dec; + int key_len; + struct crypto_blkcipher *fallback; +}; + /* * Check if the key_len is supported by the HW. * Returns 0 if it is, a positive number if it is not and software fallback is @@ -504,15 +523,337 @@ static struct crypto_alg cbc_aes_alg = { } }; +static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int len) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + unsigned int ret; + + xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len); + if (ret) { + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int xts_fallback_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm; + unsigned int ret; + + tfm = desc->tfm; + desc->tfm = xts_ctx->fallback; + + ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int xts_fallback_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm; + unsigned int ret; + + tfm = desc->tfm; + desc->tfm = xts_ctx->fallback; + + ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + + switch (key_len) { + case 32: + xts_ctx->enc = KM_XTS_128_ENCRYPT; + xts_ctx->dec = KM_XTS_128_DECRYPT; + memcpy(xts_ctx->key + 16, in_key, 16); + memcpy(xts_ctx->pcc.key + 16, in_key + 16, 16); + break; + case 48: + xts_ctx->enc = 0; + xts_ctx->dec = 0; + xts_fallback_setkey(tfm, in_key, key_len); + break; + case 64: + xts_ctx->enc = KM_XTS_256_ENCRYPT; + xts_ctx->dec = KM_XTS_256_DECRYPT; + memcpy(xts_ctx->key, in_key, 32); + memcpy(xts_ctx->pcc.key, in_key + 32, 32); + break; + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + xts_ctx->key_len = key_len; + return 0; +} + +static int xts_aes_crypt(struct blkcipher_desc *desc, long func, + struct s390_xts_ctx *xts_ctx, + struct blkcipher_walk *walk) +{ + unsigned int offset = (xts_ctx->key_len >> 1) & 0x10; + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes = walk->nbytes; + unsigned int n; + u8 *in, *out; + void *param; + + if (!nbytes) + goto out; + + memset(xts_ctx->pcc.block, 0, sizeof(xts_ctx->pcc.block)); + memset(xts_ctx->pcc.bit, 0, sizeof(xts_ctx->pcc.bit)); + memset(xts_ctx->pcc.xts, 0, sizeof(xts_ctx->pcc.xts)); + memcpy(xts_ctx->pcc.tweak, walk->iv, sizeof(xts_ctx->pcc.tweak)); + param = xts_ctx->pcc.key + offset; + ret = crypt_s390_pcc(func, param); + BUG_ON(ret < 0); + + memcpy(xts_ctx->xts_param, xts_ctx->pcc.xts, 16); + param = xts_ctx->key + offset; + do { + /* only use complete blocks */ + n = nbytes & ~(AES_BLOCK_SIZE - 1); + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + + ret = crypt_s390_km(func, param, out, in, n); + BUG_ON(ret < 0 || ret != n); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } while ((nbytes = walk->nbytes)); +out: + return ret; +} + +static int xts_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(xts_ctx->key_len == 48)) + return xts_fallback_encrypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk); +} + +static int xts_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(xts_ctx->key_len == 48)) + return xts_fallback_decrypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk); +} + +static int xts_fallback_init(struct crypto_tfm *tfm) +{ + const char *name = tfm->__crt_alg->cra_name; + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + + xts_ctx->fallback = crypto_alloc_blkcipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(xts_ctx->fallback)) { + pr_err("Allocating XTS fallback algorithm %s failed\n", + name); + return PTR_ERR(xts_ctx->fallback); + } + return 0; +} + +static void xts_fallback_exit(struct crypto_tfm *tfm) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + + crypto_free_blkcipher(xts_ctx->fallback); + xts_ctx->fallback = NULL; +} + +static struct crypto_alg xts_aes_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_xts_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(xts_aes_alg.cra_list), + .cra_init = xts_fallback_init, + .cra_exit = xts_fallback_exit, + .cra_u = { + .blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_aes_set_key, + .encrypt = xts_aes_encrypt, + .decrypt = xts_aes_decrypt, + } + } +}; + +static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + switch (key_len) { + case 16: + sctx->enc = KMCTR_AES_128_ENCRYPT; + sctx->dec = KMCTR_AES_128_DECRYPT; + break; + case 24: + sctx->enc = KMCTR_AES_192_ENCRYPT; + sctx->dec = KMCTR_AES_192_DECRYPT; + break; + case 32: + sctx->enc = KMCTR_AES_256_ENCRYPT; + sctx->dec = KMCTR_AES_256_DECRYPT; + break; + } + + return aes_set_key(tfm, in_key, key_len); +} + +static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, + struct s390_aes_ctx *sctx, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); + unsigned int i, n, nbytes; + u8 buf[AES_BLOCK_SIZE]; + u8 *out, *in; + + if (!walk->nbytes) + return ret; + + memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE); + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + while (nbytes >= AES_BLOCK_SIZE) { + /* only use complete blocks, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : + nbytes & ~(AES_BLOCK_SIZE - 1); + for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { + memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrblk + i, AES_BLOCK_SIZE); + } + ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk); + BUG_ON(ret < 0 || ret != n); + if (n > AES_BLOCK_SIZE) + memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrblk, AES_BLOCK_SIZE); + out += n; + in += n; + nbytes -= n; + } + ret = blkcipher_walk_done(desc, walk, nbytes); + } + /* + * final block may be < AES_BLOCK_SIZE, copy only nbytes + */ + if (nbytes) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + ret = crypt_s390_kmctr(func, sctx->key, buf, in, + AES_BLOCK_SIZE, ctrblk); + BUG_ON(ret < 0 || ret != AES_BLOCK_SIZE); + memcpy(out, buf, nbytes); + crypto_inc(ctrblk, AES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, 0); + } + memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE); + return ret; +} + +static int ctr_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_aes_crypt(desc, sctx->enc, sctx, &walk); +} + +static int ctr_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_aes_crypt(desc, sctx->dec, sctx, &walk); +} + +static struct crypto_alg ctr_aes_alg = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ctr_aes_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_aes_set_key, + .encrypt = ctr_aes_encrypt, + .decrypt = ctr_aes_decrypt, + } + } +}; + static int __init aes_s390_init(void) { int ret; - if (crypt_s390_func_available(KM_AES_128_ENCRYPT)) + if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA)) keylen_flag |= AES_KEYLEN_128; - if (crypt_s390_func_available(KM_AES_192_ENCRYPT)) + if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA)) keylen_flag |= AES_KEYLEN_192; - if (crypt_s390_func_available(KM_AES_256_ENCRYPT)) + if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA)) keylen_flag |= AES_KEYLEN_256; if (!keylen_flag) @@ -535,9 +876,40 @@ static int __init aes_s390_init(void) if (ret) goto cbc_aes_err; + if (crypt_s390_func_available(KM_XTS_128_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KM_XTS_256_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ret = crypto_register_alg(&xts_aes_alg); + if (ret) + goto xts_aes_err; + } + + if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_AES_192_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_AES_256_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + if (!ctrblk) { + ret = -ENOMEM; + goto ctr_aes_err; + } + ret = crypto_register_alg(&ctr_aes_alg); + if (ret) { + free_page((unsigned long) ctrblk); + goto ctr_aes_err; + } + } + out: return ret; +ctr_aes_err: + crypto_unregister_alg(&xts_aes_alg); +xts_aes_err: + crypto_unregister_alg(&cbc_aes_alg); cbc_aes_err: crypto_unregister_alg(&ecb_aes_alg); ecb_aes_err: @@ -548,6 +920,9 @@ aes_err: static void __exit aes_s390_fini(void) { + crypto_unregister_alg(&ctr_aes_alg); + free_page((unsigned long) ctrblk); + crypto_unregister_alg(&xts_aes_alg); crypto_unregister_alg(&cbc_aes_alg); crypto_unregister_alg(&ecb_aes_alg); crypto_unregister_alg(&aes_alg); diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index 7ee9a1b4ad9..49676771bd6 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -24,13 +24,18 @@ #define CRYPT_S390_PRIORITY 300 #define CRYPT_S390_COMPOSITE_PRIORITY 400 +#define CRYPT_S390_MSA 0x1 +#define CRYPT_S390_MSA3 0x2 +#define CRYPT_S390_MSA4 0x4 + /* s390 cryptographic operations */ enum crypt_s390_operations { CRYPT_S390_KM = 0x0100, CRYPT_S390_KMC = 0x0200, CRYPT_S390_KIMD = 0x0300, CRYPT_S390_KLMD = 0x0400, - CRYPT_S390_KMAC = 0x0500 + CRYPT_S390_KMAC = 0x0500, + CRYPT_S390_KMCTR = 0x0600 }; /* @@ -51,6 +56,10 @@ enum crypt_s390_km_func { KM_AES_192_DECRYPT = CRYPT_S390_KM | 0x13 | 0x80, KM_AES_256_ENCRYPT = CRYPT_S390_KM | 0x14, KM_AES_256_DECRYPT = CRYPT_S390_KM | 0x14 | 0x80, + KM_XTS_128_ENCRYPT = CRYPT_S390_KM | 0x32, + KM_XTS_128_DECRYPT = CRYPT_S390_KM | 0x32 | 0x80, + KM_XTS_256_ENCRYPT = CRYPT_S390_KM | 0x34, + KM_XTS_256_DECRYPT = CRYPT_S390_KM | 0x34 | 0x80, }; /* @@ -75,6 +84,26 @@ enum crypt_s390_kmc_func { }; /* + * function codes for KMCTR (CIPHER MESSAGE WITH COUNTER) + * instruction + */ +enum crypt_s390_kmctr_func { + KMCTR_QUERY = CRYPT_S390_KMCTR | 0x0, + KMCTR_DEA_ENCRYPT = CRYPT_S390_KMCTR | 0x1, + KMCTR_DEA_DECRYPT = CRYPT_S390_KMCTR | 0x1 | 0x80, + KMCTR_TDEA_128_ENCRYPT = CRYPT_S390_KMCTR | 0x2, + KMCTR_TDEA_128_DECRYPT = CRYPT_S390_KMCTR | 0x2 | 0x80, + KMCTR_TDEA_192_ENCRYPT = CRYPT_S390_KMCTR | 0x3, + KMCTR_TDEA_192_DECRYPT = CRYPT_S390_KMCTR | 0x3 | 0x80, + KMCTR_AES_128_ENCRYPT = CRYPT_S390_KMCTR | 0x12, + KMCTR_AES_128_DECRYPT = CRYPT_S390_KMCTR | 0x12 | 0x80, + KMCTR_AES_192_ENCRYPT = CRYPT_S390_KMCTR | 0x13, + KMCTR_AES_192_DECRYPT = CRYPT_S390_KMCTR | 0x13 | 0x80, + KMCTR_AES_256_ENCRYPT = CRYPT_S390_KMCTR | 0x14, + KMCTR_AES_256_DECRYPT = CRYPT_S390_KMCTR | 0x14 | 0x80, +}; + +/* * function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) * instruction */ @@ -83,6 +112,7 @@ enum crypt_s390_kimd_func { KIMD_SHA_1 = CRYPT_S390_KIMD | 1, KIMD_SHA_256 = CRYPT_S390_KIMD | 2, KIMD_SHA_512 = CRYPT_S390_KIMD | 3, + KIMD_GHASH = CRYPT_S390_KIMD | 65, }; /* @@ -284,6 +314,45 @@ static inline int crypt_s390_kmac(long func, void *param, } /** + * crypt_s390_kmctr: + * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * @counter: address of counter value + * + * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU. + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for encryption/decryption funcs + */ +static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, + const u8 *src, long src_len, u8 *counter) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; + register long __src_len asm("3") = src_len; + register u8 *__dest asm("4") = dest; + register u8 *__ctr asm("6") = counter; + int ret = -1; + + asm volatile( + "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest), + "+a" (__ctr) + : "d" (__func), "a" (__param) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; +} + +/** * crypt_s390_func_available: * @func: the function code of the specific function; 0 if op in general * @@ -291,13 +360,17 @@ static inline int crypt_s390_kmac(long func, void *param, * * Returns 1 if func available; 0 if func or op in general not available */ -static inline int crypt_s390_func_available(int func) +static inline int crypt_s390_func_available(int func, + unsigned int facility_mask) { unsigned char status[16]; int ret; - /* check if CPACF facility (bit 17) is available */ - if (!test_facility(17)) + if (facility_mask & CRYPT_S390_MSA && !test_facility(17)) + return 0; + if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76)) + return 0; + if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77)) return 0; switch (func & CRYPT_S390_OP_MASK) { @@ -316,6 +389,10 @@ static inline int crypt_s390_func_available(int func) case CRYPT_S390_KMAC: ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0); break; + case CRYPT_S390_KMCTR: + ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0, + NULL); + break; default: return 0; } @@ -326,4 +403,31 @@ static inline int crypt_s390_func_available(int func) return (status[func >> 3] & (0x80 >> (func & 7))) != 0; } +/** + * crypt_s390_pcc: + * @func: the function code passed to KM; see crypt_s390_km_func + * @param: address of parameter block; see POP for details on each func + * + * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU. + * + * Returns -1 for failure, 0 for success. + */ +static inline int crypt_s390_pcc(long func, void *param) +{ + register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */ + register void *__param asm("1") = param; + int ret = -1; + + asm volatile( + "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "+d" (ret) + : "d" (__func), "a" (__param) : "cc", "memory"); + return ret; +} + + #endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */ diff --git a/arch/s390/crypto/des_check_key.c b/arch/s390/crypto/des_check_key.c deleted file mode 100644 index 5706af26644..00000000000 --- a/arch/s390/crypto/des_check_key.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Cryptographic API. - * - * Function for checking keys for the DES and Tripple DES Encryption - * algorithms. - * - * Originally released as descore by Dana L. How <how@isl.stanford.edu>. - * Modified by Raimar Falke <rf13@inf.tu-dresden.de> for the Linux-Kernel. - * Derived from Cryptoapi and Nettle implementations, adapted for in-place - * scatterlist interface. Changed LGPL to GPL per section 3 of the LGPL. - * - * s390 Version: - * Copyright IBM Corp. 2003 - * Author(s): Thomas Spatzier - * Jan Glauber (jan.glauber@de.ibm.com) - * - * Derived from "crypto/des.c" - * Copyright (c) 1992 Dana L. How. - * Copyright (c) Raimar Falke <rf13@inf.tu-dresden.de> - * Copyright (c) Gisle Sflensminde <gisle@ii.uib.no> - * Copyright (C) 2001 Niels Mvller. - * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/crypto.h> -#include "crypto_des.h" - -#define ROR(d,c,o) ((d) = (d) >> (c) | (d) << (o)) - -static const u8 parity[] = { - 8,1,0,8,0,8,8,0,0,8,8,0,8,0,2,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,3, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 4,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,5,0,8,0,8,8,0,0,8,8,0,8,0,6,8, -}; - -/* - * RFC2451: Weak key checks SHOULD be performed. - */ -int -crypto_des_check_key(const u8 *key, unsigned int keylen, u32 *flags) -{ - u32 n, w; - - n = parity[key[0]]; n <<= 4; - n |= parity[key[1]]; n <<= 4; - n |= parity[key[2]]; n <<= 4; - n |= parity[key[3]]; n <<= 4; - n |= parity[key[4]]; n <<= 4; - n |= parity[key[5]]; n <<= 4; - n |= parity[key[6]]; n <<= 4; - n |= parity[key[7]]; - w = 0x88888888L; - - if ((*flags & CRYPTO_TFM_REQ_WEAK_KEY) - && !((n - (w >> 3)) & w)) { /* 1 in 10^10 keys passes this test */ - if (n < 0x41415151) { - if (n < 0x31312121) { - if (n < 0x14141515) { - /* 01 01 01 01 01 01 01 01 */ - if (n == 0x11111111) goto weak; - /* 01 1F 01 1F 01 0E 01 0E */ - if (n == 0x13131212) goto weak; - } else { - /* 01 E0 01 E0 01 F1 01 F1 */ - if (n == 0x14141515) goto weak; - /* 01 FE 01 FE 01 FE 01 FE */ - if (n == 0x16161616) goto weak; - } - } else { - if (n < 0x34342525) { - /* 1F 01 1F 01 0E 01 0E 01 */ - if (n == 0x31312121) goto weak; - /* 1F 1F 1F 1F 0E 0E 0E 0E (?) */ - if (n == 0x33332222) goto weak; - } else { - /* 1F E0 1F E0 0E F1 0E F1 */ - if (n == 0x34342525) goto weak; - /* 1F FE 1F FE 0E FE 0E FE */ - if (n == 0x36362626) goto weak; - } - } - } else { - if (n < 0x61616161) { - if (n < 0x44445555) { - /* E0 01 E0 01 F1 01 F1 01 */ - if (n == 0x41415151) goto weak; - /* E0 1F E0 1F F1 0E F1 0E */ - if (n == 0x43435252) goto weak; - } else { - /* E0 E0 E0 E0 F1 F1 F1 F1 (?) */ - if (n == 0x44445555) goto weak; - /* E0 FE E0 FE F1 FE F1 FE */ - if (n == 0x46465656) goto weak; - } - } else { - if (n < 0x64646565) { - /* FE 01 FE 01 FE 01 FE 01 */ - if (n == 0x61616161) goto weak; - /* FE 1F FE 1F FE 0E FE 0E */ - if (n == 0x63636262) goto weak; - } else { - /* FE E0 FE E0 FE F1 FE F1 */ - if (n == 0x64646565) goto weak; - /* FE FE FE FE FE FE FE FE */ - if (n == 0x66666666) goto weak; - } - } - } - } - return 0; -weak: - *flags |= CRYPTO_TFM_RES_WEAK_KEY; - return -EINVAL; -} - -EXPORT_SYMBOL(crypto_des_check_key); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Key Check function for DES & DES3 Cipher Algorithms"); diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index cc542011839..a52bfd124d8 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -3,7 +3,7 @@ * * s390 implementation of the DES Cipher Algorithm. * - * Copyright IBM Corp. 2003,2007 + * Copyright IBM Corp. 2003,2011 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) * @@ -22,22 +22,19 @@ #include "crypt_s390.h" -#define DES3_192_KEY_SIZE (3 * DES_KEY_SIZE) +#define DES3_KEY_SIZE (3 * DES_KEY_SIZE) -struct crypt_s390_des_ctx { - u8 iv[DES_BLOCK_SIZE]; - u8 key[DES_KEY_SIZE]; -}; +static u8 *ctrblk; -struct crypt_s390_des3_192_ctx { +struct s390_des_ctx { u8 iv[DES_BLOCK_SIZE]; - u8 key[DES3_192_KEY_SIZE]; + u8 key[DES3_KEY_SIZE]; }; static int des_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) + unsigned int key_len) { - struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; u32 tmp[DES_EXPKEY_WORDS]; @@ -47,22 +44,22 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key, return -EINVAL; } - memcpy(dctx->key, key, keylen); + memcpy(ctx->key, key, key_len); return 0; } static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_DEA_ENCRYPT, dctx->key, out, in, DES_BLOCK_SIZE); + crypt_s390_km(KM_DEA_ENCRYPT, ctx->key, out, in, DES_BLOCK_SIZE); } static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_DEA_DECRYPT, dctx->key, out, in, DES_BLOCK_SIZE); + crypt_s390_km(KM_DEA_DECRYPT, ctx->key, out, in, DES_BLOCK_SIZE); } static struct crypto_alg des_alg = { @@ -71,7 +68,7 @@ static struct crypto_alg des_alg = { .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(des_alg.cra_list), .cra_u = { @@ -86,7 +83,7 @@ static struct crypto_alg des_alg = { }; static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, - void *param, struct blkcipher_walk *walk) + u8 *key, struct blkcipher_walk *walk) { int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes; @@ -97,7 +94,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_km(func, param, out, in, n); + ret = crypt_s390_km(func, key, out, in, n); BUG_ON((ret < 0) || (ret != n)); nbytes &= DES_BLOCK_SIZE - 1; @@ -108,7 +105,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, } static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, - void *param, struct blkcipher_walk *walk) + u8 *iv, struct blkcipher_walk *walk) { int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes = walk->nbytes; @@ -116,20 +113,20 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, if (!nbytes) goto out; - memcpy(param, walk->iv, DES_BLOCK_SIZE); + memcpy(iv, walk->iv, DES_BLOCK_SIZE); do { /* only use complete blocks */ unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_kmc(func, param, out, in, n); + ret = crypt_s390_kmc(func, iv, out, in, n); BUG_ON((ret < 0) || (ret != n)); nbytes &= DES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); } while ((nbytes = walk->nbytes)); - memcpy(walk->iv, param, DES_BLOCK_SIZE); + memcpy(walk->iv, iv, DES_BLOCK_SIZE); out: return ret; @@ -139,22 +136,22 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, ctx->key, &walk); } static int ecb_des_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_DEA_DECRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_DEA_DECRYPT, ctx->key, &walk); } static struct crypto_alg ecb_des_alg = { @@ -163,7 +160,7 @@ static struct crypto_alg ecb_des_alg = { .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(ecb_des_alg.cra_list), @@ -182,22 +179,22 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk); } static int cbc_des_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk); } static struct crypto_alg cbc_des_alg = { @@ -206,7 +203,7 @@ static struct crypto_alg cbc_des_alg = { .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(cbc_des_alg.cra_list), @@ -235,10 +232,10 @@ static struct crypto_alg cbc_des_alg = { * property. * */ -static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) { - struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && @@ -248,141 +245,276 @@ static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key, *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } - memcpy(dctx->key, key, keylen); + memcpy(ctx->key, key, key_len); return 0; } -static void des3_192_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_TDEA_192_ENCRYPT, dctx->key, dst, (void*)src, - DES_BLOCK_SIZE); + crypt_s390_km(KM_TDEA_192_ENCRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); } -static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_TDEA_192_DECRYPT, dctx->key, dst, (void*)src, - DES_BLOCK_SIZE); + crypt_s390_km(KM_TDEA_192_DECRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); } -static struct crypto_alg des3_192_alg = { +static struct crypto_alg des3_alg = { .cra_name = "des3_ede", .cra_driver_name = "des3_ede-s390", .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(des3_192_alg.cra_list), + .cra_list = LIST_HEAD_INIT(des3_alg.cra_list), .cra_u = { .cipher = { - .cia_min_keysize = DES3_192_KEY_SIZE, - .cia_max_keysize = DES3_192_KEY_SIZE, - .cia_setkey = des3_192_setkey, - .cia_encrypt = des3_192_encrypt, - .cia_decrypt = des3_192_decrypt, + .cia_min_keysize = DES3_KEY_SIZE, + .cia_max_keysize = DES3_KEY_SIZE, + .cia_setkey = des3_setkey, + .cia_encrypt = des3_encrypt, + .cia_decrypt = des3_decrypt, } } }; -static int ecb_des3_192_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, ctx->key, &walk); } -static int ecb_des3_192_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, ctx->key, &walk); } -static struct crypto_alg ecb_des3_192_alg = { +static struct crypto_alg ecb_des3_alg = { .cra_name = "ecb(des3_ede)", .cra_driver_name = "ecb-des3_ede-s390", .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT( - ecb_des3_192_alg.cra_list), + ecb_des3_alg.cra_list), .cra_u = { .blkcipher = { - .min_keysize = DES3_192_KEY_SIZE, - .max_keysize = DES3_192_KEY_SIZE, - .setkey = des3_192_setkey, - .encrypt = ecb_des3_192_encrypt, - .decrypt = ecb_des3_192_decrypt, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .setkey = des3_setkey, + .encrypt = ecb_des3_encrypt, + .decrypt = ecb_des3_decrypt, } } }; -static int cbc_des3_192_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk); } -static int cbc_des3_192_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk); } -static struct crypto_alg cbc_des3_192_alg = { +static struct crypto_alg cbc_des3_alg = { .cra_name = "cbc(des3_ede)", .cra_driver_name = "cbc-des3_ede-s390", .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT( - cbc_des3_192_alg.cra_list), + cbc_des3_alg.cra_list), .cra_u = { .blkcipher = { - .min_keysize = DES3_192_KEY_SIZE, - .max_keysize = DES3_192_KEY_SIZE, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, .ivsize = DES_BLOCK_SIZE, - .setkey = des3_192_setkey, - .encrypt = cbc_des3_192_encrypt, - .decrypt = cbc_des3_192_decrypt, + .setkey = des3_setkey, + .encrypt = cbc_des3_encrypt, + .decrypt = cbc_des3_decrypt, } } }; -static int des_s390_init(void) +static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, + struct s390_des_ctx *ctx, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); + unsigned int i, n, nbytes; + u8 buf[DES_BLOCK_SIZE]; + u8 *out, *in; + + memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE); + while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + while (nbytes >= DES_BLOCK_SIZE) { + /* align to block size, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : + nbytes & ~(DES_BLOCK_SIZE - 1); + for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { + memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE, + DES_BLOCK_SIZE); + crypto_inc(ctrblk + i, DES_BLOCK_SIZE); + } + ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk); + BUG_ON((ret < 0) || (ret != n)); + if (n > DES_BLOCK_SIZE) + memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE, + DES_BLOCK_SIZE); + crypto_inc(ctrblk, DES_BLOCK_SIZE); + out += n; + in += n; + nbytes -= n; + } + ret = blkcipher_walk_done(desc, walk, nbytes); + } + + /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ + if (nbytes) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + ret = crypt_s390_kmctr(func, ctx->key, buf, in, + DES_BLOCK_SIZE, ctrblk); + BUG_ON(ret < 0 || ret != DES_BLOCK_SIZE); + memcpy(out, buf, nbytes); + crypto_inc(ctrblk, DES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, 0); + } + memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE); + return ret; +} + +static int ctr_des_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, ctx, &walk); +} + +static int ctr_des_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, ctx, &walk); +} + +static struct crypto_alg ctr_des_alg = { + .cra_name = "ctr(des)", + .cra_driver_name = "ctr-des-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ctr_des_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey, + .encrypt = ctr_des_encrypt, + .decrypt = ctr_des_decrypt, + } + } +}; + +static int ctr_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, ctx, &walk); +} + +static int ctr_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, ctx, &walk); +} + +static struct crypto_alg ctr_des3_alg = { + .cra_name = "ctr(des3_ede)", + .cra_driver_name = "ctr-des3_ede-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ctr_des3_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des3_setkey, + .encrypt = ctr_des3_encrypt, + .decrypt = ctr_des3_decrypt, + } + } +}; + +static int __init des_s390_init(void) { int ret; - if (!crypt_s390_func_available(KM_DEA_ENCRYPT) || - !crypt_s390_func_available(KM_TDEA_192_ENCRYPT)) + if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) || + !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA)) return -EOPNOTSUPP; ret = crypto_register_alg(&des_alg); @@ -394,23 +526,46 @@ static int des_s390_init(void) ret = crypto_register_alg(&cbc_des_alg); if (ret) goto cbc_des_err; - ret = crypto_register_alg(&des3_192_alg); + ret = crypto_register_alg(&des3_alg); if (ret) - goto des3_192_err; - ret = crypto_register_alg(&ecb_des3_192_alg); + goto des3_err; + ret = crypto_register_alg(&ecb_des3_alg); if (ret) - goto ecb_des3_192_err; - ret = crypto_register_alg(&cbc_des3_192_alg); + goto ecb_des3_err; + ret = crypto_register_alg(&cbc_des3_alg); if (ret) - goto cbc_des3_192_err; + goto cbc_des3_err; + + if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ret = crypto_register_alg(&ctr_des_alg); + if (ret) + goto ctr_des_err; + ret = crypto_register_alg(&ctr_des3_alg); + if (ret) + goto ctr_des3_err; + ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + if (!ctrblk) { + ret = -ENOMEM; + goto ctr_mem_err; + } + } out: return ret; -cbc_des3_192_err: - crypto_unregister_alg(&ecb_des3_192_alg); -ecb_des3_192_err: - crypto_unregister_alg(&des3_192_alg); -des3_192_err: +ctr_mem_err: + crypto_unregister_alg(&ctr_des3_alg); +ctr_des3_err: + crypto_unregister_alg(&ctr_des_alg); +ctr_des_err: + crypto_unregister_alg(&cbc_des3_alg); +cbc_des3_err: + crypto_unregister_alg(&ecb_des3_alg); +ecb_des3_err: + crypto_unregister_alg(&des3_alg); +des3_err: crypto_unregister_alg(&cbc_des_alg); cbc_des_err: crypto_unregister_alg(&ecb_des_alg); @@ -422,9 +577,14 @@ des_err: static void __exit des_s390_exit(void) { - crypto_unregister_alg(&cbc_des3_192_alg); - crypto_unregister_alg(&ecb_des3_192_alg); - crypto_unregister_alg(&des3_192_alg); + if (ctrblk) { + crypto_unregister_alg(&ctr_des_alg); + crypto_unregister_alg(&ctr_des3_alg); + free_page((unsigned long) ctrblk); + } + crypto_unregister_alg(&cbc_des3_alg); + crypto_unregister_alg(&ecb_des3_alg); + crypto_unregister_alg(&des3_alg); crypto_unregister_alg(&cbc_des_alg); crypto_unregister_alg(&ecb_des_alg); crypto_unregister_alg(&des_alg); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c new file mode 100644 index 00000000000..b1bd170f24b --- /dev/null +++ b/arch/s390/crypto/ghash_s390.c @@ -0,0 +1,162 @@ +/* + * Cryptographic API. + * + * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode). + * + * Copyright IBM Corp. 2011 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <crypto/internal/hash.h> +#include <linux/module.h> + +#include "crypt_s390.h" + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +struct ghash_ctx { + u8 icv[16]; + u8 key[16]; +}; + +struct ghash_desc_ctx { + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key, key, GHASH_BLOCK_SIZE); + memset(ctx->icv, 0, GHASH_BLOCK_SIZE); + + return 0; +} + +static int ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + unsigned int n; + u8 *buf = dctx->buffer; + int ret; + + if (dctx->bytes) { + u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); + + n = min(srclen, dctx->bytes); + dctx->bytes -= n; + srclen -= n; + + memcpy(pos, src, n); + src += n; + + if (!dctx->bytes) { + ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, + GHASH_BLOCK_SIZE); + BUG_ON(ret != GHASH_BLOCK_SIZE); + } + } + + n = srclen & ~(GHASH_BLOCK_SIZE - 1); + if (n) { + ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n); + BUG_ON(ret != n); + src += n; + srclen -= n; + } + + if (srclen) { + dctx->bytes = GHASH_BLOCK_SIZE - srclen; + memcpy(buf, src, srclen); + } + + return 0; +} + +static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +{ + u8 *buf = dctx->buffer; + int ret; + + if (dctx->bytes) { + u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); + + memset(pos, 0, dctx->bytes); + + ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE); + BUG_ON(ret != GHASH_BLOCK_SIZE); + } + + dctx->bytes = 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + + ghash_flush(ctx, dctx); + memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), + }, +}; + +static int __init ghash_mod_init(void) +{ + if (!crypt_s390_func_available(KIMD_GHASH, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) + return -EOPNOTSUPP; + + return crypto_register_shash(&ghash_alg); +} + +static void __exit ghash_mod_exit(void) +{ + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_mod_init); +module_exit(ghash_mod_exit); + +MODULE_ALIAS("ghash"); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation"); diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 8b16c479585..0808fbf0f7d 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -166,7 +166,7 @@ static int __init prng_init(void) int ret; /* check if the CPU has a PRNG */ - if (!crypt_s390_func_available(KMC_PRNG)) + if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA)) return -EOPNOTSUPP; if (prng_chunk_size < 8) diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index f6de7826c97..e9868c6e0a0 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -90,7 +90,7 @@ static struct shash_alg alg = { static int __init sha1_s390_init(void) { - if (!crypt_s390_func_available(KIMD_SHA_1)) + if (!crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA)) return -EOPNOTSUPP; return crypto_register_shash(&alg); } diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 61a7db37212..5ed8d64fc2e 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -86,7 +86,7 @@ static struct shash_alg alg = { static int sha256_s390_init(void) { - if (!crypt_s390_func_available(KIMD_SHA_256)) + if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA)) return -EOPNOTSUPP; return crypto_register_shash(&alg); diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 4bf73d0dc52..32a81383b69 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -132,7 +132,7 @@ static int __init init(void) { int ret; - if (!crypt_s390_func_available(KIMD_SHA_512)) + if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA)) return -EOPNOTSUPP; if ((ret = crypto_register_shash(&sha512_alg)) < 0) goto out; diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index 14726eef1ce..f0907995b4c 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -20,6 +20,7 @@ #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/io.h> +#include <linux/prefetch.h> #include <asm/page.h> #include <asm/cacheflush.h> #include <cpu/sq.h> diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index e560d102215..63a027c9ada 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,6 +25,10 @@ config SPARC select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG select HAVE_ARCH_JUMP_LABEL + select HAVE_GENERIC_HARDIRQS + select GENERIC_HARDIRQS_NO_DEPRECATED + select GENERIC_IRQ_SHOW + select USE_GENERIC_SMP_HELPERS if SMP config SPARC32 def_bool !64BIT @@ -43,15 +47,12 @@ config SPARC64 select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_SYSCALL_TRACEPOINTS - select USE_GENERIC_SMP_HELPERS if SMP select RTC_DRV_CMOS select RTC_DRV_BQ4802 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE select HAVE_PERF_EVENTS select PERF_USE_VMALLOC - select HAVE_GENERIC_HARDIRQS - select GENERIC_IRQ_SHOW select IRQ_PREFLOW_FASTEOI config ARCH_DEFCONFIG diff --git a/arch/sparc/include/asm/cpudata_32.h b/arch/sparc/include/asm/cpudata_32.h index 31d48a0e32c..a4c5a938b93 100644 --- a/arch/sparc/include/asm/cpudata_32.h +++ b/arch/sparc/include/asm/cpudata_32.h @@ -16,6 +16,10 @@ typedef struct { unsigned long clock_tick; unsigned int multiplier; unsigned int counter; +#ifdef CONFIG_SMP + unsigned int irq_resched_count; + unsigned int irq_call_count; +#endif int prom_node; int mid; int next; @@ -23,5 +27,6 @@ typedef struct { DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) +#define local_cpu_data() __get_cpu_var(__cpu_data) #endif /* _SPARC_CPUDATA_H */ diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h index 86666f70322..482c79e2a41 100644 --- a/arch/sparc/include/asm/floppy_32.h +++ b/arch/sparc/include/asm/floppy_32.h @@ -281,28 +281,27 @@ static inline void sun_fd_enable_dma(void) pdma_areasize = pdma_size; } -/* Our low-level entry point in arch/sparc/kernel/entry.S */ -extern int sparc_floppy_request_irq(int irq, unsigned long flags, - irq_handler_t irq_handler); +extern int sparc_floppy_request_irq(unsigned int irq, + irq_handler_t irq_handler); static int sun_fd_request_irq(void) { static int once = 0; - int error; - if(!once) { + if (!once) { once = 1; - error = sparc_floppy_request_irq(FLOPPY_IRQ, - IRQF_DISABLED, - floppy_interrupt); - return ((error == 0) ? 0 : -1); - } else return 0; + return sparc_floppy_request_irq(FLOPPY_IRQ, floppy_interrupt); + } else { + return 0; + } } static struct linux_prom_registers fd_regs[2]; static int sun_floppy_init(void) { + struct platform_device *op; + struct device_node *dp; char state[128]; phandle tnode, fd_node; int num_regs; @@ -310,7 +309,6 @@ static int sun_floppy_init(void) use_virtual_dma = 1; - FLOPPY_IRQ = 11; /* Forget it if we aren't on a machine that could possibly * ever have a floppy drive. */ @@ -349,6 +347,26 @@ static int sun_floppy_init(void) sun_fdc = (struct sun_flpy_controller *) of_ioremap(&r, 0, fd_regs[0].reg_size, "floppy"); + /* Look up irq in platform_device. + * We try "SUNW,fdtwo" and "fd" + */ + for_each_node_by_name(dp, "SUNW,fdtwo") { + op = of_find_device_by_node(dp); + if (op) + break; + } + if (!op) { + for_each_node_by_name(dp, "fd") { + op = of_find_device_by_node(dp); + if (op) + break; + } + } + if (!op) + goto no_sun_fdc; + + FLOPPY_IRQ = op->archdata.irqs[0]; + /* Last minute sanity check... */ if(sun_fdc->status_82072 == 0xff) { sun_fdc = NULL; diff --git a/arch/sparc/include/asm/io.h b/arch/sparc/include/asm/io.h index a34b2994937..f6902cf3cbe 100644 --- a/arch/sparc/include/asm/io.h +++ b/arch/sparc/include/asm/io.h @@ -5,4 +5,17 @@ #else #include <asm/io_32.h> #endif + +/* + * Defines used for both SPARC32 and SPARC64 + */ + +/* Big endian versions of memory read/write routines */ +#define readb_be(__addr) __raw_readb(__addr) +#define readw_be(__addr) __raw_readw(__addr) +#define readl_be(__addr) __raw_readl(__addr) +#define writeb_be(__b, __addr) __raw_writeb(__b, __addr) +#define writel_be(__w, __addr) __raw_writel(__w, __addr) +#define writew_be(__l, __addr) __raw_writew(__l, __addr) + #endif diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h index eced3e3ebd3..2ae3acaeb1b 100644 --- a/arch/sparc/include/asm/irq_32.h +++ b/arch/sparc/include/asm/irq_32.h @@ -6,7 +6,11 @@ #ifndef _SPARC_IRQ_H #define _SPARC_IRQ_H -#define NR_IRQS 16 +/* Allocated number of logical irq numbers. + * sun4d boxes (ss2000e) should be OK with ~32. + * Be on the safe side and make room for 64 + */ +#define NR_IRQS 64 #include <linux/interrupt.h> diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h index c04f96fb753..6bdaf1e43d2 100644 --- a/arch/sparc/include/asm/leon.h +++ b/arch/sparc/include/asm/leon.h @@ -52,29 +52,6 @@ #define LEON_DIAGF_VALID 0x2000 #define LEON_DIAGF_VALID_SHIFT 13 -/* - * Interrupt Sources - * - * The interrupt source numbers directly map to the trap type and to - * the bits used in the Interrupt Clear, Interrupt Force, Interrupt Mask, - * and the Interrupt Pending Registers. - */ -#define LEON_INTERRUPT_CORRECTABLE_MEMORY_ERROR 1 -#define LEON_INTERRUPT_UART_1_RX_TX 2 -#define LEON_INTERRUPT_UART_0_RX_TX 3 -#define LEON_INTERRUPT_EXTERNAL_0 4 -#define LEON_INTERRUPT_EXTERNAL_1 5 -#define LEON_INTERRUPT_EXTERNAL_2 6 -#define LEON_INTERRUPT_EXTERNAL_3 7 -#define LEON_INTERRUPT_TIMER1 8 -#define LEON_INTERRUPT_TIMER2 9 -#define LEON_INTERRUPT_EMPTY1 10 -#define LEON_INTERRUPT_EMPTY2 11 -#define LEON_INTERRUPT_OPEN_ETH 12 -#define LEON_INTERRUPT_EMPTY4 13 -#define LEON_INTERRUPT_EMPTY5 14 -#define LEON_INTERRUPT_EMPTY6 15 - /* irq masks */ #define LEON_HARD_INT(x) (1 << (x)) /* irq 0-15 */ #define LEON_IRQMASK_R 0x0000fffe /* bit 15- 1 of lregs.irqmask */ @@ -183,7 +160,6 @@ static inline void leon_srmmu_enabletlb(void) /* macro access for leon_readnobuffer_reg() */ #define LEON_BYPASSCACHE_LOAD_VA(x) leon_readnobuffer_reg((unsigned long)(x)) -extern void sparc_leon_eirq_register(int eirq); extern void leon_init(void); extern void leon_switch_mm(void); extern void leon_init_IRQ(void); @@ -239,8 +215,8 @@ static inline int sparc_leon3_cpuid(void) #endif /*!__ASSEMBLY__*/ #ifdef CONFIG_SMP -# define LEON3_IRQ_RESCHEDULE 13 -# define LEON3_IRQ_TICKER (leon_percpu_timer_dev[0].irq) +# define LEON3_IRQ_IPI_DEFAULT 13 +# define LEON3_IRQ_TICKER (leon3_ticker_irq) # define LEON3_IRQ_CROSS_CALL 15 #endif @@ -339,9 +315,9 @@ struct leon2_cacheregs { #include <linux/interrupt.h> struct device_node; -extern int sparc_leon_eirq_get(int eirq, int cpu); -extern irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id); -extern void sparc_leon_eirq_register(int eirq); +extern unsigned int leon_build_device_irq(unsigned int real_irq, + irq_flow_handler_t flow_handler, + const char *name, int do_ack); extern void leon_clear_clock_irq(void); extern void leon_load_profile_irq(int cpu, unsigned int limit); extern void leon_init_timers(irq_handler_t counter_fn); @@ -358,6 +334,7 @@ extern void leon3_getCacheRegs(struct leon3_cacheregs *regs); extern int leon_flush_needed(void); extern void leon_switch_mm(void); extern int srmmu_swprobe_trace; +extern int leon3_ticker_irq; #ifdef CONFIG_SMP extern int leon_smp_nrcpus(void); @@ -366,17 +343,19 @@ extern void leon_smp_done(void); extern void leon_boot_cpus(void); extern int leon_boot_one_cpu(int i); void leon_init_smp(void); -extern void cpu_probe(void); extern void cpu_idle(void); extern void init_IRQ(void); extern void cpu_panic(void); extern int __leon_processor_id(void); void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu); +extern irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused); -extern unsigned int real_irq_entry[], smpleon_ticker[]; +extern unsigned int real_irq_entry[]; +extern unsigned int smpleon_ipi[]; extern unsigned int patchme_maybe_smp_msg[]; extern unsigned int t_nmi[], linux_trap_ipi15_leon[]; extern unsigned int linux_trap_ipi15_sun4m[]; +extern int leon_ipi_irq; #endif /* CONFIG_SMP */ diff --git a/arch/sparc/include/asm/pcic.h b/arch/sparc/include/asm/pcic.h index f20ef562b26..7eb5d78f521 100644 --- a/arch/sparc/include/asm/pcic.h +++ b/arch/sparc/include/asm/pcic.h @@ -29,11 +29,17 @@ struct linux_pcic { int pcic_imdim; }; -extern int pcic_probe(void); -/* Erm... MJ redefined pcibios_present() so that it does not work early. */ +#ifdef CONFIG_PCI extern int pcic_present(void); +extern int pcic_probe(void); +extern void pci_time_init(void); extern void sun4m_pci_init_IRQ(void); - +#else +static inline int pcic_present(void) { return 0; } +static inline int pcic_probe(void) { return 0; } +static inline void pci_time_init(void) {} +static inline void sun4m_pci_init_IRQ(void) {} +#endif #endif /* Size of PCI I/O space which we relocate. */ diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 303bd4dc829..5b31a8e8982 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -8,6 +8,8 @@ * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ +#include <linux/const.h> + #ifndef __ASSEMBLY__ #include <asm-generic/4level-fixup.h> @@ -456,9 +458,9 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, #endif /* !(__ASSEMBLY__) */ -#define VMALLOC_START 0xfe600000 +#define VMALLOC_START _AC(0xfe600000,UL) /* XXX Alter this when I get around to fixing sun4c - Anton */ -#define VMALLOC_END 0xffc00000 +#define VMALLOC_END _AC(0xffc00000,UL) /* We provide our own get_unmapped_area to cope with VA holes for userland */ diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index f8dddb7045b..b77128c8052 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -699,6 +699,9 @@ extern pmd_t swapper_low_pmd_dir[2048]; extern void paging_init(void); extern unsigned long find_ecache_flush_span(unsigned long size); +struct seq_file; +extern void mmu_info(struct seq_file *); + /* These do nothing with the way I have things setup. */ #define mmu_lockarea(vaddr, len) (vaddr) #define mmu_unlockarea(vaddr, len) do { } while(0) diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 2643c62f4ac..64718ba2643 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -11,4 +11,16 @@ # define COMMAND_LINE_SIZE 256 #endif +#ifdef __KERNEL__ + +#ifdef CONFIG_SPARC32 +/* The CPU that was used for booting + * Only sun4d + leon may have boot_cpu_id != 0 + */ +extern unsigned char boot_cpu_id; +extern unsigned char boot_cpu_id4; +#endif + +#endif /* __KERNEL__ */ + #endif /* _SPARC_SETUP_H */ diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index d82d7f4c0a7..093f10843ff 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -50,42 +50,38 @@ void smp_callin(void); void smp_boot_cpus(void); void smp_store_cpu_info(int); +void smp_resched_interrupt(void); +void smp_call_function_single_interrupt(void); +void smp_call_function_interrupt(void); + struct seq_file; void smp_bogo(struct seq_file *); void smp_info(struct seq_file *); BTFIXUPDEF_CALL(void, smp_cross_call, smpfunc_t, cpumask_t, unsigned long, unsigned long, unsigned long, unsigned long) BTFIXUPDEF_CALL(int, __hard_smp_processor_id, void) +BTFIXUPDEF_CALL(void, smp_ipi_resched, int); +BTFIXUPDEF_CALL(void, smp_ipi_single, int); +BTFIXUPDEF_CALL(void, smp_ipi_mask_one, int); BTFIXUPDEF_BLACKBOX(hard_smp_processor_id) BTFIXUPDEF_BLACKBOX(load_current) #define smp_cross_call(func,mask,arg1,arg2,arg3,arg4) BTFIXUP_CALL(smp_cross_call)(func,mask,arg1,arg2,arg3,arg4) -static inline void xc0(smpfunc_t func) { smp_cross_call(func, cpu_online_map, 0, 0, 0, 0); } +static inline void xc0(smpfunc_t func) { smp_cross_call(func, *cpu_online_mask, 0, 0, 0, 0); } static inline void xc1(smpfunc_t func, unsigned long arg1) -{ smp_cross_call(func, cpu_online_map, arg1, 0, 0, 0); } +{ smp_cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); } static inline void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2) -{ smp_cross_call(func, cpu_online_map, arg1, arg2, 0, 0); } +{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); } static inline void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, 0); } +{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, 0); } static inline void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) -{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, arg4); } - -static inline int smp_call_function(void (*func)(void *info), void *info, int wait) -{ - xc1((smpfunc_t)func, (unsigned long)info); - return 0; -} +{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, arg4); } -static inline int smp_call_function_single(int cpuid, void (*func) (void *info), - void *info, int wait) -{ - smp_cross_call((smpfunc_t)func, cpumask_of_cpu(cpuid), - (unsigned long) info, 0, 0, 0); - return 0; -} +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); static inline int cpu_logical_map(int cpu) { @@ -135,6 +131,11 @@ static inline int hard_smp_processor_id(void) __asm__ __volatile__("lda [%g0] ASI_M_VIKING_TMP1, %0\n\t" "nop; nop" : "=&r" (cpuid)); + - leon + __asm__ __volatile__( "rd %asr17, %0\n\t" + "srl %0, 0x1c, %0\n\t" + "nop\n\t" : + "=&r" (cpuid)); See btfixup.h and btfixupprep.c to understand how a blackbox works. */ __asm__ __volatile__("sethi %%hi(___b_hard_smp_processor_id), %0\n\t" diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index f49e11cd4de..20bca895071 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -49,6 +49,10 @@ extern void cpu_play_dead(void); extern void smp_fetch_global_regs(void); +struct seq_file; +void smp_bogo(struct seq_file *); +void smp_info(struct seq_file *); + #ifdef CONFIG_HOTPLUG_CPU extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 7f9b9dba38a..5f5b8bf3f50 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -9,6 +9,7 @@ #ifndef __ASSEMBLY__ #include <asm/psr.h> +#include <asm/processor.h> /* for cpu_relax */ #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) diff --git a/arch/sparc/include/asm/system_32.h b/arch/sparc/include/asm/system_32.h index 890036b3689..47a7e862474 100644 --- a/arch/sparc/include/asm/system_32.h +++ b/arch/sparc/include/asm/system_32.h @@ -15,11 +15,6 @@ #include <linux/irqflags.h> -static inline unsigned int probe_irq_mask(unsigned long val) -{ - return 0; -} - /* * Sparc (general) CPU types */ diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h index e3b65d8cf41..3c96d3bb9f1 100644 --- a/arch/sparc/include/asm/system_64.h +++ b/arch/sparc/include/asm/system_64.h @@ -29,10 +29,6 @@ enum sparc_cpu { /* This cannot ever be a sun4c :) That's just history. */ #define ARCH_SUN4C 0 -extern const char *sparc_cpu_type; -extern const char *sparc_fpu_type; -extern const char *sparc_pmu_type; - extern char reboot_command[]; /* These are here in an effort to more fully work around Spitfire Errata diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index 9d897b6db98..c5387ed0add 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -404,8 +404,9 @@ #define __NR_open_by_handle_at 333 #define __NR_clock_adjtime 334 #define __NR_syncfs 335 +#define __NR_sendmmsg 336 -#define NR_syscalls 336 +#define NR_syscalls 337 #ifdef __32bit_syscall_numbers__ /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h index 5b0a06dc3bc..a9be04b0d04 100644 --- a/arch/sparc/include/asm/winmacro.h +++ b/arch/sparc/include/asm/winmacro.h @@ -103,6 +103,7 @@ st %scratch, [%cur_reg + TI_W_SAVED]; #ifdef CONFIG_SMP +/* Results of LOAD_CURRENT() after BTFIXUP for SUN4M, SUN4D & LEON (comments) */ #define LOAD_CURRENT4M(dest_reg, idreg) \ rd %tbr, %idreg; \ sethi %hi(current_set), %dest_reg; \ @@ -118,6 +119,14 @@ or %dest_reg, %lo(C_LABEL(current_set)), %dest_reg; \ ld [%idreg + %dest_reg], %dest_reg; +#define LOAD_CURRENT_LEON(dest_reg, idreg) \ + rd %asr17, %idreg; \ + sethi %hi(current_set), %dest_reg; \ + srl %idreg, 0x1c, %idreg; \ + or %dest_reg, %lo(current_set), %dest_reg; \ + sll %idreg, 0x2, %idreg; \ + ld [%idreg + %dest_reg], %dest_reg; + /* Blackbox - take care with this... - check smp4m and smp4d before changing this. */ #define LOAD_CURRENT(dest_reg, idreg) \ sethi %hi(___b_load_current), %idreg; \ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 99aa4db6e9c..9cff2709a96 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -71,10 +71,6 @@ obj-$(CONFIG_SPARC64) += pcr.o obj-$(CONFIG_SPARC64) += nmi.o obj-$(CONFIG_SPARC64_SMP) += cpumap.o -# sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation -obj-$(CONFIG_SPARC32) += devres.o -devres-y := ../../../kernel/irq/devres.o - obj-y += dma.o obj-$(CONFIG_SPARC32_PCI) += pcic.o diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 7925c54f413..138dbbc8dc8 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -4,6 +4,7 @@ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) */ +#include <linux/seq_file.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -11,7 +12,9 @@ #include <linux/threads.h> #include <asm/spitfire.h> +#include <asm/pgtable.h> #include <asm/oplib.h> +#include <asm/setup.h> #include <asm/page.h> #include <asm/head.h> #include <asm/psr.h> @@ -23,6 +26,9 @@ DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 }; EXPORT_PER_CPU_SYMBOL(__cpu_data); +int ncpus_probed; +unsigned int fsr_storage; + struct cpu_info { int psr_vers; const char *name; @@ -247,13 +253,12 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { * machine type value into consideration too. I will fix this. */ -const char *sparc_cpu_type; -const char *sparc_fpu_type; +static const char *sparc_cpu_type; +static const char *sparc_fpu_type; const char *sparc_pmu_type; -unsigned int fsr_storage; -static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) +static void __init set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) { const struct manufacturer_info *manuf; int i; @@ -313,7 +318,123 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) } #ifdef CONFIG_SPARC32 -void __cpuinit cpu_probe(void) +static int show_cpuinfo(struct seq_file *m, void *__unused) +{ + seq_printf(m, + "cpu\t\t: %s\n" + "fpu\t\t: %s\n" + "promlib\t\t: Version %d Revision %d\n" + "prom\t\t: %d.%d\n" + "type\t\t: %s\n" + "ncpus probed\t: %d\n" + "ncpus active\t: %d\n" +#ifndef CONFIG_SMP + "CPU0Bogo\t: %lu.%02lu\n" + "CPU0ClkTck\t: %ld\n" +#endif + , + sparc_cpu_type, + sparc_fpu_type , + romvec->pv_romvers, + prom_rev, + romvec->pv_printrev >> 16, + romvec->pv_printrev & 0xffff, + &cputypval[0], + ncpus_probed, + num_online_cpus() +#ifndef CONFIG_SMP + , cpu_data(0).udelay_val/(500000/HZ), + (cpu_data(0).udelay_val/(5000/HZ)) % 100, + cpu_data(0).clock_tick +#endif + ); + +#ifdef CONFIG_SMP + smp_bogo(m); +#endif + mmu_info(m); +#ifdef CONFIG_SMP + smp_info(m); +#endif + return 0; +} +#endif /* CONFIG_SPARC32 */ + +#ifdef CONFIG_SPARC64 +unsigned int dcache_parity_tl1_occurred; +unsigned int icache_parity_tl1_occurred; + + +static int show_cpuinfo(struct seq_file *m, void *__unused) +{ + seq_printf(m, + "cpu\t\t: %s\n" + "fpu\t\t: %s\n" + "pmu\t\t: %s\n" + "prom\t\t: %s\n" + "type\t\t: %s\n" + "ncpus probed\t: %d\n" + "ncpus active\t: %d\n" + "D$ parity tl1\t: %u\n" + "I$ parity tl1\t: %u\n" +#ifndef CONFIG_SMP + "Cpu0ClkTck\t: %016lx\n" +#endif + , + sparc_cpu_type, + sparc_fpu_type, + sparc_pmu_type, + prom_version, + ((tlb_type == hypervisor) ? + "sun4v" : + "sun4u"), + ncpus_probed, + num_online_cpus(), + dcache_parity_tl1_occurred, + icache_parity_tl1_occurred +#ifndef CONFIG_SMP + , cpu_data(0).clock_tick +#endif + ); +#ifdef CONFIG_SMP + smp_bogo(m); +#endif + mmu_info(m); +#ifdef CONFIG_SMP + smp_info(m); +#endif + return 0; +} +#endif /* CONFIG_SPARC64 */ + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + /* The pointer we are returning is arbitrary, + * it just has to be non-NULL and not IS_ERR + * in the success case. + */ + return *pos == 0 ? &c_start : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start =c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +#ifdef CONFIG_SPARC32 +static int __init cpu_type_probe(void) { int psr_impl, psr_vers, fpu_vers; int psr; @@ -332,8 +453,12 @@ void __cpuinit cpu_probe(void) put_psr(psr); set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers); + + return 0; } -#else +#endif /* CONFIG_SPARC32 */ + +#ifdef CONFIG_SPARC64 static void __init sun4v_cpu_probe(void) { switch (sun4v_chip_type) { @@ -374,6 +499,6 @@ static int __init cpu_type_probe(void) } return 0; } +#endif /* CONFIG_SPARC64 */ early_initcall(cpu_type_probe); -#endif diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index 8de64c8126b..d91fd782743 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -202,7 +202,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void) new_tree->total_nodes = n; memcpy(&new_tree->level, tmp_level, sizeof(tmp_level)); - prev_cpu = cpu = first_cpu(cpu_online_map); + prev_cpu = cpu = cpumask_first(cpu_online_mask); /* Initialize all levels in the tree with the first CPU */ for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) { @@ -381,7 +381,7 @@ static int simple_map_to_cpu(unsigned int index) } /* Impossible, since num_online_cpus() <= num_possible_cpus() */ - return first_cpu(cpu_online_map); + return cpumask_first(cpu_online_mask); } static int _map_to_cpu(unsigned int index) diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c index d2eddd6647c..113c052c304 100644 --- a/arch/sparc/kernel/devices.c +++ b/arch/sparc/kernel/devices.c @@ -20,7 +20,6 @@ #include <asm/system.h> #include <asm/cpudata.h> -extern void cpu_probe(void); extern void clock_stop_probe(void); /* tadpole.c */ extern void sun4c_probe_memerr_reg(void); @@ -115,7 +114,7 @@ int cpu_get_hwmid(phandle prom_node) void __init device_scan(void) { - prom_printf("Booting Linux...\n"); + printk(KERN_NOTICE "Booting Linux...\n"); #ifndef CONFIG_SMP { @@ -133,7 +132,6 @@ void __init device_scan(void) } #endif /* !CONFIG_SMP */ - cpu_probe(); { extern void auxio_probe(void); extern void auxio_power_probe(void); diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index 3add4de8a1a..dd1342c0a3b 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -497,7 +497,7 @@ static void dr_cpu_init_response(struct ds_data *resp, u64 req_num, tag->num_records = ncpus; i = 0; - for_each_cpu_mask(cpu, *mask) { + for_each_cpu(cpu, mask) { ent[i].cpu = cpu; ent[i].result = DR_CPU_RES_OK; ent[i].stat = default_stat; @@ -534,7 +534,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp, int resp_len, ncpus, cpu; unsigned long flags; - ncpus = cpus_weight(*mask); + ncpus = cpumask_weight(mask); resp_len = dr_cpu_size_response(ncpus); resp = kzalloc(resp_len, GFP_KERNEL); if (!resp) @@ -547,7 +547,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp, mdesc_populate_present_mask(mask); mdesc_fill_in_cpu_data(mask); - for_each_cpu_mask(cpu, *mask) { + for_each_cpu(cpu, mask) { int err; printk(KERN_INFO "ds-%llu: Starting cpu %d...\n", @@ -593,7 +593,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp, int resp_len, ncpus, cpu; unsigned long flags; - ncpus = cpus_weight(*mask); + ncpus = cpumask_weight(mask); resp_len = dr_cpu_size_response(ncpus); resp = kzalloc(resp_len, GFP_KERNEL); if (!resp) @@ -603,7 +603,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp, resp_len, ncpus, mask, DR_CPU_STAT_UNCONFIGURED); - for_each_cpu_mask(cpu, *mask) { + for_each_cpu(cpu, mask) { int err; printk(KERN_INFO "ds-%llu: Shutting down cpu %d...\n", @@ -649,13 +649,13 @@ static void __cpuinit dr_cpu_data(struct ds_info *dp, purge_dups(cpu_list, tag->num_records); - cpus_clear(mask); + cpumask_clear(&mask); for (i = 0; i < tag->num_records; i++) { if (cpu_list[i] == CPU_SENTINEL) continue; if (cpu_list[i] < nr_cpu_ids) - cpu_set(cpu_list[i], mask); + cpumask_set_cpu(cpu_list[i], &mask); } if (tag->type == DR_CPU_CONFIGURE) diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 6da784a5612..8341963f4c8 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -269,19 +269,22 @@ smp4m_ticker: /* Here is where we check for possible SMP IPI passed to us * on some level other than 15 which is the NMI and only used * for cross calls. That has a separate entry point below. + * + * IPIs are sent on Level 12, 13 and 14. See IRQ_IPI_*. */ maybe_smp4m_msg: GET_PROCESSOR4M_ID(o3) sethi %hi(sun4m_irq_percpu), %l5 sll %o3, 2, %o3 or %l5, %lo(sun4m_irq_percpu), %o5 - sethi %hi(0x40000000), %o2 + sethi %hi(0x70000000), %o2 ! Check all soft-IRQs ld [%o5 + %o3], %o1 ld [%o1 + 0x00], %o3 ! sun4m_irq_percpu[cpu]->pending andcc %o3, %o2, %g0 be,a smp4m_ticker cmp %l7, 14 - st %o2, [%o1 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x40000000 + /* Soft-IRQ IPI */ + st %o2, [%o1 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x70000000 WRITE_PAUSE ld [%o1 + 0x00], %g0 ! sun4m_irq_percpu[cpu]->pending WRITE_PAUSE @@ -290,9 +293,27 @@ maybe_smp4m_msg: WRITE_PAUSE wr %l4, PSR_ET, %psr WRITE_PAUSE - call smp_reschedule_irq + sll %o2, 28, %o2 ! shift for simpler checks below +maybe_smp4m_msg_check_single: + andcc %o2, 0x1, %g0 + beq,a maybe_smp4m_msg_check_mask + andcc %o2, 0x2, %g0 + call smp_call_function_single_interrupt nop - + andcc %o2, 0x2, %g0 +maybe_smp4m_msg_check_mask: + beq,a maybe_smp4m_msg_check_resched + andcc %o2, 0x4, %g0 + call smp_call_function_interrupt + nop + andcc %o2, 0x4, %g0 +maybe_smp4m_msg_check_resched: + /* rescheduling is done in RESTORE_ALL regardless, but incr stats */ + beq,a maybe_smp4m_msg_out + nop + call smp_resched_interrupt + nop +maybe_smp4m_msg_out: RESTORE_ALL .align 4 @@ -401,18 +422,18 @@ linux_trap_ipi15_sun4d: 1: b,a 1b #ifdef CONFIG_SPARC_LEON - - .globl smpleon_ticker - /* SMP per-cpu ticker interrupts are handled specially. */ -smpleon_ticker: + .globl smpleon_ipi + .extern leon_ipi_interrupt + /* SMP per-cpu IPI interrupts are handled specially. */ +smpleon_ipi: SAVE_ALL or %l0, PSR_PIL, %g2 wr %g2, 0x0, %psr WRITE_PAUSE wr %g2, PSR_ET, %psr WRITE_PAUSE - call leon_percpu_timer_interrupt - add %sp, STACKFRAME_SZ, %o0 + call leonsmp_ipi_interrupt + add %sp, STACKFRAME_SZ, %o1 ! pt_regs wr %l0, PSR_ET, %psr WRITE_PAUSE RESTORE_ALL diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S index 59423491cef..58778575983 100644 --- a/arch/sparc/kernel/head_32.S +++ b/arch/sparc/kernel/head_32.S @@ -810,31 +810,25 @@ found_version: got_prop: #ifdef CONFIG_SPARC_LEON /* no cpu-type check is needed, it is a SPARC-LEON */ -#ifdef CONFIG_SMP - ba leon_smp_init - nop - .global leon_smp_init -leon_smp_init: - sethi %hi(boot_cpu_id), %g1 ! master always 0 - stb %g0, [%g1 + %lo(boot_cpu_id)] - sethi %hi(boot_cpu_id4), %g1 ! master always 0 - stb %g0, [%g1 + %lo(boot_cpu_id4)] + sethi %hi(boot_cpu_id), %g2 ! boot-cpu index - rd %asr17,%g1 - srl %g1,28,%g1 +#ifdef CONFIG_SMP + ldub [%g2 + %lo(boot_cpu_id)], %g1 + cmp %g1, 0xff ! unset means first CPU + bne leon_smp_cpu_startup ! continue only with master + nop +#endif + /* Get CPU-ID from most significant 4-bit of ASR17 */ + rd %asr17, %g1 + srl %g1, 28, %g1 - cmp %g0,%g1 - beq sun4c_continue_boot !continue with master - nop + /* Update boot_cpu_id only on boot cpu */ + stub %g1, [%g2 + %lo(boot_cpu_id)] - ba leon_smp_cpu_startup - nop -#else ba sun4c_continue_boot nop #endif -#endif set cputypval, %o2 ldub [%o2 + 0x4], %l1 @@ -893,9 +887,6 @@ sun4d_init: sta %g4, [%g0] ASI_M_VIKING_TMP1 sethi %hi(boot_cpu_id), %g5 stb %g4, [%g5 + %lo(boot_cpu_id)] - sll %g4, 2, %g4 - sethi %hi(boot_cpu_id4), %g5 - stb %g4, [%g5 + %lo(boot_cpu_id4)] #endif /* Fall through to sun4m_init */ @@ -1024,14 +1015,28 @@ sun4c_continue_boot: bl 1b add %o0, 0x1, %o0 + /* If boot_cpu_id has not been setup by machine specific + * init-code above we default it to zero. + */ + sethi %hi(boot_cpu_id), %g2 + ldub [%g2 + %lo(boot_cpu_id)], %g3 + cmp %g3, 0xff + bne 1f + nop + mov %g0, %g3 + stub %g3, [%g2 + %lo(boot_cpu_id)] + +1: /* boot_cpu_id set. calculate boot_cpu_id4 = boot_cpu_id*4 */ + sll %g3, 2, %g3 + sethi %hi(boot_cpu_id4), %g2 + stub %g3, [%g2 + %lo(boot_cpu_id4)] + /* Initialize the uwinmask value for init task just in case. * But first make current_set[boot_cpu_id] point to something useful. */ set init_thread_union, %g6 set current_set, %g2 #ifdef CONFIG_SMP - sethi %hi(boot_cpu_id4), %g3 - ldub [%g3 + %lo(boot_cpu_id4)], %g3 st %g6, [%g2] add %g2, %g3, %g2 #endif diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index c6ce9a6a479..1c9c80a1a86 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -50,10 +50,15 @@ #include <asm/io-unit.h> #include <asm/leon.h> +/* This function must make sure that caches and memory are coherent after DMA + * On LEON systems without cache snooping it flushes the entire D-CACHE. + */ #ifndef CONFIG_SPARC_LEON -#define mmu_inval_dma_area(p, l) /* Anton pulled it out for 2.4.0-xx */ +static inline void dma_make_coherent(unsigned long pa, unsigned long len) +{ +} #else -static inline void mmu_inval_dma_area(void *va, unsigned long len) +static inline void dma_make_coherent(unsigned long pa, unsigned long len) { if (!sparc_leon3_snooping_enabled()) leon_flush_dcache_all(); @@ -284,7 +289,6 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len, printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total); goto err_nova; } - mmu_inval_dma_area((void *)va, len_total); // XXX The mmu_map_dma_area does this for us below, see comments. // sparc_mapiorange(0, virt_to_phys(va), res->start, len_total); @@ -336,7 +340,6 @@ static void sbus_free_coherent(struct device *dev, size_t n, void *p, release_resource(res); kfree(res); - /* mmu_inval_dma_area(va, n); */ /* it's consistent, isn't it */ pgv = virt_to_page(p); mmu_unmap_dma_area(dev, ba, n); @@ -463,7 +466,6 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len, printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total); goto err_nova; } - mmu_inval_dma_area(va, len_total); sparc_mapiorange(0, virt_to_phys(va), res->start, len_total); *pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */ @@ -489,7 +491,6 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p, dma_addr_t ba) { struct resource *res; - void *pgp; if ((res = _sparc_find_resource(&_sparc_dvma, (unsigned long)p)) == NULL) { @@ -509,14 +510,12 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p, return; } - pgp = phys_to_virt(ba); /* bus_to_virt actually */ - mmu_inval_dma_area(pgp, n); + dma_make_coherent(ba, n); sparc_unmapiorange((unsigned long)p, n); release_resource(res); kfree(res); - - free_pages((unsigned long)pgp, get_order(n)); + free_pages((unsigned long)phys_to_virt(ba), get_order(n)); } /* @@ -535,7 +534,7 @@ static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { if (dir != PCI_DMA_TODEVICE) - mmu_inval_dma_area(phys_to_virt(ba), PAGE_ALIGN(size)); + dma_make_coherent(ba, PAGE_ALIGN(size)); } /* Map a set of buffers described by scatterlist in streaming @@ -562,8 +561,7 @@ static int pci32_map_sg(struct device *device, struct scatterlist *sgl, /* IIep is write-through, not flushing. */ for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - sg->dma_address = virt_to_phys(sg_virt(sg)); + sg->dma_address = sg_phys(sg); sg->dma_length = sg->length; } return nents; @@ -582,9 +580,7 @@ static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl, if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - mmu_inval_dma_area(page_address(sg_page(sg)), - PAGE_ALIGN(sg->length)); + dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); } } } @@ -603,8 +599,7 @@ static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba, size_t size, enum dma_data_direction dir) { if (dir != PCI_DMA_TODEVICE) { - mmu_inval_dma_area(phys_to_virt(ba), - PAGE_ALIGN(size)); + dma_make_coherent(ba, PAGE_ALIGN(size)); } } @@ -612,8 +607,7 @@ static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba, size_t size, enum dma_data_direction dir) { if (dir != PCI_DMA_TODEVICE) { - mmu_inval_dma_area(phys_to_virt(ba), - PAGE_ALIGN(size)); + dma_make_coherent(ba, PAGE_ALIGN(size)); } } @@ -631,9 +625,7 @@ static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - mmu_inval_dma_area(page_address(sg_page(sg)), - PAGE_ALIGN(sg->length)); + dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); } } } @@ -646,9 +638,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist * if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - mmu_inval_dma_area(page_address(sg_page(sg)), - PAGE_ALIGN(sg->length)); + dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); } } } diff --git a/arch/sparc/kernel/irq.h b/arch/sparc/kernel/irq.h index 008453b798e..100b9c204e7 100644 --- a/arch/sparc/kernel/irq.h +++ b/arch/sparc/kernel/irq.h @@ -2,6 +2,23 @@ #include <asm/btfixup.h> +struct irq_bucket { + struct irq_bucket *next; + unsigned int real_irq; + unsigned int irq; + unsigned int pil; +}; + +#define SUN4D_MAX_BOARD 10 +#define SUN4D_MAX_IRQ ((SUN4D_MAX_BOARD + 2) << 5) + +/* Map between the irq identifier used in hw to the + * irq_bucket. The map is sufficient large to hold + * the sun4d hw identifiers. + */ +extern struct irq_bucket *irq_map[SUN4D_MAX_IRQ]; + + /* sun4m specific type definitions */ /* This maps direct to CPU specific interrupt registers */ @@ -35,6 +52,10 @@ struct sparc_irq_config { }; extern struct sparc_irq_config sparc_irq_config; +unsigned int irq_alloc(unsigned int real_irq, unsigned int pil); +void irq_link(unsigned int irq); +void irq_unlink(unsigned int irq); +void handler_irq(unsigned int pil, struct pt_regs *regs); /* Dave Redman (djhr@tadpole.co.uk) * changed these to function pointers.. it saves cycles and will allow @@ -44,33 +65,9 @@ extern struct sparc_irq_config sparc_irq_config; * Changed these to btfixup entities... It saves cycles :) */ -BTFIXUPDEF_CALL(void, disable_irq, unsigned int) -BTFIXUPDEF_CALL(void, enable_irq, unsigned int) -BTFIXUPDEF_CALL(void, disable_pil_irq, unsigned int) -BTFIXUPDEF_CALL(void, enable_pil_irq, unsigned int) BTFIXUPDEF_CALL(void, clear_clock_irq, void) BTFIXUPDEF_CALL(void, load_profile_irq, int, unsigned int) -static inline void __disable_irq(unsigned int irq) -{ - BTFIXUP_CALL(disable_irq)(irq); -} - -static inline void __enable_irq(unsigned int irq) -{ - BTFIXUP_CALL(enable_irq)(irq); -} - -static inline void disable_pil_irq(unsigned int irq) -{ - BTFIXUP_CALL(disable_pil_irq)(irq); -} - -static inline void enable_pil_irq(unsigned int irq) -{ - BTFIXUP_CALL(enable_pil_irq)(irq); -} - static inline void clear_clock_irq(void) { BTFIXUP_CALL(clear_clock_irq)(); @@ -89,4 +86,10 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int) #define set_cpu_int(cpu,level) BTFIXUP_CALL(set_cpu_int)(cpu,level) #define clear_cpu_int(cpu,level) BTFIXUP_CALL(clear_cpu_int)(cpu,level) #define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu) + +/* All SUN4D IPIs are sent on this IRQ, may be shared with hard IRQs */ +#define SUN4D_IPI_IRQ 14 + +extern void sun4d_ipi_interrupt(void); + #endif diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c index 7c93df4099c..9b89d842913 100644 --- a/arch/sparc/kernel/irq_32.c +++ b/arch/sparc/kernel/irq_32.c @@ -15,6 +15,7 @@ #include <linux/seq_file.h> #include <asm/cacheflush.h> +#include <asm/cpudata.h> #include <asm/pcic.h> #include <asm/leon.h> @@ -101,284 +102,173 @@ EXPORT_SYMBOL(arch_local_irq_restore); * directed CPU interrupts using the existing enable/disable irq code * with tweaks. * + * Sun4d complicates things even further. IRQ numbers are arbitrary + * 32-bit values in that case. Since this is similar to sparc64, + * we adopt a virtual IRQ numbering scheme as is done there. + * Virutal interrupt numbers are allocated by build_irq(). So NR_IRQS + * just becomes a limit of how many interrupt sources we can handle in + * a single system. Even fully loaded SS2000 machines top off at + * about 32 interrupt sources or so, therefore a NR_IRQS value of 64 + * is more than enough. + * + * We keep a map of per-PIL enable interrupts. These get wired + * up via the irq_chip->startup() method which gets invoked by + * the generic IRQ layer during request_irq(). */ +/* Table of allocated irqs. Unused entries has irq == 0 */ +static struct irq_bucket irq_table[NR_IRQS]; +/* Protect access to irq_table */ +static DEFINE_SPINLOCK(irq_table_lock); -/* - * Dave Redman (djhr@tadpole.co.uk) - * - * There used to be extern calls and hard coded values here.. very sucky! - * instead, because some of the devices attach very early, I do something - * equally sucky but at least we'll never try to free statically allocated - * space or call kmalloc before kmalloc_init :(. - * - * In fact it's the timer10 that attaches first.. then timer14 - * then kmalloc_init is called.. then the tty interrupts attach. - * hmmm.... - * - */ -#define MAX_STATIC_ALLOC 4 -struct irqaction static_irqaction[MAX_STATIC_ALLOC]; -int static_irq_count; - -static struct { - struct irqaction *action; - int flags; -} sparc_irq[NR_IRQS]; -#define SPARC_IRQ_INPROGRESS 1 - -/* Used to protect the IRQ action lists */ -DEFINE_SPINLOCK(irq_action_lock); +/* Map between the irq identifier used in hw to the irq_bucket. */ +struct irq_bucket *irq_map[SUN4D_MAX_IRQ]; +/* Protect access to irq_map */ +static DEFINE_SPINLOCK(irq_map_lock); -int show_interrupts(struct seq_file *p, void *v) +/* Allocate a new irq from the irq_table */ +unsigned int irq_alloc(unsigned int real_irq, unsigned int pil) { - int i = *(loff_t *)v; - struct irqaction *action; unsigned long flags; -#ifdef CONFIG_SMP - int j; -#endif + unsigned int i; + + spin_lock_irqsave(&irq_table_lock, flags); + for (i = 1; i < NR_IRQS; i++) { + if (irq_table[i].real_irq == real_irq && irq_table[i].pil == pil) + goto found; + } - if (sparc_cpu_model == sun4d) - return show_sun4d_interrupts(p, v); + for (i = 1; i < NR_IRQS; i++) { + if (!irq_table[i].irq) + break; + } - spin_lock_irqsave(&irq_action_lock, flags); if (i < NR_IRQS) { - action = sparc_irq[i].action; - if (!action) - goto out_unlock; - seq_printf(p, "%3d: ", i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) { - seq_printf(p, "%10u ", - kstat_cpu(j).irqs[i]); - } -#endif - seq_printf(p, " %c %s", - (action->flags & IRQF_DISABLED) ? '+' : ' ', - action->name); - for (action = action->next; action; action = action->next) { - seq_printf(p, ",%s %s", - (action->flags & IRQF_DISABLED) ? " +" : "", - action->name); - } - seq_putc(p, '\n'); + irq_table[i].real_irq = real_irq; + irq_table[i].irq = i; + irq_table[i].pil = pil; + } else { + printk(KERN_ERR "IRQ: Out of virtual IRQs.\n"); + i = 0; } -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); - return 0; +found: + spin_unlock_irqrestore(&irq_table_lock, flags); + + return i; } -void free_irq(unsigned int irq, void *dev_id) +/* Based on a single pil handler_irq may need to call several + * interrupt handlers. Use irq_map as entry to irq_table, + * and let each entry in irq_table point to the next entry. + */ +void irq_link(unsigned int irq) { - struct irqaction *action; - struct irqaction **actionp; + struct irq_bucket *p; unsigned long flags; - unsigned int cpu_irq; - - if (sparc_cpu_model == sun4d) { - sun4d_free_irq(irq, dev_id); - return; - } - cpu_irq = irq & (NR_IRQS - 1); - if (cpu_irq > 14) { /* 14 irq levels on the sparc */ - printk(KERN_ERR "Trying to free bogus IRQ %d\n", irq); - return; - } + unsigned int pil; - spin_lock_irqsave(&irq_action_lock, flags); + BUG_ON(irq >= NR_IRQS); - actionp = &sparc_irq[cpu_irq].action; - action = *actionp; + spin_lock_irqsave(&irq_map_lock, flags); - if (!action->handler) { - printk(KERN_ERR "Trying to free free IRQ%d\n", irq); - goto out_unlock; - } - if (dev_id) { - for (; action; action = action->next) { - if (action->dev_id == dev_id) - break; - actionp = &action->next; - } - if (!action) { - printk(KERN_ERR "Trying to free free shared IRQ%d\n", - irq); - goto out_unlock; - } - } else if (action->flags & IRQF_SHARED) { - printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n", - irq); - goto out_unlock; - } - if (action->flags & SA_STATIC_ALLOC) { - /* - * This interrupt is marked as specially allocated - * so it is a bad idea to free it. - */ - printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n", - irq, action->name); - goto out_unlock; - } - - *actionp = action->next; + p = &irq_table[irq]; + pil = p->pil; + BUG_ON(pil > SUN4D_MAX_IRQ); + p->next = irq_map[pil]; + irq_map[pil] = p; - spin_unlock_irqrestore(&irq_action_lock, flags); + spin_unlock_irqrestore(&irq_map_lock, flags); +} - synchronize_irq(irq); +void irq_unlink(unsigned int irq) +{ + struct irq_bucket *p, **pnext; + unsigned long flags; - spin_lock_irqsave(&irq_action_lock, flags); + BUG_ON(irq >= NR_IRQS); - kfree(action); + spin_lock_irqsave(&irq_map_lock, flags); - if (!sparc_irq[cpu_irq].action) - __disable_irq(irq); + p = &irq_table[irq]; + BUG_ON(p->pil > SUN4D_MAX_IRQ); + pnext = &irq_map[p->pil]; + while (*pnext != p) + pnext = &(*pnext)->next; + *pnext = p->next; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); + spin_unlock_irqrestore(&irq_map_lock, flags); } -EXPORT_SYMBOL(free_irq); - -/* - * This is called when we want to synchronize with - * interrupts. We may for example tell a device to - * stop sending interrupts: but to make sure there - * are no interrupts that are executing on another - * CPU we need to call this function. - */ -#ifdef CONFIG_SMP -void synchronize_irq(unsigned int irq) -{ - unsigned int cpu_irq; - cpu_irq = irq & (NR_IRQS - 1); - while (sparc_irq[cpu_irq].flags & SPARC_IRQ_INPROGRESS) - cpu_relax(); -} -EXPORT_SYMBOL(synchronize_irq); -#endif /* SMP */ -void unexpected_irq(int irq, void *dev_id, struct pt_regs *regs) +/* /proc/interrupts printing */ +int arch_show_interrupts(struct seq_file *p, int prec) { - int i; - struct irqaction *action; - unsigned int cpu_irq; + int j; - cpu_irq = irq & (NR_IRQS - 1); - action = sparc_irq[cpu_irq].action; - - printk(KERN_ERR "IO device interrupt, irq = %d\n", irq); - printk(KERN_ERR "PC = %08lx NPC = %08lx FP=%08lx\n", regs->pc, - regs->npc, regs->u_regs[14]); - if (action) { - printk(KERN_ERR "Expecting: "); - for (i = 0; i < 16; i++) - if (action->handler) - printk(KERN_CONT "[%s:%d:0x%x] ", action->name, - i, (unsigned int)action->handler); - } - printk(KERN_ERR "AIEEE\n"); - panic("bogus interrupt received"); +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_data(j).irq_resched_count); + seq_printf(p, " IPI rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_data(j).irq_call_count); + seq_printf(p, " IPI function call interrupts\n"); +#endif + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_data(j).counter); + seq_printf(p, " Non-maskable interrupts\n"); + return 0; } -void handler_irq(int pil, struct pt_regs *regs) +void handler_irq(unsigned int pil, struct pt_regs *regs) { struct pt_regs *old_regs; - struct irqaction *action; - int cpu = smp_processor_id(); + struct irq_bucket *p; + BUG_ON(pil > 15); old_regs = set_irq_regs(regs); irq_enter(); - disable_pil_irq(pil); -#ifdef CONFIG_SMP - /* Only rotate on lower priority IRQs (scsi, ethernet, etc.). */ - if ((sparc_cpu_model==sun4m) && (pil < 10)) - smp4m_irq_rotate(cpu); -#endif - action = sparc_irq[pil].action; - sparc_irq[pil].flags |= SPARC_IRQ_INPROGRESS; - kstat_cpu(cpu).irqs[pil]++; - do { - if (!action || !action->handler) - unexpected_irq(pil, NULL, regs); - action->handler(pil, action->dev_id); - action = action->next; - } while (action); - sparc_irq[pil].flags &= ~SPARC_IRQ_INPROGRESS; - enable_pil_irq(pil); + + p = irq_map[pil]; + while (p) { + struct irq_bucket *next = p->next; + + generic_handle_irq(p->irq); + p = next; + } irq_exit(); set_irq_regs(old_regs); } #if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE) +static unsigned int floppy_irq; -/* - * Fast IRQs on the Sparc can only have one routine attached to them, - * thus no sharing possible. - */ -static int request_fast_irq(unsigned int irq, - void (*handler)(void), - unsigned long irqflags, const char *devname) +int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler) { - struct irqaction *action; - unsigned long flags; unsigned int cpu_irq; - int ret; + int err; + #if defined CONFIG_SMP && !defined CONFIG_SPARC_LEON struct tt_entry *trap_table; #endif - cpu_irq = irq & (NR_IRQS - 1); - if (cpu_irq > 14) { - ret = -EINVAL; - goto out; - } - if (!handler) { - ret = -EINVAL; - goto out; - } - spin_lock_irqsave(&irq_action_lock, flags); + err = request_irq(irq, irq_handler, 0, "floppy", NULL); + if (err) + return -1; - action = sparc_irq[cpu_irq].action; - if (action) { - if (action->flags & IRQF_SHARED) - panic("Trying to register fast irq when already shared.\n"); - if (irqflags & IRQF_SHARED) - panic("Trying to register fast irq as shared.\n"); + /* Save for later use in floppy interrupt handler */ + floppy_irq = irq; - /* Anyway, someone already owns it so cannot be made fast. */ - printk(KERN_ERR "request_fast_irq: Trying to register yet already owned.\n"); - ret = -EBUSY; - goto out_unlock; - } - - /* - * If this is flagged as statically allocated then we use our - * private struct which is never freed. - */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk(KERN_ERR "Fast IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n", - irq, devname); - } - - if (action == NULL) - action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC); - if (!action) { - ret = -ENOMEM; - goto out_unlock; - } + cpu_irq = (irq & (NR_IRQS - 1)); /* Dork with trap table if we get this far. */ #define INSTANTIATE(table) \ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_one = SPARC_RD_PSR_L0; \ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two = \ - SPARC_BRANCH((unsigned long) handler, \ + SPARC_BRANCH((unsigned long) floppy_hardint, \ (unsigned long) &table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two);\ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_three = SPARC_RD_WIM_L3; \ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_four = SPARC_NOP; @@ -399,22 +289,9 @@ static int request_fast_irq(unsigned int irq, * writing we have no CPU-neutral interface to fine-grained flushes. */ flush_cache_all(); - - action->flags = irqflags; - action->name = devname; - action->dev_id = NULL; - action->next = NULL; - - sparc_irq[cpu_irq].action = action; - - __enable_irq(irq); - - ret = 0; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); -out: - return ret; + return 0; } +EXPORT_SYMBOL(sparc_floppy_request_irq); /* * These variables are used to access state from the assembler @@ -440,154 +317,23 @@ EXPORT_SYMBOL(pdma_base); unsigned long pdma_areasize; EXPORT_SYMBOL(pdma_areasize); -static irq_handler_t floppy_irq_handler; - +/* Use the generic irq support to call floppy_interrupt + * which was setup using request_irq() in sparc_floppy_request_irq(). + * We only have one floppy interrupt so we do not need to check + * for additional handlers being wired up by irq_link() + */ void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs) { struct pt_regs *old_regs; - int cpu = smp_processor_id(); old_regs = set_irq_regs(regs); - disable_pil_irq(irq); irq_enter(); - kstat_cpu(cpu).irqs[irq]++; - floppy_irq_handler(irq, dev_id); + generic_handle_irq(floppy_irq); irq_exit(); - enable_pil_irq(irq); set_irq_regs(old_regs); - /* - * XXX Eek, it's totally changed with preempt_count() and such - * if (softirq_pending(cpu)) - * do_softirq(); - */ -} - -int sparc_floppy_request_irq(int irq, unsigned long flags, - irq_handler_t irq_handler) -{ - floppy_irq_handler = irq_handler; - return request_fast_irq(irq, floppy_hardint, flags, "floppy"); } -EXPORT_SYMBOL(sparc_floppy_request_irq); - #endif -int request_irq(unsigned int irq, - irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - struct irqaction *action, **actionp; - unsigned long flags; - unsigned int cpu_irq; - int ret; - - if (sparc_cpu_model == sun4d) - return sun4d_request_irq(irq, handler, irqflags, devname, dev_id); - - cpu_irq = irq & (NR_IRQS - 1); - if (cpu_irq > 14) { - ret = -EINVAL; - goto out; - } - if (!handler) { - ret = -EINVAL; - goto out; - } - - spin_lock_irqsave(&irq_action_lock, flags); - - actionp = &sparc_irq[cpu_irq].action; - action = *actionp; - if (action) { - if (!(action->flags & IRQF_SHARED) || !(irqflags & IRQF_SHARED)) { - ret = -EBUSY; - goto out_unlock; - } - if ((action->flags & IRQF_DISABLED) != (irqflags & IRQF_DISABLED)) { - printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n", - irq); - ret = -EBUSY; - goto out_unlock; - } - for ( ; action; action = *actionp) - actionp = &action->next; - } - - /* If this is flagged as statically allocated then we use our - * private struct which is never freed. - */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n", - irq, devname); - } - if (action == NULL) - action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC); - if (!action) { - ret = -ENOMEM; - goto out_unlock; - } - - action->handler = handler; - action->flags = irqflags; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - *actionp = action; - - __enable_irq(irq); - - ret = 0; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); -out: - return ret; -} -EXPORT_SYMBOL(request_irq); - -void disable_irq_nosync(unsigned int irq) -{ - __disable_irq(irq); -} -EXPORT_SYMBOL(disable_irq_nosync); - -void disable_irq(unsigned int irq) -{ - __disable_irq(irq); -} -EXPORT_SYMBOL(disable_irq); - -void enable_irq(unsigned int irq) -{ - __enable_irq(irq); -} -EXPORT_SYMBOL(enable_irq); - -/* - * We really don't need these at all on the Sparc. We only have - * stubs here because they are exported to modules. - */ -unsigned long probe_irq_on(void) -{ - return 0; -} -EXPORT_SYMBOL(probe_irq_on); - -int probe_irq_off(unsigned long mask) -{ - return 0; -} -EXPORT_SYMBOL(probe_irq_off); - -static unsigned int build_device_irq(struct platform_device *op, - unsigned int real_irq) -{ - return real_irq; -} - /* djhr * This could probably be made indirect too and assigned in the CPU * bits of the code. That would be much nicer I think and would also @@ -598,8 +344,6 @@ static unsigned int build_device_irq(struct platform_device *op, void __init init_IRQ(void) { - sparc_irq_config.build_device_irq = build_device_irq; - switch (sparc_cpu_model) { case sun4c: case sun4: @@ -607,14 +351,11 @@ void __init init_IRQ(void) break; case sun4m: -#ifdef CONFIG_PCI pcic_probe(); - if (pcic_present()) { + if (pcic_present()) sun4m_pci_init_IRQ(); - break; - } -#endif - sun4m_init_IRQ(); + else + sun4m_init_IRQ(); break; case sun4d: @@ -632,9 +373,3 @@ void __init init_IRQ(void) btfixup(); } -#ifdef CONFIG_PROC_FS -void init_irq_proc(void) -{ - /* For now, nothing... */ -} -#endif /* CONFIG_PROC_FS */ diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index b1d275ce343..4e78862d12f 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -224,13 +224,13 @@ static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity) int cpuid; cpumask_copy(&mask, affinity); - if (cpus_equal(mask, cpu_online_map)) { + if (cpumask_equal(&mask, cpu_online_mask)) { cpuid = map_to_cpu(irq); } else { cpumask_t tmp; - cpus_and(tmp, cpu_online_map, mask); - cpuid = cpus_empty(tmp) ? map_to_cpu(irq) : first_cpu(tmp); + cpumask_and(&tmp, cpu_online_mask, &mask); + cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp); } return cpuid; diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 24ad449886b..6f6544cfa0e 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -6,11 +6,9 @@ #include <asm/traps.h> /* cpu.c */ -extern const char *sparc_cpu_type; extern const char *sparc_pmu_type; -extern const char *sparc_fpu_type; - extern unsigned int fsr_storage; +extern int ncpus_probed; #ifdef CONFIG_SPARC32 /* cpu.c */ @@ -37,6 +35,7 @@ extern void sun4c_init_IRQ(void); extern unsigned int lvl14_resolution; extern void sun4m_init_IRQ(void); +extern void sun4m_unmask_profile_irq(void); extern void sun4m_clear_profile_irq(int cpu); /* sun4d_irq.c */ diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 2969f777fa1..2f538ac2e13 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -19,53 +19,70 @@ #include <asm/leon_amba.h> #include <asm/traps.h> #include <asm/cacheflush.h> +#include <asm/smp.h> +#include <asm/setup.h> #include "prom.h" #include "irq.h" struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; /* interrupt controller base address */ struct leon3_gptimer_regs_map *leon3_gptimer_regs; /* timer controller base address */ -struct amba_apb_device leon_percpu_timer_dev[16]; int leondebug_irq_disable; int leon_debug_irqout; static int dummy_master_l10_counter; unsigned long amba_system_id; +static DEFINE_SPINLOCK(leon_irq_lock); unsigned long leon3_gptimer_irq; /* interrupt controller irq number */ unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */ +int leon3_ticker_irq; /* Timer ticker IRQ */ unsigned int sparc_leon_eirq; -#define LEON_IMASK ((&leon3_irqctrl_regs->mask[0])) +#define LEON_IMASK(cpu) (&leon3_irqctrl_regs->mask[cpu]) +#define LEON_IACK (&leon3_irqctrl_regs->iclear) +#define LEON_DO_ACK_HW 1 -/* Return the IRQ of the pending IRQ on the extended IRQ controller */ -int sparc_leon_eirq_get(int eirq, int cpu) +/* Return the last ACKed IRQ by the Extended IRQ controller. It has already + * been (automatically) ACKed when the CPU takes the trap. + */ +static inline unsigned int leon_eirq_get(int cpu) { return LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->intid[cpu]) & 0x1f; } -irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id) +/* Handle one or multiple IRQs from the extended interrupt controller */ +static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc) { - printk(KERN_ERR "sparc_leon_eirq_isr: ERROR EXTENDED IRQ\n"); - return IRQ_HANDLED; + unsigned int eirq; + int cpu = sparc_leon3_cpuid(); + + eirq = leon_eirq_get(cpu); + if ((eirq & 0x10) && irq_map[eirq]->irq) /* bit4 tells if IRQ happened */ + generic_handle_irq(irq_map[eirq]->irq); } /* The extended IRQ controller has been found, this function registers it */ -void sparc_leon_eirq_register(int eirq) +void leon_eirq_setup(unsigned int eirq) { - int irq; + unsigned long mask, oldmask; + unsigned int veirq; - /* Register a "BAD" handler for this interrupt, it should never happen */ - irq = request_irq(eirq, sparc_leon_eirq_isr, - (IRQF_DISABLED | SA_STATIC_ALLOC), "extirq", NULL); - - if (irq) { - printk(KERN_ERR - "sparc_leon_eirq_register: unable to attach IRQ%d\n", - eirq); - } else { - sparc_leon_eirq = eirq; + if (eirq < 1 || eirq > 0xf) { + printk(KERN_ERR "LEON EXT IRQ NUMBER BAD: %d\n", eirq); + return; } + veirq = leon_build_device_irq(eirq, leon_handle_ext_irq, "extirq", 0); + + /* + * Unmask the Extended IRQ, the IRQs routed through the Ext-IRQ + * controller have a mask-bit of their own, so this is safe. + */ + irq_link(veirq); + mask = 1 << eirq; + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(boot_cpu_id)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(boot_cpu_id), (oldmask | mask)); + sparc_leon_eirq = eirq; } static inline unsigned long get_irqmask(unsigned int irq) @@ -83,35 +100,151 @@ static inline unsigned long get_irqmask(unsigned int irq) return mask; } -static void leon_enable_irq(unsigned int irq_nr) +#ifdef CONFIG_SMP +static int irq_choose_cpu(const struct cpumask *affinity) { - unsigned long mask, flags; - mask = get_irqmask(irq_nr); - local_irq_save(flags); - LEON3_BYPASS_STORE_PA(LEON_IMASK, - (LEON3_BYPASS_LOAD_PA(LEON_IMASK) | (mask))); - local_irq_restore(flags); + cpumask_t mask; + + cpus_and(mask, cpu_online_map, *affinity); + if (cpus_equal(mask, cpu_online_map) || cpus_empty(mask)) + return boot_cpu_id; + else + return first_cpu(mask); } +#else +#define irq_choose_cpu(affinity) boot_cpu_id +#endif -static void leon_disable_irq(unsigned int irq_nr) +static int leon_set_affinity(struct irq_data *data, const struct cpumask *dest, + bool force) { - unsigned long mask, flags; - mask = get_irqmask(irq_nr); - local_irq_save(flags); - LEON3_BYPASS_STORE_PA(LEON_IMASK, - (LEON3_BYPASS_LOAD_PA(LEON_IMASK) & ~(mask))); - local_irq_restore(flags); + unsigned long mask, oldmask, flags; + int oldcpu, newcpu; + + mask = (unsigned long)data->chip_data; + oldcpu = irq_choose_cpu(data->affinity); + newcpu = irq_choose_cpu(dest); + + if (oldcpu == newcpu) + goto out; + + /* unmask on old CPU first before enabling on the selected CPU */ + spin_lock_irqsave(&leon_irq_lock, flags); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(oldcpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(oldcpu), (oldmask & ~mask)); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(newcpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(newcpu), (oldmask | mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); +out: + return IRQ_SET_MASK_OK; +} + +static void leon_unmask_irq(struct irq_data *data) +{ + unsigned long mask, oldmask, flags; + int cpu; + + mask = (unsigned long)data->chip_data; + cpu = irq_choose_cpu(data->affinity); + spin_lock_irqsave(&leon_irq_lock, flags); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask | mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); +} + +static void leon_mask_irq(struct irq_data *data) +{ + unsigned long mask, oldmask, flags; + int cpu; + + mask = (unsigned long)data->chip_data; + cpu = irq_choose_cpu(data->affinity); + spin_lock_irqsave(&leon_irq_lock, flags); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask & ~mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); +} + +static unsigned int leon_startup_irq(struct irq_data *data) +{ + irq_link(data->irq); + leon_unmask_irq(data); + return 0; +} +static void leon_shutdown_irq(struct irq_data *data) +{ + leon_mask_irq(data); + irq_unlink(data->irq); +} + +/* Used by external level sensitive IRQ handlers on the LEON: ACK IRQ ctrl */ +static void leon_eoi_irq(struct irq_data *data) +{ + unsigned long mask = (unsigned long)data->chip_data; + + if (mask & LEON_DO_ACK_HW) + LEON3_BYPASS_STORE_PA(LEON_IACK, mask & ~LEON_DO_ACK_HW); +} + +static struct irq_chip leon_irq = { + .name = "leon", + .irq_startup = leon_startup_irq, + .irq_shutdown = leon_shutdown_irq, + .irq_mask = leon_mask_irq, + .irq_unmask = leon_unmask_irq, + .irq_eoi = leon_eoi_irq, + .irq_set_affinity = leon_set_affinity, +}; + +/* + * Build a LEON IRQ for the edge triggered LEON IRQ controller: + * Edge (normal) IRQ - handle_simple_irq, ack=DONT-CARE, never ack + * Level IRQ (PCI|Level-GPIO) - handle_fasteoi_irq, ack=1, ack after ISR + * Per-CPU Edge - handle_percpu_irq, ack=0 + */ +unsigned int leon_build_device_irq(unsigned int real_irq, + irq_flow_handler_t flow_handler, + const char *name, int do_ack) +{ + unsigned int irq; + unsigned long mask; + + irq = 0; + mask = get_irqmask(real_irq); + if (mask == 0) + goto out; + + irq = irq_alloc(real_irq, real_irq); + if (irq == 0) + goto out; + + if (do_ack) + mask |= LEON_DO_ACK_HW; + + irq_set_chip_and_handler_name(irq, &leon_irq, + flow_handler, name); + irq_set_chip_data(irq, (void *)mask); + +out: + return irq; +} + +static unsigned int _leon_build_device_irq(struct platform_device *op, + unsigned int real_irq) +{ + return leon_build_device_irq(real_irq, handle_simple_irq, "edge", 0); } void __init leon_init_timers(irq_handler_t counter_fn) { - int irq; + int irq, eirq; struct device_node *rootnp, *np, *nnp; struct property *pp; int len; - int cpu, icsel; + int icsel; int ampopts; + int err; leondebug_irq_disable = 0; leon_debug_irqout = 0; @@ -173,98 +306,85 @@ void __init leon_init_timers(irq_handler_t counter_fn) leon3_gptimer_irq = *(unsigned int *)pp->value; } while (0); - if (leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq) { - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0); - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx].rld, - (((1000000 / HZ) - 1))); - LEON3_BYPASS_STORE_PA( + if (!(leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq)) + goto bad; + + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld, + (((1000000 / HZ) - 1))); + LEON3_BYPASS_STORE_PA( &leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, 0); #ifdef CONFIG_SMP - leon_percpu_timer_dev[0].start = (int)leon3_gptimer_regs; - leon_percpu_timer_dev[0].irq = leon3_gptimer_irq + 1 + - leon3_gptimer_idx; - - if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) & - (1<<LEON3_GPTIMER_SEPIRQ))) { - prom_printf("irq timer not configured with separate irqs\n"); - BUG(); - } + leon3_ticker_irq = leon3_gptimer_irq + 1 + leon3_gptimer_idx; - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx+1].val, 0); - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld, - (((1000000/HZ) - 1))); - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, 0); -# endif - - /* - * The IRQ controller may (if implemented) consist of multiple - * IRQ controllers, each mapped on a 4Kb boundary. - * Each CPU may be routed to different IRQCTRLs, however - * we assume that all CPUs (in SMP system) is routed to the - * same IRQ Controller, and for non-SMP only one IRQCTRL is - * accessed anyway. - * In AMP systems, Linux must run on CPU0 for the time being. - */ - cpu = sparc_leon3_cpuid(); - icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[cpu/8]); - icsel = (icsel >> ((7 - (cpu&0x7)) * 4)) & 0xf; - leon3_irqctrl_regs += icsel; - } else { - goto bad; + if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) & + (1<<LEON3_GPTIMER_SEPIRQ))) { + printk(KERN_ERR "timer not configured with separate irqs\n"); + BUG(); } - irq = request_irq(leon3_gptimer_irq+leon3_gptimer_idx, - counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].val, + 0); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld, + (((1000000/HZ) - 1))); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, + 0); +#endif - if (irq) { - printk(KERN_ERR "leon_time_init: unable to attach IRQ%d\n", - LEON_INTERRUPT_TIMER1); + /* + * The IRQ controller may (if implemented) consist of multiple + * IRQ controllers, each mapped on a 4Kb boundary. + * Each CPU may be routed to different IRQCTRLs, however + * we assume that all CPUs (in SMP system) is routed to the + * same IRQ Controller, and for non-SMP only one IRQCTRL is + * accessed anyway. + * In AMP systems, Linux must run on CPU0 for the time being. + */ + icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[boot_cpu_id/8]); + icsel = (icsel >> ((7 - (boot_cpu_id&0x7)) * 4)) & 0xf; + leon3_irqctrl_regs += icsel; + + /* Mask all IRQs on boot-cpu IRQ controller */ + LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[boot_cpu_id], 0); + + /* Probe extended IRQ controller */ + eirq = (LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->mpstatus) + >> 16) & 0xf; + if (eirq != 0) + leon_eirq_setup(eirq); + + irq = _leon_build_device_irq(NULL, leon3_gptimer_irq+leon3_gptimer_idx); + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); + if (err) { + printk(KERN_ERR "unable to attach timer IRQ%d\n", irq); prom_halt(); } -# ifdef CONFIG_SMP - { - unsigned long flags; - struct tt_entry *trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_percpu_timer_dev[0].irq - 1)]; - - /* For SMP we use the level 14 ticker, however the bootup code - * has copied the firmwares level 14 vector into boot cpu's - * trap table, we must fix this now or we get squashed. - */ - local_irq_save(flags); - - patchme_maybe_smp_msg[0] = 0x01000000; /* NOP out the branch */ - - /* Adjust so that we jump directly to smpleon_ticker */ - trap_table->inst_three += smpleon_ticker - real_irq_entry; + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, + LEON3_GPTIMER_EN | + LEON3_GPTIMER_RL | + LEON3_GPTIMER_LD | + LEON3_GPTIMER_IRQEN); - local_flush_cache_all(); - local_irq_restore(flags); +#ifdef CONFIG_SMP + /* Install per-cpu IRQ handler for broadcasted ticker */ + irq = leon_build_device_irq(leon3_ticker_irq, handle_percpu_irq, + "per-cpu", 0); + err = request_irq(irq, leon_percpu_timer_interrupt, + IRQF_PERCPU | IRQF_TIMER, "ticker", + NULL); + if (err) { + printk(KERN_ERR "unable to attach ticker IRQ%d\n", irq); + prom_halt(); } -# endif - - if (leon3_gptimer_regs) { - LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, - LEON3_GPTIMER_EN | - LEON3_GPTIMER_RL | - LEON3_GPTIMER_LD | LEON3_GPTIMER_IRQEN); -#ifdef CONFIG_SMP - LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, - LEON3_GPTIMER_EN | - LEON3_GPTIMER_RL | - LEON3_GPTIMER_LD | - LEON3_GPTIMER_IRQEN); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, + LEON3_GPTIMER_EN | + LEON3_GPTIMER_RL | + LEON3_GPTIMER_LD | + LEON3_GPTIMER_IRQEN); #endif - - } return; bad: printk(KERN_ERR "No Timer/irqctrl found\n"); @@ -281,9 +401,6 @@ void leon_load_profile_irq(int cpu, unsigned int limit) BUG(); } - - - void __init leon_trans_init(struct device_node *dp) { if (strcmp(dp->type, "cpu") == 0 && strcmp(dp->name, "<NULL>") == 0) { @@ -337,22 +454,18 @@ void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu) { unsigned long mask, flags, *addr; mask = get_irqmask(irq_nr); - local_irq_save(flags); - addr = (unsigned long *)&(leon3_irqctrl_regs->mask[cpu]); - LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | (mask))); - local_irq_restore(flags); + spin_lock_irqsave(&leon_irq_lock, flags); + addr = (unsigned long *)LEON_IMASK(cpu); + LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); } #endif void __init leon_init_IRQ(void) { - sparc_irq_config.init_timers = leon_init_timers; - - BTFIXUPSET_CALL(enable_irq, leon_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, leon_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, leon_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, leon_disable_irq, BTFIXUPCALL_NORM); + sparc_irq_config.init_timers = leon_init_timers; + sparc_irq_config.build_device_irq = _leon_build_device_irq; BTFIXUPSET_CALL(clear_clock_irq, leon_clear_clock_irq, BTFIXUPCALL_NORM); diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 8f5de4aa3c0..fe8fb44c609 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -14,6 +14,7 @@ #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> +#include <linux/of.h> #include <linux/init.h> #include <linux/spinlock.h> #include <linux/mm.h> @@ -29,6 +30,7 @@ #include <asm/ptrace.h> #include <asm/atomic.h> #include <asm/irq_regs.h> +#include <asm/traps.h> #include <asm/delay.h> #include <asm/irq.h> @@ -50,9 +52,12 @@ extern ctxd_t *srmmu_ctx_table_phys; static int smp_processors_ready; extern volatile unsigned long cpu_callin_map[NR_CPUS]; -extern unsigned char boot_cpu_id; extern cpumask_t smp_commenced_mask; void __init leon_configure_cache_smp(void); +static void leon_ipi_init(void); + +/* IRQ number of LEON IPIs */ +int leon_ipi_irq = LEON3_IRQ_IPI_DEFAULT; static inline unsigned long do_swap(volatile unsigned long *ptr, unsigned long val) @@ -94,8 +99,6 @@ void __cpuinit leon_callin(void) local_flush_cache_all(); local_flush_tlb_all(); - cpu_probe(); - /* Fix idle thread fields. */ __asm__ __volatile__("ld [%0], %%g6\n\t" : : "r"(¤t_set[cpuid]) : "memory" /* paranoid */); @@ -104,11 +107,11 @@ void __cpuinit leon_callin(void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) mb(); local_irq_enable(); - cpu_set(cpuid, cpu_online_map); + set_cpu_online(cpuid, true); } /* @@ -179,13 +182,16 @@ void __init leon_boot_cpus(void) int nrcpu = leon_smp_nrcpus(); int me = smp_processor_id(); + /* Setup IPI */ + leon_ipi_init(); + printk(KERN_INFO "%d:(%d:%d) cpus mpirq at 0x%x\n", (unsigned int)me, (unsigned int)nrcpu, (unsigned int)NR_CPUS, (unsigned int)&(leon3_irqctrl_regs->mpstatus)); leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, me); leon_enable_irq_cpu(LEON3_IRQ_TICKER, me); - leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, me); + leon_enable_irq_cpu(leon_ipi_irq, me); leon_smp_setbroadcast(1 << LEON3_IRQ_TICKER); @@ -220,6 +226,10 @@ int __cpuinit leon_boot_one_cpu(int i) (unsigned int)&leon3_irqctrl_regs->mpstatus); local_flush_cache_all(); + /* Make sure all IRQs are of from the start for this new CPU */ + LEON_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[i], 0); + + /* Wake one CPU */ LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpstatus), 1 << i); /* wheee... it's going... */ @@ -236,7 +246,7 @@ int __cpuinit leon_boot_one_cpu(int i) } else { leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, i); leon_enable_irq_cpu(LEON3_IRQ_TICKER, i); - leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, i); + leon_enable_irq_cpu(leon_ipi_irq, i); } local_flush_cache_all(); @@ -262,21 +272,21 @@ void __init leon_smp_done(void) local_flush_cache_all(); /* Free unneeded trap tables */ - if (!cpu_isset(1, cpu_present_map)) { + if (!cpu_present(1)) { ClearPageReserved(virt_to_page(&trapbase_cpu1)); init_page_count(virt_to_page(&trapbase_cpu1)); free_page((unsigned long)&trapbase_cpu1); totalram_pages++; num_physpages++; } - if (!cpu_isset(2, cpu_present_map)) { + if (!cpu_present(2)) { ClearPageReserved(virt_to_page(&trapbase_cpu2)); init_page_count(virt_to_page(&trapbase_cpu2)); free_page((unsigned long)&trapbase_cpu2); totalram_pages++; num_physpages++; } - if (!cpu_isset(3, cpu_present_map)) { + if (!cpu_present(3)) { ClearPageReserved(virt_to_page(&trapbase_cpu3)); init_page_count(virt_to_page(&trapbase_cpu3)); free_page((unsigned long)&trapbase_cpu3); @@ -292,6 +302,99 @@ void leon_irq_rotate(int cpu) { } +struct leon_ipi_work { + int single; + int msk; + int resched; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct leon_ipi_work, leon_ipi_work); + +/* Initialize IPIs on the LEON, in order to save IRQ resources only one IRQ + * is used for all three types of IPIs. + */ +static void __init leon_ipi_init(void) +{ + int cpu, len; + struct leon_ipi_work *work; + struct property *pp; + struct device_node *rootnp; + struct tt_entry *trap_table; + unsigned long flags; + + /* Find IPI IRQ or stick with default value */ + rootnp = of_find_node_by_path("/ambapp0"); + if (rootnp) { + pp = of_find_property(rootnp, "ipi_num", &len); + if (pp && (*(int *)pp->value)) + leon_ipi_irq = *(int *)pp->value; + } + printk(KERN_INFO "leon: SMP IPIs at IRQ %d\n", leon_ipi_irq); + + /* Adjust so that we jump directly to smpleon_ipi */ + local_irq_save(flags); + trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_ipi_irq - 1)]; + trap_table->inst_three += smpleon_ipi - real_irq_entry; + local_flush_cache_all(); + local_irq_restore(flags); + + for_each_possible_cpu(cpu) { + work = &per_cpu(leon_ipi_work, cpu); + work->single = work->msk = work->resched = 0; + } +} + +static void leon_ipi_single(int cpu) +{ + struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu); + + /* Mark work */ + work->single = 1; + + /* Generate IRQ on the CPU */ + set_cpu_int(cpu, leon_ipi_irq); +} + +static void leon_ipi_mask_one(int cpu) +{ + struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu); + + /* Mark work */ + work->msk = 1; + + /* Generate IRQ on the CPU */ + set_cpu_int(cpu, leon_ipi_irq); +} + +static void leon_ipi_resched(int cpu) +{ + struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu); + + /* Mark work */ + work->resched = 1; + + /* Generate IRQ on the CPU (any IRQ will cause resched) */ + set_cpu_int(cpu, leon_ipi_irq); +} + +void leonsmp_ipi_interrupt(void) +{ + struct leon_ipi_work *work = &__get_cpu_var(leon_ipi_work); + + if (work->single) { + work->single = 0; + smp_call_function_single_interrupt(); + } + if (work->msk) { + work->msk = 0; + smp_call_function_interrupt(); + } + if (work->resched) { + work->resched = 0; + smp_resched_interrupt(); + } +} + static struct smp_funcall { smpfunc_t func; unsigned long arg1; @@ -337,10 +440,10 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, { register int i; - cpu_clear(smp_processor_id(), mask); - cpus_and(mask, cpu_online_map, mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i <= high; i++) { - if (cpu_isset(i, mask)) { + if (cpumask_test_cpu(i, &mask)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; set_cpu_int(i, LEON3_IRQ_CROSS_CALL); @@ -354,7 +457,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_in[i]) @@ -363,7 +466,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_out[i]) @@ -386,27 +489,23 @@ void leon_cross_call_irq(void) ccall_info.processors_out[i] = 1; } -void leon_percpu_timer_interrupt(struct pt_regs *regs) +irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused) { - struct pt_regs *old_regs; int cpu = smp_processor_id(); - old_regs = set_irq_regs(regs); - leon_clear_profile_irq(cpu); profile_tick(CPU_PROFILING); if (!--prof_counter(cpu)) { - int user = user_mode(regs); + int user = user_mode(get_irq_regs()); - irq_enter(); update_process_times(user); - irq_exit(); prof_counter(cpu) = prof_multiplier(cpu); } - set_irq_regs(old_regs); + + return IRQ_HANDLED; } static void __init smp_setup_percpu_timer(void) @@ -449,6 +548,9 @@ void __init leon_init_smp(void) BTFIXUPSET_CALL(smp_cross_call, leon_cross_call, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__hard_smp_processor_id, __leon_processor_id, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_resched, leon_ipi_resched, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_single, leon_ipi_single, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_mask_one, leon_ipi_mask_one, BTFIXUPCALL_NORM); } #endif /* CONFIG_SPARC_LEON */ diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 56db06432ce..42f28c7420e 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -768,7 +768,7 @@ static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handl cpuid, NR_CPUS); continue; } - if (!cpu_isset(cpuid, *mask)) + if (!cpumask_test_cpu(cpuid, mask)) continue; #endif diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 5c149689bb2..3bb2eace58c 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -622,8 +622,9 @@ static unsigned int __init build_one_device_irq(struct platform_device *op, out: nid = of_node_to_nid(dp); if (nid != -1) { - cpumask_t numa_mask = *cpumask_of_node(nid); + cpumask_t numa_mask; + cpumask_copy(&numa_mask, cpumask_of_node(nid)); irq_set_affinity(irq, &numa_mask); } diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index 30982e9ab62..580651af73f 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@ -284,8 +284,9 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, nid = pbm->numa_node; if (nid != -1) { - cpumask_t numa_mask = *cpumask_of_node(nid); + cpumask_t numa_mask; + cpumask_copy(&numa_mask, cpumask_of_node(nid)); irq_set_affinity(irq, &numa_mask); } err = request_irq(irq, sparc64_msiq_interrupt, 0, diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 2cdc131b50a..948601a066f 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -164,6 +164,9 @@ void __iomem *pcic_regs; volatile int pcic_speculative; volatile int pcic_trapped; +/* forward */ +unsigned int pcic_build_device_irq(struct platform_device *op, + unsigned int real_irq); #define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (((unsigned int)bus) << 16) | (((unsigned int)device_fn) << 8) | (where & ~3)) @@ -523,6 +526,7 @@ static void pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) { struct pcic_ca2irq *p; + unsigned int real_irq; int i, ivec; char namebuf[64]; @@ -551,26 +555,25 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) i = p->pin; if (i >= 0 && i < 4) { ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_LO); - dev->irq = ivec >> (i << 2) & 0xF; + real_irq = ivec >> (i << 2) & 0xF; } else if (i >= 4 && i < 8) { ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI); - dev->irq = ivec >> ((i-4) << 2) & 0xF; + real_irq = ivec >> ((i-4) << 2) & 0xF; } else { /* Corrupted map */ printk("PCIC: BAD PIN %d\n", i); for (;;) {} } /* P3 */ /* printk("PCIC: device %s pin %d ivec 0x%x irq %x\n", namebuf, i, ivec, dev->irq); */ - /* - * dev->irq=0 means PROM did not bother to program the upper + /* real_irq means PROM did not bother to program the upper * half of PCIC. This happens on JS-E with PROM 3.11, for instance. */ - if (dev->irq == 0 || p->force) { + if (real_irq == 0 || p->force) { if (p->irq == 0 || p->irq >= 15) { /* Corrupted map */ printk("PCIC: BAD IRQ %d\n", p->irq); for (;;) {} } printk("PCIC: setting irq %d at pin %d for device %02x:%02x\n", p->irq, p->pin, dev->bus->number, dev->devfn); - dev->irq = p->irq; + real_irq = p->irq; i = p->pin; if (i >= 4) { @@ -584,7 +587,8 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) ivec |= p->irq << (i << 2); writew(ivec, pcic->pcic_regs+PCI_INT_SELECT_LO); } - } + } + dev->irq = pcic_build_device_irq(NULL, real_irq); } /* @@ -729,6 +733,7 @@ void __init pci_time_init(void) struct linux_pcic *pcic = &pcic0; unsigned long v; int timer_irq, irq; + int err; do_arch_gettimeoffset = pci_gettimeoffset; @@ -740,9 +745,10 @@ void __init pci_time_init(void) timer_irq = PCI_COUNTER_IRQ_SYS(v); writel (PCI_COUNTER_IRQ_SET(timer_irq, 0), pcic->pcic_regs+PCI_COUNTER_IRQ); - irq = request_irq(timer_irq, pcic_timer_handler, - (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); - if (irq) { + irq = pcic_build_device_irq(NULL, timer_irq); + err = request_irq(irq, pcic_timer_handler, + IRQF_TIMER, "timer", NULL); + if (err) { prom_printf("time_init: unable to attach IRQ%d\n", timer_irq); prom_halt(); } @@ -803,50 +809,73 @@ static inline unsigned long get_irqmask(int irq_nr) return 1 << irq_nr; } -static void pcic_disable_irq(unsigned int irq_nr) +static void pcic_mask_irq(struct irq_data *data) { unsigned long mask, flags; - mask = get_irqmask(irq_nr); + mask = (unsigned long)data->chip_data; local_irq_save(flags); writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET); local_irq_restore(flags); } -static void pcic_enable_irq(unsigned int irq_nr) +static void pcic_unmask_irq(struct irq_data *data) { unsigned long mask, flags; - mask = get_irqmask(irq_nr); + mask = (unsigned long)data->chip_data; local_irq_save(flags); writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR); local_irq_restore(flags); } -static void pcic_load_profile_irq(int cpu, unsigned int limit) +static unsigned int pcic_startup_irq(struct irq_data *data) { - printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__); + irq_link(data->irq); + pcic_unmask_irq(data); + return 0; } -/* We assume the caller has disabled local interrupts when these are called, - * or else very bizarre behavior will result. - */ -static void pcic_disable_pil_irq(unsigned int pil) +static struct irq_chip pcic_irq = { + .name = "pcic", + .irq_startup = pcic_startup_irq, + .irq_mask = pcic_mask_irq, + .irq_unmask = pcic_unmask_irq, +}; + +unsigned int pcic_build_device_irq(struct platform_device *op, + unsigned int real_irq) { - writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET); + unsigned int irq; + unsigned long mask; + + irq = 0; + mask = get_irqmask(real_irq); + if (mask == 0) + goto out; + + irq = irq_alloc(real_irq, real_irq); + if (irq == 0) + goto out; + + irq_set_chip_and_handler_name(irq, &pcic_irq, + handle_level_irq, "PCIC"); + irq_set_chip_data(irq, (void *)mask); + +out: + return irq; } -static void pcic_enable_pil_irq(unsigned int pil) + +static void pcic_load_profile_irq(int cpu, unsigned int limit) { - writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR); + printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__); } void __init sun4m_pci_init_IRQ(void) { - BTFIXUPSET_CALL(enable_irq, pcic_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, pcic_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, pcic_enable_pil_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, pcic_disable_pil_irq, BTFIXUPCALL_NORM); + sparc_irq_config.build_device_irq = pcic_build_device_irq; + BTFIXUPSET_CALL(clear_clock_irq, pcic_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, pcic_load_profile_irq, BTFIXUPCALL_NORM); } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index ee8426ede7c..2cb0e1c001e 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -26,6 +26,7 @@ #include <asm/nmi.h> #include <asm/pcr.h> +#include "kernel.h" #include "kstack.h" /* Sparc64 chips have two performance counters, 32-bits each, with diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 17529298c50..c8cc461ff75 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -128,8 +128,16 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); /* endless idle loop with no priority at all */ while(1) { - while (!need_resched()) - cpu_relax(); +#ifdef CONFIG_SPARC_LEON + if (pm_idle) { + while (!need_resched()) + (*pm_idle)(); + } else +#endif + { + while (!need_resched()) + cpu_relax(); + } preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index 05fb2533058..5ce3d15a99b 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -326,7 +326,6 @@ void __init of_console_init(void) of_console_options = NULL; } - prom_printf(msg, of_console_path); printk(msg, of_console_path); } diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 7b8b76c9557..3609bdee9ed 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -103,16 +103,20 @@ static unsigned int boot_flags __initdata = 0; /* Exported for mm/init.c:paging_init. */ unsigned long cmdline_memory_size __initdata = 0; +/* which CPU booted us (0xff = not set) */ +unsigned char boot_cpu_id = 0xff; /* 0xff will make it into DATA section... */ +unsigned char boot_cpu_id4; /* boot_cpu_id << 2 */ + static void prom_console_write(struct console *con, const char *s, unsigned n) { prom_write(s, n); } -static struct console prom_debug_console = { - .name = "debug", +static struct console prom_early_console = { + .name = "earlyprom", .write = prom_console_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_BOOT, .index = -1, }; @@ -133,8 +137,7 @@ static void __init process_switch(char c) prom_halt(); break; case 'p': - /* Use PROM debug console. */ - register_console(&prom_debug_console); + /* Just ignore, this behavior is now the default. */ break; default: printk("Unknown boot switch (-%c)\n", c); @@ -215,6 +218,10 @@ void __init setup_arch(char **cmdline_p) strcpy(boot_command_line, *cmdline_p); parse_early_param(); + boot_flags_init(*cmdline_p); + + register_console(&prom_early_console); + /* Set sparc_cpu_model */ sparc_cpu_model = sun_unknown; if (!strcmp(&cputypval[0], "sun4 ")) @@ -265,7 +272,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif - boot_flags_init(*cmdline_p); idprom_init(); if (ARCH_SUN4C) @@ -311,75 +317,6 @@ void __init setup_arch(char **cmdline_p) smp_setup_cpu_possible_map(); } -static int ncpus_probed; - -static int show_cpuinfo(struct seq_file *m, void *__unused) -{ - seq_printf(m, - "cpu\t\t: %s\n" - "fpu\t\t: %s\n" - "promlib\t\t: Version %d Revision %d\n" - "prom\t\t: %d.%d\n" - "type\t\t: %s\n" - "ncpus probed\t: %d\n" - "ncpus active\t: %d\n" -#ifndef CONFIG_SMP - "CPU0Bogo\t: %lu.%02lu\n" - "CPU0ClkTck\t: %ld\n" -#endif - , - sparc_cpu_type, - sparc_fpu_type , - romvec->pv_romvers, - prom_rev, - romvec->pv_printrev >> 16, - romvec->pv_printrev & 0xffff, - &cputypval[0], - ncpus_probed, - num_online_cpus() -#ifndef CONFIG_SMP - , cpu_data(0).udelay_val/(500000/HZ), - (cpu_data(0).udelay_val/(5000/HZ)) % 100, - cpu_data(0).clock_tick -#endif - ); - -#ifdef CONFIG_SMP - smp_bogo(m); -#endif - mmu_info(m); -#ifdef CONFIG_SMP - smp_info(m); -#endif - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - /* The pointer we are returning is arbitrary, - * it just has to be non-NULL and not IS_ERR - * in the success case. - */ - return *pos == 0 ? &c_start : NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start =c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; - extern int stop_a_enabled; void sun_do_break(void) diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 29bafe051bb..f3b6850cc8d 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -339,84 +339,6 @@ void __init setup_arch(char **cmdline_p) paging_init(); } -/* BUFFER is PAGE_SIZE bytes long. */ - -extern void smp_info(struct seq_file *); -extern void smp_bogo(struct seq_file *); -extern void mmu_info(struct seq_file *); - -unsigned int dcache_parity_tl1_occurred; -unsigned int icache_parity_tl1_occurred; - -int ncpus_probed; - -static int show_cpuinfo(struct seq_file *m, void *__unused) -{ - seq_printf(m, - "cpu\t\t: %s\n" - "fpu\t\t: %s\n" - "pmu\t\t: %s\n" - "prom\t\t: %s\n" - "type\t\t: %s\n" - "ncpus probed\t: %d\n" - "ncpus active\t: %d\n" - "D$ parity tl1\t: %u\n" - "I$ parity tl1\t: %u\n" -#ifndef CONFIG_SMP - "Cpu0ClkTck\t: %016lx\n" -#endif - , - sparc_cpu_type, - sparc_fpu_type, - sparc_pmu_type, - prom_version, - ((tlb_type == hypervisor) ? - "sun4v" : - "sun4u"), - ncpus_probed, - num_online_cpus(), - dcache_parity_tl1_occurred, - icache_parity_tl1_occurred -#ifndef CONFIG_SMP - , cpu_data(0).clock_tick -#endif - ); -#ifdef CONFIG_SMP - smp_bogo(m); -#endif - mmu_info(m); -#ifdef CONFIG_SMP - smp_info(m); -#endif - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - /* The pointer we are returning is arbitrary, - * it just has to be non-NULL and not IS_ERR - * in the success case. - */ - return *pos == 0 ? &c_start : NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start =c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; - extern int stop_a_enabled; void sun_do_break(void) diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 442286d8343..d5b3958be0b 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -37,8 +37,6 @@ #include "irq.h" volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,}; -unsigned char boot_cpu_id = 0; -unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ cpumask_t smp_commenced_mask = CPU_MASK_NONE; @@ -130,14 +128,57 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 }; void smp_send_reschedule(int cpu) { /* - * XXX missing reschedule IPI, see scheduler_ipi() + * CPU model dependent way of implementing IPI generation targeting + * a single CPU. The trap handler needs only to do trap entry/return + * to call schedule. */ + BTFIXUP_CALL(smp_ipi_resched)(cpu); } void smp_send_stop(void) { } +void arch_send_call_function_single_ipi(int cpu) +{ + /* trigger one IPI single call on one CPU */ + BTFIXUP_CALL(smp_ipi_single)(cpu); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + int cpu; + + /* trigger IPI mask call on each CPU */ + for_each_cpu(cpu, mask) + BTFIXUP_CALL(smp_ipi_mask_one)(cpu); +} + +void smp_resched_interrupt(void) +{ + irq_enter(); + scheduler_ipi(); + local_cpu_data().irq_resched_count++; + irq_exit(); + /* re-schedule routine called by interrupt return code. */ +} + +void smp_call_function_single_interrupt(void) +{ + irq_enter(); + generic_smp_call_function_single_interrupt(); + local_cpu_data().irq_call_count++; + irq_exit(); +} + +void smp_call_function_interrupt(void) +{ + irq_enter(); + generic_smp_call_function_interrupt(); + local_cpu_data().irq_call_count++; + irq_exit(); +} + void smp_flush_cache_all(void) { xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all)); @@ -153,9 +194,10 @@ void smp_flush_tlb_all(void) void smp_flush_cache_mm(struct mm_struct *mm) { if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm); local_flush_cache_mm(mm); } @@ -164,9 +206,10 @@ void smp_flush_cache_mm(struct mm_struct *mm) void smp_flush_tlb_mm(struct mm_struct *mm) { if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) { + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) { xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm); if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm) cpumask_copy(mm_cpumask(mm), @@ -182,9 +225,10 @@ void smp_flush_cache_range(struct vm_area_struct *vma, unsigned long start, struct mm_struct *mm = vma->vm_mm; if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end); local_flush_cache_range(vma, start, end); } @@ -196,9 +240,10 @@ void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, struct mm_struct *mm = vma->vm_mm; if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end); local_flush_tlb_range(vma, start, end); } @@ -209,9 +254,10 @@ void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page) struct mm_struct *mm = vma->vm_mm; if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page); local_flush_cache_page(vma, page); } @@ -222,19 +268,15 @@ void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) struct mm_struct *mm = vma->vm_mm; if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page); local_flush_tlb_page(vma, page); } } -void smp_reschedule_irq(void) -{ - set_need_resched(); -} - void smp_flush_page_to_ram(unsigned long page) { /* Current theory is that those who call this are the one's @@ -251,9 +293,10 @@ void smp_flush_page_to_ram(unsigned long page) void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr); local_flush_sig_insns(mm, insn_addr); } @@ -407,7 +450,7 @@ int __cpuinit __cpu_up(unsigned int cpu) }; if (!ret) { - cpu_set(cpu, smp_commenced_mask); + cpumask_set_cpu(cpu, &smp_commenced_mask); while (!cpu_online(cpu)) mb(); } diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 9478da7fdb3..99cb17251bb 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -121,11 +121,11 @@ void __cpuinit smp_callin(void) /* inform the notifiers about the new cpu */ notify_cpu_starting(cpuid); - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) rmb(); ipi_call_lock_irq(); - cpu_set(cpuid, cpu_online_map); + set_cpu_online(cpuid, true); ipi_call_unlock_irq(); /* idle thread is expected to have preempt disabled */ @@ -785,7 +785,7 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask /* Send cross call to all processors mentioned in MASK_P * except self. Really, there are only two cases currently, - * "&cpu_online_map" and "&mm->cpu_vm_mask". + * "cpu_online_mask" and "mm_cpumask(mm)". */ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask) { @@ -797,7 +797,7 @@ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 d /* Send cross call to all processors except self. */ static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2) { - smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map); + smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask); } extern unsigned long xcall_sync_tick; @@ -805,7 +805,7 @@ extern unsigned long xcall_sync_tick; static void smp_start_sync_tick_client(int cpu) { xcall_deliver((u64) &xcall_sync_tick, 0, 0, - &cpumask_of_cpu(cpu)); + cpumask_of(cpu)); } extern unsigned long xcall_call_function; @@ -820,7 +820,7 @@ extern unsigned long xcall_call_function_single; void arch_send_call_function_single_ipi(int cpu) { xcall_deliver((u64) &xcall_call_function_single, 0, 0, - &cpumask_of_cpu(cpu)); + cpumask_of(cpu)); } void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs) @@ -918,7 +918,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) } if (data0) { xcall_deliver(data0, __pa(pg_addr), - (u64) pg_addr, &cpumask_of_cpu(cpu)); + (u64) pg_addr, cpumask_of(cpu)); #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes_xcall); #endif @@ -954,7 +954,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) } if (data0) { xcall_deliver(data0, __pa(pg_addr), - (u64) pg_addr, &cpu_online_map); + (u64) pg_addr, cpu_online_mask); #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes_xcall); #endif @@ -1197,32 +1197,32 @@ void __devinit smp_fill_in_sib_core_maps(void) for_each_present_cpu(i) { unsigned int j; - cpus_clear(cpu_core_map[i]); + cpumask_clear(&cpu_core_map[i]); if (cpu_data(i).core_id == 0) { - cpu_set(i, cpu_core_map[i]); + cpumask_set_cpu(i, &cpu_core_map[i]); continue; } for_each_present_cpu(j) { if (cpu_data(i).core_id == cpu_data(j).core_id) - cpu_set(j, cpu_core_map[i]); + cpumask_set_cpu(j, &cpu_core_map[i]); } } for_each_present_cpu(i) { unsigned int j; - cpus_clear(per_cpu(cpu_sibling_map, i)); + cpumask_clear(&per_cpu(cpu_sibling_map, i)); if (cpu_data(i).proc_id == -1) { - cpu_set(i, per_cpu(cpu_sibling_map, i)); + cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i)); continue; } for_each_present_cpu(j) { if (cpu_data(i).proc_id == cpu_data(j).proc_id) - cpu_set(j, per_cpu(cpu_sibling_map, i)); + cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i)); } } } @@ -1232,10 +1232,10 @@ int __cpuinit __cpu_up(unsigned int cpu) int ret = smp_boot_one_cpu(cpu); if (!ret) { - cpu_set(cpu, smp_commenced_mask); - while (!cpu_isset(cpu, cpu_online_map)) + cpumask_set_cpu(cpu, &smp_commenced_mask); + while (!cpu_online(cpu)) mb(); - if (!cpu_isset(cpu, cpu_online_map)) { + if (!cpu_online(cpu)) { ret = -ENODEV; } else { /* On SUN4V, writes to %tick and %stick are @@ -1269,7 +1269,7 @@ void cpu_play_dead(void) tb->nonresum_mondo_pa, 0); } - cpu_clear(cpu, smp_commenced_mask); + cpumask_clear_cpu(cpu, &smp_commenced_mask); membar_safe("#Sync"); local_irq_disable(); @@ -1290,13 +1290,13 @@ int __cpu_disable(void) cpuinfo_sparc *c; int i; - for_each_cpu_mask(i, cpu_core_map[cpu]) - cpu_clear(cpu, cpu_core_map[i]); - cpus_clear(cpu_core_map[cpu]); + for_each_cpu(i, &cpu_core_map[cpu]) + cpumask_clear_cpu(cpu, &cpu_core_map[i]); + cpumask_clear(&cpu_core_map[cpu]); - for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu)) - cpu_clear(cpu, per_cpu(cpu_sibling_map, i)); - cpus_clear(per_cpu(cpu_sibling_map, cpu)); + for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu)) + cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i)); + cpumask_clear(&per_cpu(cpu_sibling_map, cpu)); c = &cpu_data(cpu); @@ -1313,7 +1313,7 @@ int __cpu_disable(void) local_irq_disable(); ipi_call_lock(); - cpu_clear(cpu, cpu_online_map); + set_cpu_online(cpu, false); ipi_call_unlock(); cpu_map_rebuild(); @@ -1327,11 +1327,11 @@ void __cpu_die(unsigned int cpu) for (i = 0; i < 100; i++) { smp_rmb(); - if (!cpu_isset(cpu, smp_commenced_mask)) + if (!cpumask_test_cpu(cpu, &smp_commenced_mask)) break; msleep(100); } - if (cpu_isset(cpu, smp_commenced_mask)) { + if (cpumask_test_cpu(cpu, &smp_commenced_mask)) { printk(KERN_ERR "CPU %u didn't die...\n", cpu); } else { #if defined(CONFIG_SUN_LDOMS) @@ -1341,7 +1341,7 @@ void __cpu_die(unsigned int cpu) do { hv_err = sun4v_cpu_stop(cpu); if (hv_err == HV_EOK) { - cpu_clear(cpu, cpu_present_map); + set_cpu_present(cpu, false); break; } } while (--limit > 0); @@ -1362,7 +1362,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void smp_send_reschedule(int cpu) { xcall_deliver((u64) &xcall_receive_signal, 0, 0, - &cpumask_of_cpu(cpu)); + cpumask_of(cpu)); } void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) diff --git a/arch/sparc/kernel/sun4c_irq.c b/arch/sparc/kernel/sun4c_irq.c index 90eea38ad66..f6bf25a2ff8 100644 --- a/arch/sparc/kernel/sun4c_irq.c +++ b/arch/sparc/kernel/sun4c_irq.c @@ -65,62 +65,94 @@ */ unsigned char __iomem *interrupt_enable; -static void sun4c_disable_irq(unsigned int irq_nr) +static void sun4c_mask_irq(struct irq_data *data) { - unsigned long flags; - unsigned char current_mask, new_mask; - - local_irq_save(flags); - irq_nr &= (NR_IRQS - 1); - current_mask = sbus_readb(interrupt_enable); - switch (irq_nr) { - case 1: - new_mask = ((current_mask) & (~(SUN4C_INT_E1))); - break; - case 8: - new_mask = ((current_mask) & (~(SUN4C_INT_E8))); - break; - case 10: - new_mask = ((current_mask) & (~(SUN4C_INT_E10))); - break; - case 14: - new_mask = ((current_mask) & (~(SUN4C_INT_E14))); - break; - default: + unsigned long mask = (unsigned long)data->chip_data; + + if (mask) { + unsigned long flags; + + local_irq_save(flags); + mask = sbus_readb(interrupt_enable) & ~mask; + sbus_writeb(mask, interrupt_enable); local_irq_restore(flags); - return; } - sbus_writeb(new_mask, interrupt_enable); - local_irq_restore(flags); } -static void sun4c_enable_irq(unsigned int irq_nr) +static void sun4c_unmask_irq(struct irq_data *data) { - unsigned long flags; - unsigned char current_mask, new_mask; - - local_irq_save(flags); - irq_nr &= (NR_IRQS - 1); - current_mask = sbus_readb(interrupt_enable); - switch (irq_nr) { - case 1: - new_mask = ((current_mask) | SUN4C_INT_E1); - break; - case 8: - new_mask = ((current_mask) | SUN4C_INT_E8); - break; - case 10: - new_mask = ((current_mask) | SUN4C_INT_E10); - break; - case 14: - new_mask = ((current_mask) | SUN4C_INT_E14); - break; - default: + unsigned long mask = (unsigned long)data->chip_data; + + if (mask) { + unsigned long flags; + + local_irq_save(flags); + mask = sbus_readb(interrupt_enable) | mask; + sbus_writeb(mask, interrupt_enable); local_irq_restore(flags); - return; } - sbus_writeb(new_mask, interrupt_enable); - local_irq_restore(flags); +} + +static unsigned int sun4c_startup_irq(struct irq_data *data) +{ + irq_link(data->irq); + sun4c_unmask_irq(data); + + return 0; +} + +static void sun4c_shutdown_irq(struct irq_data *data) +{ + sun4c_mask_irq(data); + irq_unlink(data->irq); +} + +static struct irq_chip sun4c_irq = { + .name = "sun4c", + .irq_startup = sun4c_startup_irq, + .irq_shutdown = sun4c_shutdown_irq, + .irq_mask = sun4c_mask_irq, + .irq_unmask = sun4c_unmask_irq, +}; + +static unsigned int sun4c_build_device_irq(struct platform_device *op, + unsigned int real_irq) +{ + unsigned int irq; + + if (real_irq >= 16) { + prom_printf("Bogus sun4c IRQ %u\n", real_irq); + prom_halt(); + } + + irq = irq_alloc(real_irq, real_irq); + if (irq) { + unsigned long mask = 0UL; + + switch (real_irq) { + case 1: + mask = SUN4C_INT_E1; + break; + case 8: + mask = SUN4C_INT_E8; + break; + case 10: + mask = SUN4C_INT_E10; + break; + case 14: + mask = SUN4C_INT_E14; + break; + default: + /* All the rest are either always enabled, + * or are for signalling software interrupts. + */ + break; + } + irq_set_chip_and_handler_name(irq, &sun4c_irq, + handle_level_irq, "level"); + irq_set_chip_data(irq, (void *)mask); + } + return irq; } struct sun4c_timer_info { @@ -144,8 +176,9 @@ static void sun4c_load_profile_irq(int cpu, unsigned int limit) static void __init sun4c_init_timers(irq_handler_t counter_fn) { - const struct linux_prom_irqs *irq; + const struct linux_prom_irqs *prom_irqs; struct device_node *dp; + unsigned int irq; const u32 *addr; int err; @@ -163,9 +196,9 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn) sun4c_timers = (void __iomem *) (unsigned long) addr[0]; - irq = of_get_property(dp, "intr", NULL); + prom_irqs = of_get_property(dp, "intr", NULL); of_node_put(dp); - if (!irq) { + if (!prom_irqs) { prom_printf("sun4c_init_timers: No intr property\n"); prom_halt(); } @@ -178,15 +211,15 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn) master_l10_counter = &sun4c_timers->l10_count; - err = request_irq(irq[0].pri, counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), - "timer", NULL); + irq = sun4c_build_device_irq(NULL, prom_irqs[0].pri); + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); if (err) { prom_printf("sun4c_init_timers: request_irq() fails with %d\n", err); prom_halt(); } - sun4c_disable_irq(irq[1].pri); + /* disable timer interrupt */ + sun4c_mask_irq(irq_get_irq_data(irq)); } #ifdef CONFIG_SMP @@ -215,14 +248,11 @@ void __init sun4c_init_IRQ(void) interrupt_enable = (void __iomem *) (unsigned long) addr[0]; - BTFIXUPSET_CALL(enable_irq, sun4c_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, sun4c_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, sun4c_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, sun4c_disable_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(clear_clock_irq, sun4c_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, sun4c_load_profile_irq, BTFIXUPCALL_NOP); - sparc_irq_config.init_timers = sun4c_init_timers; + sparc_irq_config.init_timers = sun4c_init_timers; + sparc_irq_config.build_device_irq = sun4c_build_device_irq; #ifdef CONFIG_SMP BTFIXUPSET_CALL(set_cpu_int, sun4c_nop, BTFIXUPCALL_NOP); diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c index 77b4a899271..a9ea60eb2c1 100644 --- a/arch/sparc/kernel/sun4d_irq.c +++ b/arch/sparc/kernel/sun4d_irq.c @@ -14,6 +14,7 @@ #include <asm/io.h> #include <asm/sbi.h> #include <asm/cacheflush.h> +#include <asm/setup.h> #include "kernel.h" #include "irq.h" @@ -22,22 +23,20 @@ * cpu local. CPU local interrupts cover the timer interrupts * and whatnot, and we encode those as normal PILs between * 0 and 15. - * - * SBUS interrupts are encoded integers including the board number - * (plus one), the SBUS level, and the SBUS slot number. Sun4D - * IRQ dispatch is done by: - * - * 1) Reading the BW local interrupt table in order to get the bus - * interrupt mask. - * - * This table is indexed by SBUS interrupt level which can be - * derived from the PIL we got interrupted on. - * - * 2) For each bus showing interrupt pending from #1, read the - * SBI interrupt state register. This will indicate which slots - * have interrupts pending for that SBUS interrupt level. + * SBUS interrupts are encodes as a combination of board, level and slot. */ +struct sun4d_handler_data { + unsigned int cpuid; /* target cpu */ + unsigned int real_irq; /* interrupt level */ +}; + + +static unsigned int sun4d_encode_irq(int board, int lvl, int slot) +{ + return (board + 1) << 5 | (lvl << 2) | slot; +} + struct sun4d_timer_regs { u32 l10_timer_limit; u32 l10_cur_countx; @@ -48,17 +47,12 @@ struct sun4d_timer_regs { static struct sun4d_timer_regs __iomem *sun4d_timers; -#define TIMER_IRQ 10 - -#define MAX_STATIC_ALLOC 4 -static unsigned char sbus_tid[32]; - -static struct irqaction *irq_action[NR_IRQS]; +#define SUN4D_TIMER_IRQ 10 -static struct sbus_action { - struct irqaction *action; - /* For SMP this needs to be extended */ -} *sbus_actions; +/* Specify which cpu handle interrupts from which board. + * Index is board - value is cpu. + */ +static unsigned char board_to_cpu[32]; static int pil_to_sbus[] = { 0, @@ -79,152 +73,81 @@ static int pil_to_sbus[] = { 0, }; -static int sbus_to_pil[] = { - 0, - 2, - 3, - 5, - 7, - 9, - 11, - 13, -}; - -static int nsbi; - /* Exported for sun4d_smp.c */ DEFINE_SPINLOCK(sun4d_imsk_lock); -int show_sun4d_interrupts(struct seq_file *p, void *v) +/* SBUS interrupts are encoded integers including the board number + * (plus one), the SBUS level, and the SBUS slot number. Sun4D + * IRQ dispatch is done by: + * + * 1) Reading the BW local interrupt table in order to get the bus + * interrupt mask. + * + * This table is indexed by SBUS interrupt level which can be + * derived from the PIL we got interrupted on. + * + * 2) For each bus showing interrupt pending from #1, read the + * SBI interrupt state register. This will indicate which slots + * have interrupts pending for that SBUS interrupt level. + * + * 3) Call the genreric IRQ support. + */ +static void sun4d_sbus_handler_irq(int sbusl) { - int i = *(loff_t *) v, j = 0, k = 0, sbusl; - struct irqaction *action; - unsigned long flags; -#ifdef CONFIG_SMP - int x; -#endif - - spin_lock_irqsave(&irq_action_lock, flags); - if (i < NR_IRQS) { - sbusl = pil_to_sbus[i]; - if (!sbusl) { - action = *(i + irq_action); - if (!action) - goto out_unlock; - } else { - for (j = 0; j < nsbi; j++) { - for (k = 0; k < 4; k++) - action = sbus_actions[(j << 5) + (sbusl << 2) + k].action; - if (action) - goto found_it; - } - goto out_unlock; - } -found_it: seq_printf(p, "%3d: ", i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(x) - seq_printf(p, "%10u ", - kstat_cpu(cpu_logical_map(x)).irqs[i]); -#endif - seq_printf(p, "%c %s", - (action->flags & IRQF_DISABLED) ? '+' : ' ', - action->name); - action = action->next; - for (;;) { - for (; action; action = action->next) { - seq_printf(p, ",%s %s", - (action->flags & IRQF_DISABLED) ? " +" : "", - action->name); - } - if (!sbusl) - break; - k++; - if (k < 4) { - action = sbus_actions[(j << 5) + (sbusl << 2) + k].action; - } else { - j++; - if (j == nsbi) - break; - k = 0; - action = sbus_actions[(j << 5) + (sbusl << 2)].action; + unsigned int bus_mask; + unsigned int sbino, slot; + unsigned int sbil; + + bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff; + bw_clear_intr_mask(sbusl, bus_mask); + + sbil = (sbusl << 2); + /* Loop for each pending SBI */ + for (sbino = 0; bus_mask; sbino++) { + unsigned int idx, mask; + + bus_mask >>= 1; + if (!(bus_mask & 1)) + continue; + /* XXX This seems to ACK the irq twice. acquire_sbi() + * XXX uses swap, therefore this writes 0xf << sbil, + * XXX then later release_sbi() will write the individual + * XXX bits which were set again. + */ + mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil); + mask &= (0xf << sbil); + + /* Loop for each pending SBI slot */ + idx = 0; + slot = (1 << sbil); + while (mask != 0) { + unsigned int pil; + struct irq_bucket *p; + + idx++; + slot <<= 1; + if (!(mask & slot)) + continue; + + mask &= ~slot; + pil = sun4d_encode_irq(sbino, sbil, idx); + + p = irq_map[pil]; + while (p) { + struct irq_bucket *next; + + next = p->next; + generic_handle_irq(p->irq); + p = next; } + release_sbi(SBI2DEVID(sbino), slot); } - seq_putc(p, '\n'); } -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); - return 0; -} - -void sun4d_free_irq(unsigned int irq, void *dev_id) -{ - struct irqaction *action, **actionp; - struct irqaction *tmp = NULL; - unsigned long flags; - - spin_lock_irqsave(&irq_action_lock, flags); - if (irq < 15) - actionp = irq + irq_action; - else - actionp = &(sbus_actions[irq - (1 << 5)].action); - action = *actionp; - if (!action) { - printk(KERN_ERR "Trying to free free IRQ%d\n", irq); - goto out_unlock; - } - if (dev_id) { - for (; action; action = action->next) { - if (action->dev_id == dev_id) - break; - tmp = action; - } - if (!action) { - printk(KERN_ERR "Trying to free free shared IRQ%d\n", - irq); - goto out_unlock; - } - } else if (action->flags & IRQF_SHARED) { - printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n", - irq); - goto out_unlock; - } - if (action->flags & SA_STATIC_ALLOC) { - /* - * This interrupt is marked as specially allocated - * so it is a bad idea to free it. - */ - printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n", - irq, action->name); - goto out_unlock; - } - - if (tmp) - tmp->next = action->next; - else - *actionp = action->next; - - spin_unlock_irqrestore(&irq_action_lock, flags); - - synchronize_irq(irq); - - spin_lock_irqsave(&irq_action_lock, flags); - - kfree(action); - - if (!(*actionp)) - __disable_irq(irq); - -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); } void sun4d_handler_irq(int pil, struct pt_regs *regs) { struct pt_regs *old_regs; - struct irqaction *action; - int cpu = smp_processor_id(); /* SBUS IRQ level (1 - 7) */ int sbusl = pil_to_sbus[pil]; @@ -233,160 +156,96 @@ void sun4d_handler_irq(int pil, struct pt_regs *regs) cc_set_iclr(1 << pil); +#ifdef CONFIG_SMP + /* + * Check IPI data structures after IRQ has been cleared. Hard and Soft + * IRQ can happen at the same time, so both cases are always handled. + */ + if (pil == SUN4D_IPI_IRQ) + sun4d_ipi_interrupt(); +#endif + old_regs = set_irq_regs(regs); irq_enter(); - kstat_cpu(cpu).irqs[pil]++; - if (!sbusl) { - action = *(pil + irq_action); - if (!action) - unexpected_irq(pil, NULL, regs); - do { - action->handler(pil, action->dev_id); - action = action->next; - } while (action); + if (sbusl == 0) { + /* cpu interrupt */ + struct irq_bucket *p; + + p = irq_map[pil]; + while (p) { + struct irq_bucket *next; + + next = p->next; + generic_handle_irq(p->irq); + p = next; + } } else { - int bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff; - int sbino; - struct sbus_action *actionp; - unsigned mask, slot; - int sbil = (sbusl << 2); - - bw_clear_intr_mask(sbusl, bus_mask); - - /* Loop for each pending SBI */ - for (sbino = 0; bus_mask; sbino++, bus_mask >>= 1) - if (bus_mask & 1) { - mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil); - mask &= (0xf << sbil); - actionp = sbus_actions + (sbino << 5) + (sbil); - /* Loop for each pending SBI slot */ - for (slot = (1 << sbil); mask; slot <<= 1, actionp++) - if (mask & slot) { - mask &= ~slot; - action = actionp->action; - - if (!action) - unexpected_irq(pil, NULL, regs); - do { - action->handler(pil, action->dev_id); - action = action->next; - } while (action); - release_sbi(SBI2DEVID(sbino), slot); - } - } + /* SBUS interrupt */ + sun4d_sbus_handler_irq(sbusl); } irq_exit(); set_irq_regs(old_regs); } -int sun4d_request_irq(unsigned int irq, - irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) + +static void sun4d_mask_irq(struct irq_data *data) { - struct irqaction *action, *tmp = NULL, **actionp; + struct sun4d_handler_data *handler_data = data->handler_data; + unsigned int real_irq; +#ifdef CONFIG_SMP + int cpuid = handler_data->cpuid; unsigned long flags; - int ret; - - if (irq > 14 && irq < (1 << 5)) { - ret = -EINVAL; - goto out; - } - - if (!handler) { - ret = -EINVAL; - goto out; - } - - spin_lock_irqsave(&irq_action_lock, flags); - - if (irq >= (1 << 5)) - actionp = &(sbus_actions[irq - (1 << 5)].action); - else - actionp = irq + irq_action; - action = *actionp; - - if (action) { - if ((action->flags & IRQF_SHARED) && (irqflags & IRQF_SHARED)) { - for (tmp = action; tmp->next; tmp = tmp->next) - /* find last entry - tmp used below */; - } else { - ret = -EBUSY; - goto out_unlock; - } - if ((action->flags & IRQF_DISABLED) ^ (irqflags & IRQF_DISABLED)) { - printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n", - irq); - ret = -EBUSY; - goto out_unlock; - } - action = NULL; /* Or else! */ - } - - /* If this is flagged as statically allocated then we use our - * private struct which is never freed. - */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n", - irq, devname); - } - - if (action == NULL) - action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC); - - if (!action) { - ret = -ENOMEM; - goto out_unlock; - } - - action->handler = handler; - action->flags = irqflags; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - if (tmp) - tmp->next = action; - else - *actionp = action; - - __enable_irq(irq); - - ret = 0; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); -out: - return ret; +#endif + real_irq = handler_data->real_irq; +#ifdef CONFIG_SMP + spin_lock_irqsave(&sun4d_imsk_lock, flags); + cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | (1 << real_irq)); + spin_unlock_irqrestore(&sun4d_imsk_lock, flags); +#else + cc_set_imsk(cc_get_imsk() | (1 << real_irq)); +#endif } -static void sun4d_disable_irq(unsigned int irq) +static void sun4d_unmask_irq(struct irq_data *data) { - int tid = sbus_tid[(irq >> 5) - 1]; + struct sun4d_handler_data *handler_data = data->handler_data; + unsigned int real_irq; +#ifdef CONFIG_SMP + int cpuid = handler_data->cpuid; unsigned long flags; +#endif + real_irq = handler_data->real_irq; - if (irq < NR_IRQS) - return; - +#ifdef CONFIG_SMP spin_lock_irqsave(&sun4d_imsk_lock, flags); - cc_set_imsk_other(tid, cc_get_imsk_other(tid) | (1 << sbus_to_pil[(irq >> 2) & 7])); + cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | ~(1 << real_irq)); spin_unlock_irqrestore(&sun4d_imsk_lock, flags); +#else + cc_set_imsk(cc_get_imsk() | ~(1 << real_irq)); +#endif } -static void sun4d_enable_irq(unsigned int irq) +static unsigned int sun4d_startup_irq(struct irq_data *data) { - int tid = sbus_tid[(irq >> 5) - 1]; - unsigned long flags; - - if (irq < NR_IRQS) - return; + irq_link(data->irq); + sun4d_unmask_irq(data); + return 0; +} - spin_lock_irqsave(&sun4d_imsk_lock, flags); - cc_set_imsk_other(tid, cc_get_imsk_other(tid) & ~(1 << sbus_to_pil[(irq >> 2) & 7])); - spin_unlock_irqrestore(&sun4d_imsk_lock, flags); +static void sun4d_shutdown_irq(struct irq_data *data) +{ + sun4d_mask_irq(data); + irq_unlink(data->irq); } +struct irq_chip sun4d_irq = { + .name = "sun4d", + .irq_startup = sun4d_startup_irq, + .irq_shutdown = sun4d_shutdown_irq, + .irq_unmask = sun4d_unmask_irq, + .irq_mask = sun4d_mask_irq, +}; + #ifdef CONFIG_SMP static void sun4d_set_cpu_int(int cpu, int level) { @@ -413,7 +272,7 @@ void __init sun4d_distribute_irqs(void) for_each_node_by_name(dp, "sbi") { int devid = of_getintprop_default(dp, "device-id", 0); int board = of_getintprop_default(dp, "board#", 0); - sbus_tid[board] = cpuid; + board_to_cpu[board] = cpuid; set_sbi_tid(devid, cpuid << 3); } printk(KERN_ERR "All sbus IRQs directed to CPU%d\n", cpuid); @@ -443,15 +302,16 @@ static void __init sun4d_load_profile_irqs(void) unsigned int sun4d_build_device_irq(struct platform_device *op, unsigned int real_irq) { - static int pil_to_sbus[] = { - 0, 0, 1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 0, - }; struct device_node *dp = op->dev.of_node; struct device_node *io_unit, *sbi = dp->parent; const struct linux_prom_registers *regs; + struct sun4d_handler_data *handler_data; + unsigned int pil; + unsigned int irq; int board, slot; int sbusl; + irq = 0; while (sbi) { if (!strcmp(sbi->name, "sbi")) break; @@ -484,7 +344,28 @@ unsigned int sun4d_build_device_irq(struct platform_device *op, sbusl = pil_to_sbus[real_irq]; if (sbusl) - return (((board + 1) << 5) + (sbusl << 2) + slot); + pil = sun4d_encode_irq(board, sbusl, slot); + else + pil = real_irq; + + irq = irq_alloc(real_irq, pil); + if (irq == 0) + goto err_out; + + handler_data = irq_get_handler_data(irq); + if (unlikely(handler_data)) + goto err_out; + + handler_data = kzalloc(sizeof(struct sun4d_handler_data), GFP_ATOMIC); + if (unlikely(!handler_data)) { + prom_printf("IRQ: kzalloc(sun4d_handler_data) failed.\n"); + prom_halt(); + } + handler_data->cpuid = board_to_cpu[board]; + handler_data->real_irq = real_irq; + irq_set_chip_and_handler_name(irq, &sun4d_irq, + handle_level_irq, "level"); + irq_set_handler_data(irq, handler_data); err_out: return real_irq; @@ -518,6 +399,7 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn) { struct device_node *dp; struct resource res; + unsigned int irq; const u32 *reg; int err; @@ -552,9 +434,8 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn) master_l10_counter = &sun4d_timers->l10_cur_count; - err = request_irq(TIMER_IRQ, counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), - "timer", NULL); + irq = sun4d_build_device_irq(NULL, SUN4D_TIMER_IRQ); + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); if (err) { prom_printf("sun4d_init_timers: request_irq() failed with %d\n", err); @@ -567,27 +448,16 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn) void __init sun4d_init_sbi_irq(void) { struct device_node *dp; - int target_cpu = 0; + int target_cpu; -#ifdef CONFIG_SMP target_cpu = boot_cpu_id; -#endif - - nsbi = 0; - for_each_node_by_name(dp, "sbi") - nsbi++; - sbus_actions = kzalloc(nsbi * 8 * 4 * sizeof(struct sbus_action), GFP_ATOMIC); - if (!sbus_actions) { - prom_printf("SUN4D: Cannot allocate sbus_actions, halting.\n"); - prom_halt(); - } for_each_node_by_name(dp, "sbi") { int devid = of_getintprop_default(dp, "device-id", 0); int board = of_getintprop_default(dp, "board#", 0); unsigned int mask; set_sbi_tid(devid, target_cpu << 3); - sbus_tid[board] = target_cpu; + board_to_cpu[board] = target_cpu; /* Get rid of pending irqs from PROM */ mask = acquire_sbi(devid, 0xffffffff); @@ -603,12 +473,10 @@ void __init sun4d_init_IRQ(void) { local_irq_disable(); - BTFIXUPSET_CALL(enable_irq, sun4d_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, sun4d_disable_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(clear_clock_irq, sun4d_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, sun4d_load_profile_irq, BTFIXUPCALL_NORM); - sparc_irq_config.init_timers = sun4d_init_timers; + sparc_irq_config.init_timers = sun4d_init_timers; sparc_irq_config.build_device_irq = sun4d_build_device_irq; #ifdef CONFIG_SMP diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 475d50b96cd..133387980b5 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -32,6 +32,7 @@ static inline unsigned long sun4d_swap(volatile unsigned long *ptr, unsigned lon return val; } +static void smp4d_ipi_init(void); static void smp_setup_percpu_timer(void); static unsigned char cpu_leds[32]; @@ -80,8 +81,6 @@ void __cpuinit smp4d_callin(void) local_flush_cache_all(); local_flush_tlb_all(); - cpu_probe(); - while ((unsigned long)current_set[cpuid] < PAGE_OFFSET) barrier(); @@ -105,7 +104,7 @@ void __cpuinit smp4d_callin(void) local_irq_enable(); /* We don't allow PIL 14 yet */ - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) barrier(); spin_lock_irqsave(&sun4d_imsk_lock, flags); @@ -120,6 +119,7 @@ void __cpuinit smp4d_callin(void) */ void __init smp4d_boot_cpus(void) { + smp4d_ipi_init(); if (boot_cpu_id) current_set[0] = NULL; smp_setup_percpu_timer(); @@ -191,6 +191,80 @@ void __init smp4d_smp_done(void) sun4d_distribute_irqs(); } +/* Memory structure giving interrupt handler information about IPI generated */ +struct sun4d_ipi_work { + int single; + int msk; + int resched; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct sun4d_ipi_work, sun4d_ipi_work); + +/* Initialize IPIs on the SUN4D SMP machine */ +static void __init smp4d_ipi_init(void) +{ + int cpu; + struct sun4d_ipi_work *work; + + printk(KERN_INFO "smp4d: setup IPI at IRQ %d\n", SUN4D_IPI_IRQ); + + for_each_possible_cpu(cpu) { + work = &per_cpu(sun4d_ipi_work, cpu); + work->single = work->msk = work->resched = 0; + } +} + +void sun4d_ipi_interrupt(void) +{ + struct sun4d_ipi_work *work = &__get_cpu_var(sun4d_ipi_work); + + if (work->single) { + work->single = 0; + smp_call_function_single_interrupt(); + } + if (work->msk) { + work->msk = 0; + smp_call_function_interrupt(); + } + if (work->resched) { + work->resched = 0; + smp_resched_interrupt(); + } +} + +static void smp4d_ipi_single(int cpu) +{ + struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu); + + /* Mark work */ + work->single = 1; + + /* Generate IRQ on the CPU */ + sun4d_send_ipi(cpu, SUN4D_IPI_IRQ); +} + +static void smp4d_ipi_mask_one(int cpu) +{ + struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu); + + /* Mark work */ + work->msk = 1; + + /* Generate IRQ on the CPU */ + sun4d_send_ipi(cpu, SUN4D_IPI_IRQ); +} + +static void smp4d_ipi_resched(int cpu) +{ + struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu); + + /* Mark work */ + work->resched = 1; + + /* Generate IRQ on the CPU (any IRQ will cause resched) */ + sun4d_send_ipi(cpu, SUN4D_IPI_IRQ); +} + static struct smp_funcall { smpfunc_t func; unsigned long arg1; @@ -239,10 +313,10 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, { register int i; - cpu_clear(smp_processor_id(), mask); - cpus_and(mask, cpu_online_map, mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i <= high; i++) { - if (cpu_isset(i, mask)) { + if (cpumask_test_cpu(i, &mask)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; sun4d_send_ipi(i, IRQ_CROSS_CALL); @@ -255,7 +329,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_in[i]) barrier(); @@ -263,7 +337,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_out[i]) barrier(); @@ -356,6 +430,9 @@ void __init sun4d_init_smp(void) BTFIXUPSET_BLACKBOX(load_current, smp4d_blackbox_current); BTFIXUPSET_CALL(smp_cross_call, smp4d_cross_call, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4d_processor_id, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_resched, smp4d_ipi_resched, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_single, smp4d_ipi_single, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_mask_one, smp4d_ipi_mask_one, BTFIXUPCALL_NORM); for (i = 0; i < NR_CPUS; i++) { ccall_info.processors_in[i] = 1; diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c index 69df6257a32..422c16dad1f 100644 --- a/arch/sparc/kernel/sun4m_irq.c +++ b/arch/sparc/kernel/sun4m_irq.c @@ -100,6 +100,11 @@ struct sun4m_irq_percpu __iomem *sun4m_irq_percpu[SUN4M_NCPUS]; struct sun4m_irq_global __iomem *sun4m_irq_global; +struct sun4m_handler_data { + bool percpu; + long mask; +}; + /* Dave Redman (djhr@tadpole.co.uk) * The sun4m interrupt registers. */ @@ -142,9 +147,9 @@ struct sun4m_irq_global __iomem *sun4m_irq_global; #define OBP_INT_LEVEL_VME 0x40 #define SUN4M_TIMER_IRQ (OBP_INT_LEVEL_ONBOARD | 10) -#define SUM4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14) +#define SUN4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14) -static unsigned long irq_mask[0x50] = { +static unsigned long sun4m_imask[0x50] = { /* 0x00 - SMP */ 0, SUN4M_SOFT_INT(1), SUN4M_SOFT_INT(2), SUN4M_SOFT_INT(3), @@ -169,7 +174,7 @@ static unsigned long irq_mask[0x50] = { SUN4M_INT_VIDEO, SUN4M_INT_MODULE, SUN4M_INT_REALTIME, SUN4M_INT_FLOPPY, (SUN4M_INT_SERIAL | SUN4M_INT_KBDMS), - SUN4M_INT_AUDIO, 0, SUN4M_INT_MODULE_ERR, + SUN4M_INT_AUDIO, SUN4M_INT_E14, SUN4M_INT_MODULE_ERR, /* 0x30 - sbus */ 0, 0, SUN4M_INT_SBUS(0), SUN4M_INT_SBUS(1), 0, SUN4M_INT_SBUS(2), 0, SUN4M_INT_SBUS(3), @@ -182,105 +187,110 @@ static unsigned long irq_mask[0x50] = { 0, SUN4M_INT_VME(6), 0, 0 }; -static unsigned long sun4m_get_irqmask(unsigned int irq) +static void sun4m_mask_irq(struct irq_data *data) { - unsigned long mask; - - if (irq < 0x50) - mask = irq_mask[irq]; - else - mask = 0; + struct sun4m_handler_data *handler_data = data->handler_data; + int cpu = smp_processor_id(); - if (!mask) - printk(KERN_ERR "sun4m_get_irqmask: IRQ%d has no valid mask!\n", - irq); + if (handler_data->mask) { + unsigned long flags; - return mask; + local_irq_save(flags); + if (handler_data->percpu) { + sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->set); + } else { + sbus_writel(handler_data->mask, &sun4m_irq_global->mask_set); + } + local_irq_restore(flags); + } } -static void sun4m_disable_irq(unsigned int irq_nr) +static void sun4m_unmask_irq(struct irq_data *data) { - unsigned long mask, flags; + struct sun4m_handler_data *handler_data = data->handler_data; int cpu = smp_processor_id(); - mask = sun4m_get_irqmask(irq_nr); - local_irq_save(flags); - if (irq_nr > 15) - sbus_writel(mask, &sun4m_irq_global->mask_set); - else - sbus_writel(mask, &sun4m_irq_percpu[cpu]->set); - local_irq_restore(flags); -} - -static void sun4m_enable_irq(unsigned int irq_nr) -{ - unsigned long mask, flags; - int cpu = smp_processor_id(); + if (handler_data->mask) { + unsigned long flags; - /* Dreadful floppy hack. When we use 0x2b instead of - * 0x0b the system blows (it starts to whistle!). - * So we continue to use 0x0b. Fixme ASAP. --P3 - */ - if (irq_nr != 0x0b) { - mask = sun4m_get_irqmask(irq_nr); - local_irq_save(flags); - if (irq_nr > 15) - sbus_writel(mask, &sun4m_irq_global->mask_clear); - else - sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear); - local_irq_restore(flags); - } else { local_irq_save(flags); - sbus_writel(SUN4M_INT_FLOPPY, &sun4m_irq_global->mask_clear); + if (handler_data->percpu) { + sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->clear); + } else { + sbus_writel(handler_data->mask, &sun4m_irq_global->mask_clear); + } local_irq_restore(flags); } } -static unsigned long cpu_pil_to_imask[16] = { -/*0*/ 0x00000000, -/*1*/ 0x00000000, -/*2*/ SUN4M_INT_SBUS(0) | SUN4M_INT_VME(0), -/*3*/ SUN4M_INT_SBUS(1) | SUN4M_INT_VME(1), -/*4*/ SUN4M_INT_SCSI, -/*5*/ SUN4M_INT_SBUS(2) | SUN4M_INT_VME(2), -/*6*/ SUN4M_INT_ETHERNET, -/*7*/ SUN4M_INT_SBUS(3) | SUN4M_INT_VME(3), -/*8*/ SUN4M_INT_VIDEO, -/*9*/ SUN4M_INT_SBUS(4) | SUN4M_INT_VME(4) | SUN4M_INT_MODULE_ERR, -/*10*/ SUN4M_INT_REALTIME, -/*11*/ SUN4M_INT_SBUS(5) | SUN4M_INT_VME(5) | SUN4M_INT_FLOPPY, -/*12*/ SUN4M_INT_SERIAL | SUN4M_INT_KBDMS, -/*13*/ SUN4M_INT_SBUS(6) | SUN4M_INT_VME(6) | SUN4M_INT_AUDIO, -/*14*/ SUN4M_INT_E14, -/*15*/ SUN4M_INT_ERROR, -}; +static unsigned int sun4m_startup_irq(struct irq_data *data) +{ + irq_link(data->irq); + sun4m_unmask_irq(data); + return 0; +} -/* We assume the caller has disabled local interrupts when these are called, - * or else very bizarre behavior will result. - */ -static void sun4m_disable_pil_irq(unsigned int pil) +static void sun4m_shutdown_irq(struct irq_data *data) { - sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_set); + sun4m_mask_irq(data); + irq_unlink(data->irq); } -static void sun4m_enable_pil_irq(unsigned int pil) +static struct irq_chip sun4m_irq = { + .name = "sun4m", + .irq_startup = sun4m_startup_irq, + .irq_shutdown = sun4m_shutdown_irq, + .irq_mask = sun4m_mask_irq, + .irq_unmask = sun4m_unmask_irq, +}; + + +static unsigned int sun4m_build_device_irq(struct platform_device *op, + unsigned int real_irq) { - sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_clear); + struct sun4m_handler_data *handler_data; + unsigned int irq; + unsigned int pil; + + if (real_irq >= OBP_INT_LEVEL_VME) { + prom_printf("Bogus sun4m IRQ %u\n", real_irq); + prom_halt(); + } + pil = (real_irq & 0xf); + irq = irq_alloc(real_irq, pil); + + if (irq == 0) + goto out; + + handler_data = irq_get_handler_data(irq); + if (unlikely(handler_data)) + goto out; + + handler_data = kzalloc(sizeof(struct sun4m_handler_data), GFP_ATOMIC); + if (unlikely(!handler_data)) { + prom_printf("IRQ: kzalloc(sun4m_handler_data) failed.\n"); + prom_halt(); + } + + handler_data->mask = sun4m_imask[real_irq]; + handler_data->percpu = real_irq < OBP_INT_LEVEL_ONBOARD; + irq_set_chip_and_handler_name(irq, &sun4m_irq, + handle_level_irq, "level"); + irq_set_handler_data(irq, handler_data); + +out: + return irq; } #ifdef CONFIG_SMP static void sun4m_send_ipi(int cpu, int level) { - unsigned long mask = sun4m_get_irqmask(level); - - sbus_writel(mask, &sun4m_irq_percpu[cpu]->set); + sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->set); } static void sun4m_clear_ipi(int cpu, int level) { - unsigned long mask = sun4m_get_irqmask(level); - - sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear); + sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->clear); } static void sun4m_set_udt(int cpu) @@ -343,7 +353,15 @@ void sun4m_nmi(struct pt_regs *regs) prom_halt(); } -/* Exported for sun4m_smp.c */ +void sun4m_unmask_profile_irq(void) +{ + unsigned long flags; + + local_irq_save(flags); + sbus_writel(sun4m_imask[SUN4M_PROFILE_IRQ], &sun4m_irq_global->mask_clear); + local_irq_restore(flags); +} + void sun4m_clear_profile_irq(int cpu) { sbus_readl(&timers_percpu[cpu]->l14_limit); @@ -358,6 +376,7 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn) { struct device_node *dp = of_find_node_by_name(NULL, "counter"); int i, err, len, num_cpu_timers; + unsigned int irq; const u32 *addr; if (!dp) { @@ -384,8 +403,9 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn) master_l10_counter = &timers_global->l10_count; - err = request_irq(SUN4M_TIMER_IRQ, counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); + irq = sun4m_build_device_irq(NULL, SUN4M_TIMER_IRQ); + + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); if (err) { printk(KERN_ERR "sun4m_init_timers: Register IRQ error %d.\n", err); @@ -452,14 +472,11 @@ void __init sun4m_init_IRQ(void) if (num_cpu_iregs == 4) sbus_writel(0, &sun4m_irq_global->interrupt_target); - BTFIXUPSET_CALL(enable_irq, sun4m_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, sun4m_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, sun4m_enable_pil_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, sun4m_disable_pil_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(clear_clock_irq, sun4m_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, sun4m_load_profile_irq, BTFIXUPCALL_NORM); sparc_irq_config.init_timers = sun4m_init_timers; + sparc_irq_config.build_device_irq = sun4m_build_device_irq; #ifdef CONFIG_SMP BTFIXUPSET_CALL(set_cpu_int, sun4m_send_ipi, BTFIXUPCALL_NORM); diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index 5cc7dc51de3..59476868652 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -15,6 +15,9 @@ #include "irq.h" #include "kernel.h" +#define IRQ_IPI_SINGLE 12 +#define IRQ_IPI_MASK 13 +#define IRQ_IPI_RESCHED 14 #define IRQ_CROSS_CALL 15 static inline unsigned long @@ -26,6 +29,7 @@ swap_ulong(volatile unsigned long *ptr, unsigned long val) return val; } +static void smp4m_ipi_init(void); static void smp_setup_percpu_timer(void); void __cpuinit smp4m_callin(void) @@ -59,8 +63,6 @@ void __cpuinit smp4m_callin(void) local_flush_cache_all(); local_flush_tlb_all(); - cpu_probe(); - /* Fix idle thread fields. */ __asm__ __volatile__("ld [%0], %%g6\n\t" : : "r" (¤t_set[cpuid]) @@ -70,7 +72,7 @@ void __cpuinit smp4m_callin(void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) mb(); local_irq_enable(); @@ -83,6 +85,7 @@ void __cpuinit smp4m_callin(void) */ void __init smp4m_boot_cpus(void) { + smp4m_ipi_init(); smp_setup_percpu_timer(); local_flush_cache_all(); } @@ -150,18 +153,25 @@ void __init smp4m_smp_done(void) /* Ok, they are spinning and ready to go. */ } -/* At each hardware IRQ, we get this called to forward IRQ reception - * to the next processor. The caller must disable the IRQ level being - * serviced globally so that there are no double interrupts received. - * - * XXX See sparc64 irq.c. - */ -void smp4m_irq_rotate(int cpu) + +/* Initialize IPIs on the SUN4M SMP machine */ +static void __init smp4m_ipi_init(void) +{ +} + +static void smp4m_ipi_resched(int cpu) +{ + set_cpu_int(cpu, IRQ_IPI_RESCHED); +} + +static void smp4m_ipi_single(int cpu) { - int next = cpu_data(cpu).next; + set_cpu_int(cpu, IRQ_IPI_SINGLE); +} - if (next != cpu) - set_irq_udt(next); +static void smp4m_ipi_mask_one(int cpu) +{ + set_cpu_int(cpu, IRQ_IPI_MASK); } static struct smp_funcall { @@ -199,10 +209,10 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, { register int i; - cpu_clear(smp_processor_id(), mask); - cpus_and(mask, cpu_online_map, mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i < ncpus; i++) { - if (cpu_isset(i, mask)) { + if (cpumask_test_cpu(i, &mask)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; set_cpu_int(i, IRQ_CROSS_CALL); @@ -218,7 +228,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_in[i]) barrier(); @@ -226,7 +236,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_out[i]) barrier(); @@ -277,7 +287,7 @@ static void __cpuinit smp_setup_percpu_timer(void) load_profile_irq(cpu, lvl14_resolution); if (cpu == boot_cpu_id) - enable_pil_irq(14); + sun4m_unmask_profile_irq(); } static void __init smp4m_blackbox_id(unsigned *addr) @@ -306,4 +316,7 @@ void __init sun4m_init_smp(void) BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current); BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_resched, smp4m_ipi_resched, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_single, smp4m_ipi_single, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_mask_one, smp4m_ipi_mask_one, BTFIXUPCALL_NORM); } diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index 1eb8b00aed7..7408201d7ef 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -103,9 +103,10 @@ static unsigned long run_on_cpu(unsigned long cpu, unsigned long (*func)(unsigned long), unsigned long arg) { - cpumask_t old_affinity = current->cpus_allowed; + cpumask_t old_affinity; unsigned long ret; + cpumask_copy(&old_affinity, tsk_cpus_allowed(current)); /* should return -EINVAL to userspace */ if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) return 0; diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 47ac73c32e8..332c83ff770 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -84,4 +84,4 @@ sys_call_table: /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv /*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime -/*335*/ .long sys_syncfs +/*335*/ .long sys_syncfs, sys_sendmmsg diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 4f3170c1ef4..43887ca0be0 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -85,7 +85,7 @@ sys_call_table32: /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init /*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime - .word sys_syncfs + .word sys_syncfs, compat_sys_sendmmsg #endif /* CONFIG_COMPAT */ @@ -162,4 +162,4 @@ sys_call_table: /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime - .word sys_syncfs + .word sys_syncfs, sys_sendmmsg diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 96046a4024c..1060e0672a4 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -228,14 +228,10 @@ static void __init sbus_time_init(void) void __init time_init(void) { -#ifdef CONFIG_PCI - extern void pci_time_init(void); - if (pcic_present()) { + if (pcic_present()) pci_time_init(); - return; - } -#endif - sbus_time_init(); + else + sbus_time_init(); } diff --git a/arch/sparc/kernel/us2e_cpufreq.c b/arch/sparc/kernel/us2e_cpufreq.c index 8f982b76c71..531d54fc982 100644 --- a/arch/sparc/kernel/us2e_cpufreq.c +++ b/arch/sparc/kernel/us2e_cpufreq.c @@ -237,7 +237,7 @@ static unsigned int us2e_freq_get(unsigned int cpu) if (!cpu_online(cpu)) return 0; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); clock_tick = sparc64_get_clock_tick(cpu) / 1000; @@ -258,7 +258,7 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index) if (!cpu_online(cpu)) return; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000; diff --git a/arch/sparc/kernel/us3_cpufreq.c b/arch/sparc/kernel/us3_cpufreq.c index f35d1e79454..9a8ceb70083 100644 --- a/arch/sparc/kernel/us3_cpufreq.c +++ b/arch/sparc/kernel/us3_cpufreq.c @@ -85,7 +85,7 @@ static unsigned int us3_freq_get(unsigned int cpu) if (!cpu_online(cpu)) return 0; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); reg = read_safari_cfg(); @@ -105,7 +105,7 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index) if (!cpu_online(cpu)) return; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = sparc64_get_clock_tick(cpu) / 1000; diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 846d1c4374e..7f01b8fce8b 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -15,7 +15,6 @@ lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o lib-$(CONFIG_SPARC32) += copy_user.o locks.o lib-y += atomic_$(BITS).o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o -lib-$(CONFIG_SPARC32) += rwsem_32.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o diff --git a/arch/sparc/lib/rwsem_32.S b/arch/sparc/lib/rwsem_32.S deleted file mode 100644 index 9675268e7fd..00000000000 --- a/arch/sparc/lib/rwsem_32.S +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Assembly part of rw semaphores. - * - * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com) - */ - -#include <asm/ptrace.h> -#include <asm/psr.h> - - .section .sched.text, "ax" - .align 4 - - .globl ___down_read -___down_read: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - nop - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - sub %g7, 1, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - sub %g7, 1, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - add %g7, 1, %g7 - nop - nop - subcc %g7, 1, %g7 - bneg 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - bcs 4f - mov %g5, %l5 - call down_read_failed - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba ___down_read - restore %l5, %g0, %g5 -4: call down_read_failed_biased - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 - - .globl ___down_write -___down_write: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - sethi %hi(0x01000000), %g2 - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - sub %g7, %g2, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - sub %g7, %g2, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - add %g7, %g2, %g7 - nop - nop - subcc %g7, %g2, %g7 - bne 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - bcs 4f - mov %g5, %l5 - call down_write_failed - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba ___down_write - restore %l5, %g0, %g5 -4: call down_write_failed_biased - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 - - .text - .globl ___up_read -___up_read: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - nop - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - add %g7, 1, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - add %g7, 1, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - nop - nop - nop - cmp %g7, 0 - be 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - mov %g5, %l5 - clr %o1 - call __rwsem_wake - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 - - .globl ___up_write -___up_write: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - sethi %hi(0x01000000), %g2 - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - add %g7, %g2, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - add %g7, %g2, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - sub %g7, %g2, %g7 - nop - nop - addcc %g7, %g2, %g7 - bcs 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - mov %g5, %l5 - mov %g7, %o1 - call __rwsem_wake - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 2f6ae1d1fb6..e10cd03fab8 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -862,7 +862,7 @@ static void init_node_masks_nonnuma(void) for (i = 0; i < NR_CPUS; i++) numa_cpu_lookup_table[i] = 0; - numa_cpumask_lookup_table[0] = CPU_MASK_ALL; + cpumask_setall(&numa_cpumask_lookup_table[0]); } #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -1080,7 +1080,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md, { u64 arc; - cpus_clear(*mask); + cpumask_clear(mask); mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) { u64 target = mdesc_arc_target(md, arc); @@ -1091,7 +1091,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md, continue; id = mdesc_get_property(md, target, "id", NULL); if (*id < nr_cpu_ids) - cpu_set(*id, *mask); + cpumask_set_cpu(*id, mask); } } @@ -1153,13 +1153,13 @@ static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp, numa_parse_mdesc_group_cpus(md, grp, &mask); - for_each_cpu_mask(cpu, mask) + for_each_cpu(cpu, &mask) numa_cpu_lookup_table[cpu] = index; - numa_cpumask_lookup_table[index] = mask; + cpumask_copy(&numa_cpumask_lookup_table[index], &mask); if (numa_debug) { printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index); - for_each_cpu_mask(cpu, mask) + for_each_cpu(cpu, &mask) printk("%d ", cpu); printk("]\n"); } @@ -1218,7 +1218,7 @@ static int __init numa_parse_jbus(void) index = 0; for_each_present_cpu(cpu) { numa_cpu_lookup_table[cpu] = index; - numa_cpumask_lookup_table[index] = cpumask_of_cpu(cpu); + cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu)); node_masks[index].mask = ~((1UL << 36UL) - 1UL); node_masks[index].val = cpu << 36UL; diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 0e103236b75..0e9dec6cadd 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -15,3 +15,4 @@ obj-y += vdso/ obj-$(CONFIG_IA32_EMULATION) += ia32/ obj-y += platform/ +obj-y += net/ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4168e5d8632..880fcb6c86f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -72,6 +72,7 @@ config X86 select GENERIC_IRQ_SHOW select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP + select HAVE_BPF_JIT if (X86_64 && NET) config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 1a58ad89fdf..c04f1b7a913 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -2,8 +2,6 @@ # Arch-specific CryptoAPI modules. # -obj-$(CONFIG_CRYPTO_FPU) += fpu.o - obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o @@ -24,6 +22,6 @@ aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o -aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o +aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2577613fb32..feee8ff1d05 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -94,6 +94,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); + +int crypto_fpu_init(void); +void crypto_fpu_exit(void); + #ifdef CONFIG_X86_64 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); @@ -1257,6 +1261,8 @@ static int __init aesni_init(void) return -ENODEV; } + if ((err = crypto_fpu_init())) + goto fpu_err; if ((err = crypto_register_alg(&aesni_alg))) goto aes_err; if ((err = crypto_register_alg(&__aesni_alg))) @@ -1334,6 +1340,7 @@ blk_ecb_err: __aes_err: crypto_unregister_alg(&aesni_alg); aes_err: +fpu_err: return err; } @@ -1363,6 +1370,8 @@ static void __exit aesni_exit(void) crypto_unregister_alg(&blk_ecb_alg); crypto_unregister_alg(&__aesni_alg); crypto_unregister_alg(&aesni_alg); + + crypto_fpu_exit(); } module_init(aesni_init); diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index 1a8f8649c03..98d7a188f46 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c @@ -150,18 +150,12 @@ static struct crypto_template crypto_fpu_tmpl = { .module = THIS_MODULE, }; -static int __init crypto_fpu_module_init(void) +int __init crypto_fpu_init(void) { return crypto_register_template(&crypto_fpu_tmpl); } -static void __exit crypto_fpu_module_exit(void) +void __exit crypto_fpu_exit(void) { crypto_unregister_template(&crypto_fpu_tmpl); } - -module_init(crypto_fpu_module_init); -module_exit(crypto_fpu_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("FPU block cipher wrapper"); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 849a9d23c71..95f5826be45 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -848,4 +848,5 @@ ia32_sys_call_table: .quad compat_sys_open_by_handle_at .quad compat_sys_clock_adjtime .quad sys_syncfs + .quad compat_sys_sendmmsg /* 345 */ ia32_syscall_end: diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0f521356432..0049211959c 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -14,6 +14,8 @@ #include <asm/desc_defs.h> struct x86_emulate_ctxt; +enum x86_intercept; +enum x86_intercept_stage; struct x86_exception { u8 vector; @@ -24,6 +26,24 @@ struct x86_exception { }; /* + * This struct is used to carry enough information from the instruction + * decoder to main KVM so that a decision can be made whether the + * instruction needs to be intercepted or not. + */ +struct x86_instruction_info { + u8 intercept; /* which intercept */ + u8 rep_prefix; /* rep prefix? */ + u8 modrm_mod; /* mod part of modrm */ + u8 modrm_reg; /* index of register used */ + u8 modrm_rm; /* rm part of modrm */ + u64 src_val; /* value of source operand */ + u8 src_bytes; /* size of source operand */ + u8 dst_bytes; /* size of destination operand */ + u8 ad_bytes; /* size of src/dst address */ + u64 next_rip; /* rip following the instruction */ +}; + +/* * x86_emulate_ops: * * These operations represent the instruction emulator's interface to memory. @@ -62,6 +82,7 @@ struct x86_exception { #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ +#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ struct x86_emulate_ops { /* @@ -71,8 +92,9 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*read_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*read_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, + unsigned int bytes, struct x86_exception *fault); /* @@ -82,8 +104,8 @@ struct x86_emulate_ops { * @val: [OUT] Value write to memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to write to memory. */ - int (*write_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*write_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *fault); /* * fetch: Read bytes of standard (non-emulated/special) memory. @@ -92,8 +114,8 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*fetch)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*fetch)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *fault); /* @@ -102,11 +124,9 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*read_emulated)(unsigned long addr, - void *val, - unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + int (*read_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, + struct x86_exception *fault); /* * write_emulated: Write bytes to emulated/special memory area. @@ -115,11 +135,10 @@ struct x86_emulate_ops { * required). * @bytes: [IN ] Number of bytes to write to memory. */ - int (*write_emulated)(unsigned long addr, - const void *val, + int (*write_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *val, unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + struct x86_exception *fault); /* * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an @@ -129,40 +148,54 @@ struct x86_emulate_ops { * @new: [IN ] Value to write to @addr. * @bytes: [IN ] Number of bytes to access using CMPXCHG. */ - int (*cmpxchg_emulated)(unsigned long addr, + int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *old, const void *new, unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); - - int (*pio_in_emulated)(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu); - - int (*pio_out_emulated)(int size, unsigned short port, const void *val, - unsigned int count, struct kvm_vcpu *vcpu); - - bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, - int seg, struct kvm_vcpu *vcpu); - void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3, - int seg, struct kvm_vcpu *vcpu); - u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); - void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); - unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); - void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); - void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); - ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); - int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); - int (*cpl)(struct kvm_vcpu *vcpu); - int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); - int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); - int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); - int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); + struct x86_exception *fault); + void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); + + int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, void *val, + unsigned int count); + + int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, const void *val, + unsigned int count); + + bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, + struct desc_struct *desc, u32 *base3, int seg); + void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, + struct desc_struct *desc, u32 base3, int seg); + unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, + int seg); + void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); + int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); + int (*cpl)(struct x86_emulate_ctxt *ctxt); + int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); + int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); + int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); + int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); + void (*halt)(struct x86_emulate_ctxt *ctxt); + void (*wbinvd)(struct x86_emulate_ctxt *ctxt); + int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); + void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ + void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ + int (*intercept)(struct x86_emulate_ctxt *ctxt, + struct x86_instruction_info *info, + enum x86_intercept_stage stage); }; +typedef u32 __attribute__((vector_size(16))) sse128_t; + /* Type, address-of, and value of an instruction's operand. */ struct operand { - enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; + enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; unsigned int bytes; union { unsigned long orig_val; @@ -174,11 +207,13 @@ struct operand { ulong ea; unsigned seg; } mem; + unsigned xmm; } addr; union { unsigned long val; u64 val64; char valptr[sizeof(unsigned long) + 2]; + sse128_t vec_val; }; }; @@ -197,6 +232,7 @@ struct read_cache { struct decode_cache { u8 twobyte; u8 b; + u8 intercept; u8 lock_prefix; u8 rep_prefix; u8 op_bytes; @@ -209,6 +245,7 @@ struct decode_cache { u8 seg_override; unsigned int d; int (*execute)(struct x86_emulate_ctxt *ctxt); + int (*check_perm)(struct x86_emulate_ctxt *ctxt); unsigned long regs[NR_VCPU_REGS]; unsigned long eip; /* modrm */ @@ -227,17 +264,15 @@ struct x86_emulate_ctxt { struct x86_emulate_ops *ops; /* Register state before/after emulation. */ - struct kvm_vcpu *vcpu; - unsigned long eflags; unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; - u32 cs_base; /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; + bool guest_mode; /* guest running a nested guest */ bool perm_ok; /* do not check permissions if true */ bool only_vendor_specific_insn; @@ -249,8 +284,8 @@ struct x86_emulate_ctxt { }; /* Repeat String Operation Prefix */ -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 +#define REPE_PREFIX 0xf3 +#define REPNE_PREFIX 0xf2 /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ @@ -259,6 +294,69 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ +/* any protected mode */ +#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ + X86EMUL_MODE_PROT64) + +enum x86_intercept_stage { + X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ + X86_ICPT_PRE_EXCEPT, + X86_ICPT_POST_EXCEPT, + X86_ICPT_POST_MEMACCESS, +}; + +enum x86_intercept { + x86_intercept_none, + x86_intercept_cr_read, + x86_intercept_cr_write, + x86_intercept_clts, + x86_intercept_lmsw, + x86_intercept_smsw, + x86_intercept_dr_read, + x86_intercept_dr_write, + x86_intercept_lidt, + x86_intercept_sidt, + x86_intercept_lgdt, + x86_intercept_sgdt, + x86_intercept_lldt, + x86_intercept_sldt, + x86_intercept_ltr, + x86_intercept_str, + x86_intercept_rdtsc, + x86_intercept_rdpmc, + x86_intercept_pushf, + x86_intercept_popf, + x86_intercept_cpuid, + x86_intercept_rsm, + x86_intercept_iret, + x86_intercept_intn, + x86_intercept_invd, + x86_intercept_pause, + x86_intercept_hlt, + x86_intercept_invlpg, + x86_intercept_invlpga, + x86_intercept_vmrun, + x86_intercept_vmload, + x86_intercept_vmsave, + x86_intercept_vmmcall, + x86_intercept_stgi, + x86_intercept_clgi, + x86_intercept_skinit, + x86_intercept_rdtscp, + x86_intercept_icebp, + x86_intercept_wbinvd, + x86_intercept_monitor, + x86_intercept_mwait, + x86_intercept_rdmsr, + x86_intercept_wrmsr, + x86_intercept_in, + x86_intercept_ins, + x86_intercept_out, + x86_intercept_outs, + + nr_x86_intercepts +}; + /* Host execution mode. */ #if defined(CONFIG_X86_32) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 @@ -270,6 +368,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); #define EMULATION_FAILED -1 #define EMULATION_OK 0 #define EMULATION_RESTART 1 +#define EMULATION_INTERCEPTED 2 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int reason, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c8af0991fdf..d2ac8e2ee89 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -30,14 +30,30 @@ #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_MMIO_SIZE 16 #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define CR0_RESERVED_BITS \ + (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ + | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ + | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) + #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 0xFFFFFF0000000000ULL) +#define CR4_RESERVED_BITS \ + (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ + | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXSAVE \ + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) + +#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) + + #define INVALID_PAGE (~(hpa_t)0) #define VALID_PAGE(x) ((x) != INVALID_PAGE) @@ -118,6 +134,9 @@ enum kvm_reg { enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR3, + VCPU_EXREG_RFLAGS, + VCPU_EXREG_CPL, + VCPU_EXREG_SEGMENTS, }; enum { @@ -256,7 +275,7 @@ struct kvm_mmu { struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte, unsigned long mmu_seq); + u64 *spte, const void *pte); hpa_t root_hpa; int root_level; int shadow_root_level; @@ -340,7 +359,6 @@ struct kvm_vcpu_arch { struct fpu guest_fpu; u64 xcr0; - gva_t mmio_fault_cr2; struct kvm_pio_request pio; void *pio_data; @@ -367,18 +385,22 @@ struct kvm_vcpu_arch { /* emulate context */ struct x86_emulate_ctxt emulate_ctxt; + bool emulate_regs_need_sync_to_vcpu; + bool emulate_regs_need_sync_from_vcpu; gpa_t time; struct pvclock_vcpu_time_info hv_clock; unsigned int hw_tsc_khz; unsigned int time_offset; struct page *time_page; - u64 last_host_tsc; u64 last_guest_tsc; u64 last_kernel_ns; u64 last_tsc_nsec; u64 last_tsc_write; + u32 virtual_tsc_khz; bool tsc_catchup; + u32 tsc_catchup_mult; + s8 tsc_catchup_shift; bool nmi_pending; bool nmi_injected; @@ -448,9 +470,6 @@ struct kvm_arch { u64 last_tsc_nsec; u64 last_tsc_offset; u64 last_tsc_write; - u32 virtual_tsc_khz; - u32 virtual_tsc_mult; - s8 virtual_tsc_shift; struct kvm_xen_hvm_config xen_hvm_config; @@ -502,6 +521,8 @@ struct kvm_vcpu_stat { u32 nmi_injections; }; +struct x86_instruction_info; + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -586,9 +607,17 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); + void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); + void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); + + int (*check_intercept)(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage); + const struct trace_print_flags *exit_reasons_str; }; @@ -627,6 +656,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; +/* control of guest tsc rate supported? */ +extern bool kvm_has_tsc_control; +/* minimum supported tsc_khz for guests */ +extern u32 kvm_min_guest_tsc_khz; +/* maximum supported tsc_khz for guests */ +extern u32 kvm_max_guest_tsc_khz; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ @@ -645,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); } -void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); -void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); - void kvm_enable_efer_bits(u64); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); @@ -657,8 +690,6 @@ struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); -int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); -int emulate_clts(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -721,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -int kvm_fix_hypercall(struct kvm_vcpu *vcpu); - int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3cce71413d0..485b4f1f079 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -118,6 +118,7 @@ complete list. */ #define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 99f0ad753f3..99ddd148a76 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -6,7 +6,6 @@ #include <linux/errno.h> #include <linux/compiler.h> #include <linux/thread_info.h> -#include <linux/prefetch.h> #include <linux/string.h> #include <asm/asm.h> #include <asm/page.h> diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 088d09fb161..566e803cc60 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -6,7 +6,6 @@ */ #include <linux/errno.h> #include <linux/thread_info.h> -#include <linux/prefetch.h> #include <linux/string.h> #include <asm/asm.h> #include <asm/page.h> diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 316708d5af9..1c66d30971a 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -6,7 +6,6 @@ */ #include <linux/compiler.h> #include <linux/errno.h> -#include <linux/prefetch.h> #include <linux/lockdep.h> #include <asm/alternative.h> #include <asm/cpufeature.h> diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index a755ef5e597..fb6a625c99b 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -350,10 +350,11 @@ #define __NR_open_by_handle_at 342 #define __NR_clock_adjtime 343 #define __NR_syncfs 344 +#define __NR_sendmmsg 345 #ifdef __KERNEL__ -#define NR_syscalls 345 +#define NR_syscalls 346 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 160fa76bd57..79f90eb15aa 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) #define __NR_syncfs 306 __SYSCALL(__NR_syncfs, sys_syncfs) +#define __NR_sendmmsg 307 +__SYSCALL(__NR_sendmmsg, sys_sendmmsg) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4fd173cd8e5..40a24932a8a 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -601,10 +601,7 @@ long sys_rt_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + set_current_blocked(&set); if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; @@ -682,6 +679,7 @@ static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs) { + sigset_t blocked; int ret; /* Are we from a system call? */ @@ -741,12 +739,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, */ regs->flags &= ~X86_EFLAGS_TF; - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); + sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked, sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + sigaddset(&blocked, sig); + set_current_blocked(&blocked); tracehook_signal_handler(sig, info, ka, regs, test_thread_flag(TIF_SINGLESTEP)); diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index abce34d5c79..32cbffb0c49 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -344,3 +344,4 @@ ENTRY(sys_call_table) .long sys_open_by_handle_at .long sys_clock_adjtime .long sys_syncfs + .long sys_sendmmsg /* 345 */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0ad47b819a8..d6e2477feb1 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -73,9 +73,14 @@ #define MemAbs (1<<11) /* Memory operand is absolute displacement */ #define String (1<<12) /* String instruction (rep capable) */ #define Stack (1<<13) /* Stack instruction (push/pop) */ +#define GroupMask (7<<14) /* Opcode uses one of the group mechanisms */ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ -#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ +#define GroupDual (2<<14) /* Alternate decoding of mod == 3 */ +#define Prefix (3<<14) /* Instruction varies with 66/f2/f3 prefix */ +#define RMExt (4<<14) /* Opcode extension in ModRM r/m if mod == 3 */ +#define Sse (1<<17) /* SSE Vector instruction */ /* Misc flags */ +#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ @@ -102,11 +107,14 @@ struct opcode { u32 flags; + u8 intercept; union { int (*execute)(struct x86_emulate_ctxt *ctxt); struct opcode *group; struct group_dual *gdual; + struct gprefix *gprefix; } u; + int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; struct group_dual { @@ -114,6 +122,13 @@ struct group_dual { struct opcode mod3[8]; }; +struct gprefix { + struct opcode pfx_no; + struct opcode pfx_66; + struct opcode pfx_f2; + struct opcode pfx_f3; +}; + /* EFLAGS bit definitions. */ #define EFLG_ID (1<<21) #define EFLG_VIP (1<<20) @@ -248,42 +263,42 @@ struct group_dual { "w", "r", _LO32, "r", "", "r") /* Instruction has three operands and one operand is stored in ECX register */ -#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ - do { \ - unsigned long _tmp; \ - _type _clv = (_cl).val; \ - _type _srcv = (_src).val; \ - _type _dstv = (_dst).val; \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "5", "2") \ - _op _suffix " %4,%1 \n" \ - _POST_EFLAGS("0", "5", "2") \ - : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ - : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ - ); \ - \ - (_cl).val = (unsigned long) _clv; \ - (_src).val = (unsigned long) _srcv; \ - (_dst).val = (unsigned long) _dstv; \ +#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ + do { \ + unsigned long _tmp; \ + _type _clv = (_cl).val; \ + _type _srcv = (_src).val; \ + _type _dstv = (_dst).val; \ + \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "5", "2") \ + _op _suffix " %4,%1 \n" \ + _POST_EFLAGS("0", "5", "2") \ + : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ + : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ + ); \ + \ + (_cl).val = (unsigned long) _clv; \ + (_src).val = (unsigned long) _srcv; \ + (_dst).val = (unsigned long) _dstv; \ } while (0) -#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ - do { \ - switch ((_dst).bytes) { \ - case 2: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "w", unsigned short); \ - break; \ - case 4: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "l", unsigned int); \ - break; \ - case 8: \ - ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "q", unsigned long)); \ - break; \ - } \ +#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ + do { \ + switch ((_dst).bytes) { \ + case 2: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "w", unsigned short); \ + break; \ + case 4: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "l", unsigned int); \ + break; \ + case 8: \ + ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "q", unsigned long)); \ + break; \ + } \ } while (0) #define __emulate_1op(_op, _dst, _eflags, _suffix) \ @@ -346,13 +361,25 @@ struct group_dual { } while (0) /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ -#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ - do { \ - switch((_src).bytes) { \ - case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \ - case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "w"); break; \ - case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \ - case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \ +#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ + do { \ + switch((_src).bytes) { \ + case 1: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "b"); \ + break; \ + case 2: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "w"); \ + break; \ + case 4: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "l"); \ + break; \ + case 8: \ + ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "q")); \ + break; \ } \ } while (0) @@ -388,13 +415,33 @@ struct group_dual { (_type)_x; \ }) -#define insn_fetch_arr(_arr, _size, _eip) \ +#define insn_fetch_arr(_arr, _size, _eip) \ ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ (_eip) += (_size); \ }) +static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, + enum x86_intercept intercept, + enum x86_intercept_stage stage) +{ + struct x86_instruction_info info = { + .intercept = intercept, + .rep_prefix = ctxt->decode.rep_prefix, + .modrm_mod = ctxt->decode.modrm_mod, + .modrm_reg = ctxt->decode.modrm_reg, + .modrm_rm = ctxt->decode.modrm_rm, + .src_val = ctxt->decode.src.val64, + .src_bytes = ctxt->decode.src.bytes, + .dst_bytes = ctxt->decode.dst.bytes, + .ad_bytes = ctxt->decode.ad_bytes, + .next_rip = ctxt->eip, + }; + + return ctxt->ops->intercept(ctxt, &info, stage); +} + static inline unsigned long ad_mask(struct decode_cache *c) { return (1UL << (c->ad_bytes << 3)) - 1; @@ -430,6 +477,13 @@ static inline void jmp_rel(struct decode_cache *c, int rel) register_address_increment(c, &c->eip, rel); } +static u32 desc_limit_scaled(struct desc_struct *desc) +{ + u32 limit = get_desc_limit(desc); + + return desc->g ? (limit << 12) | 0xfff : limit; +} + static void set_seg_override(struct decode_cache *c, int seg) { c->has_seg_override = true; @@ -442,11 +496,10 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) return 0; - return ops->get_cached_segment_base(seg, ctxt->vcpu); + return ops->get_cached_segment_base(ctxt, seg); } static unsigned seg_override(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct decode_cache *c) { if (!c->has_seg_override) @@ -455,18 +508,6 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt, return c->seg_override; } -static ulong linear(struct x86_emulate_ctxt *ctxt, - struct segmented_address addr) -{ - struct decode_cache *c = &ctxt->decode; - ulong la; - - la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; - if (c->ad_bytes != 8) - la &= (u32)-1; - return la; -} - static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, u32 error, bool valid) { @@ -476,11 +517,21 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, return X86EMUL_PROPAGATE_FAULT; } +static int emulate_db(struct x86_emulate_ctxt *ctxt) +{ + return emulate_exception(ctxt, DB_VECTOR, 0, false); +} + static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) { return emulate_exception(ctxt, GP_VECTOR, err, true); } +static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err) +{ + return emulate_exception(ctxt, SS_VECTOR, err, true); +} + static int emulate_ud(struct x86_emulate_ctxt *ctxt) { return emulate_exception(ctxt, UD_VECTOR, 0, false); @@ -496,6 +547,128 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, DE_VECTOR, 0, false); } +static int emulate_nm(struct x86_emulate_ctxt *ctxt) +{ + return emulate_exception(ctxt, NM_VECTOR, 0, false); +} + +static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) +{ + u16 selector; + struct desc_struct desc; + + ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg); + return selector; +} + +static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, + unsigned seg) +{ + u16 dummy; + u32 base3; + struct desc_struct desc; + + ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg); + ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); +} + +static int __linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned size, bool write, bool fetch, + ulong *linear) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_struct desc; + bool usable; + ulong la; + u32 lim; + u16 sel; + unsigned cpl, rpl; + + la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; + switch (ctxt->mode) { + case X86EMUL_MODE_REAL: + break; + case X86EMUL_MODE_PROT64: + if (((signed long)la << 16) >> 16 != la) + return emulate_gp(ctxt, 0); + break; + default: + usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, + addr.seg); + if (!usable) + goto bad; + /* code segment or read-only data segment */ + if (((desc.type & 8) || !(desc.type & 2)) && write) + goto bad; + /* unreadable code segment */ + if (!fetch && (desc.type & 8) && !(desc.type & 2)) + goto bad; + lim = desc_limit_scaled(&desc); + if ((desc.type & 8) || !(desc.type & 4)) { + /* expand-up segment */ + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) + goto bad; + } else { + /* exapand-down segment */ + if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) + goto bad; + lim = desc.d ? 0xffffffff : 0xffff; + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) + goto bad; + } + cpl = ctxt->ops->cpl(ctxt); + rpl = sel & 3; + cpl = max(cpl, rpl); + if (!(desc.type & 8)) { + /* data segment */ + if (cpl > desc.dpl) + goto bad; + } else if ((desc.type & 8) && !(desc.type & 4)) { + /* nonconforming code segment */ + if (cpl != desc.dpl) + goto bad; + } else if ((desc.type & 8) && (desc.type & 4)) { + /* conforming code segment */ + if (cpl < desc.dpl) + goto bad; + } + break; + } + if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8) + la &= (u32)-1; + *linear = la; + return X86EMUL_CONTINUE; +bad: + if (addr.seg == VCPU_SREG_SS) + return emulate_ss(ctxt, addr.seg); + else + return emulate_gp(ctxt, addr.seg); +} + +static int linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned size, bool write, + ulong *linear) +{ + return __linearize(ctxt, addr, size, write, false, linear); +} + + +static int segmented_read_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, false, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); +} + static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned long eip, u8 *dest) @@ -505,10 +678,15 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, int size, cur_size; if (eip == fc->end) { + unsigned long linear; + struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip}; cur_size = fc->end - fc->start; size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); - rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, - size, ctxt->vcpu, &ctxt->exception); + rc = __linearize(ctxt, addr, size, false, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + rc = ops->fetch(ctxt, linear, fc->data + cur_size, + size, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; fc->end += size; @@ -551,7 +729,6 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs, } static int read_descriptor(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct segmented_address addr, u16 *size, unsigned long *address, int op_bytes) { @@ -560,13 +737,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, if (op_bytes == 2) op_bytes = 3; *address = 0; - rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, - ctxt->vcpu, &ctxt->exception); + rc = segmented_read_std(ctxt, addr, size, 2); if (rc != X86EMUL_CONTINUE) return rc; addr.ea += 2; - rc = ops->read_std(linear(ctxt, addr), address, op_bytes, - ctxt->vcpu, &ctxt->exception); + rc = segmented_read_std(ctxt, addr, address, op_bytes); return rc; } @@ -623,7 +798,63 @@ static void fetch_register_operand(struct operand *op) } } -static void decode_register_operand(struct operand *op, +static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break; + case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break; + case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break; + case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break; + case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break; + case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break; + case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break; + case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break; +#ifdef CONFIG_X86_64 + case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break; + case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break; + case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break; + case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break; + case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break; + case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break; + case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break; + case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break; +#endif + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + +static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, + int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break; + case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break; + case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break; + case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break; + case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break; + case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break; + case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break; + case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break; +#ifdef CONFIG_X86_64 + case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break; + case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break; + case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break; + case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break; + case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break; + case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break; + case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break; + case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break; +#endif + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + +static void decode_register_operand(struct x86_emulate_ctxt *ctxt, + struct operand *op, struct decode_cache *c, int inhibit_bytereg) { @@ -632,6 +863,15 @@ static void decode_register_operand(struct operand *op, if (!(c->d & ModRM)) reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); + + if (c->d & Sse) { + op->type = OP_XMM; + op->bytes = 16; + op->addr.xmm = reg; + read_sse_reg(ctxt, &op->vec_val, reg); + return; + } + op->type = OP_REG; if ((c->d & ByteOp) && !inhibit_bytereg) { op->addr.reg = decode_register(reg, c->regs, highbyte_regs); @@ -671,6 +911,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes; op->addr.reg = decode_register(c->modrm_rm, c->regs, c->d & ByteOp); + if (c->d & Sse) { + op->type = OP_XMM; + op->bytes = 16; + op->addr.xmm = c->modrm_rm; + read_sse_reg(ctxt, &op->vec_val, c->modrm_rm); + return rc; + } fetch_register_operand(op); return rc; } @@ -819,8 +1066,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, if (mc->pos < mc->end) goto read_cached; - rc = ops->read_emulated(addr, mc->data + mc->end, n, - &ctxt->exception, ctxt->vcpu); + rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n, + &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; mc->end += n; @@ -834,6 +1081,50 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } +static int segmented_read(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, false, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return read_emulated(ctxt, ctxt->ops, linear, data, size); +} + +static int segmented_write(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + const void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->write_emulated(ctxt, linear, data, size, + &ctxt->exception); +} + +static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + const void *orig_data, const void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data, + size, &ctxt->exception); +} + static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned int size, unsigned short port, @@ -854,7 +1145,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, if (n == 0) n = 1; rc->pos = rc->end = 0; - if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) + if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n)) return 0; rc->end = n * size; } @@ -864,28 +1155,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, return 1; } -static u32 desc_limit_scaled(struct desc_struct *desc) -{ - u32 limit = get_desc_limit(desc); - - return desc->g ? (limit << 12) | 0xfff : limit; -} - static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, u16 selector, struct desc_ptr *dt) { if (selector & 1 << 2) { struct desc_struct desc; + u16 sel; + memset (dt, 0, sizeof *dt); - if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, - ctxt->vcpu)) + if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) return; dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ dt->address = get_desc_base(&desc); } else - ops->get_gdt(dt, ctxt->vcpu); + ops->get_gdt(ctxt, dt); } /* allowed just for 8 bytes segments */ @@ -903,8 +1188,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (dt.size < index * 8 + 7) return emulate_gp(ctxt, selector & 0xfffc); addr = dt.address + index * 8; - ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, - &ctxt->exception); + ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); return ret; } @@ -925,8 +1209,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, return emulate_gp(ctxt, selector & 0xfffc); addr = dt.address + index * 8; - ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, - &ctxt->exception); + ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); return ret; } @@ -986,7 +1269,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, rpl = selector & 3; dpl = seg_desc.dpl; - cpl = ops->cpl(ctxt->vcpu); + cpl = ops->cpl(ctxt); switch (seg) { case VCPU_SREG_SS: @@ -1042,8 +1325,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, return ret; } load: - ops->set_segment_selector(selector, seg, ctxt->vcpu); - ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu); + ops->set_segment(ctxt, selector, &seg_desc, 0, seg); return X86EMUL_CONTINUE; exception: emulate_exception(ctxt, err_vec, err_code, true); @@ -1069,8 +1351,7 @@ static void write_register_operand(struct operand *op) } } -static inline int writeback(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int writeback(struct x86_emulate_ctxt *ctxt) { int rc; struct decode_cache *c = &ctxt->decode; @@ -1081,23 +1362,22 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, break; case OP_MEM: if (c->lock_prefix) - rc = ops->cmpxchg_emulated( - linear(ctxt, c->dst.addr.mem), - &c->dst.orig_val, - &c->dst.val, - c->dst.bytes, - &ctxt->exception, - ctxt->vcpu); + rc = segmented_cmpxchg(ctxt, + c->dst.addr.mem, + &c->dst.orig_val, + &c->dst.val, + c->dst.bytes); else - rc = ops->write_emulated( - linear(ctxt, c->dst.addr.mem), - &c->dst.val, - c->dst.bytes, - &ctxt->exception, - ctxt->vcpu); + rc = segmented_write(ctxt, + c->dst.addr.mem, + &c->dst.val, + c->dst.bytes); if (rc != X86EMUL_CONTINUE) return rc; break; + case OP_XMM: + write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm); + break; case OP_NONE: /* no writeback */ break; @@ -1107,21 +1387,21 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static inline void emulate_push(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_push(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; + struct segmented_address addr; - c->dst.type = OP_MEM; - c->dst.bytes = c->op_bytes; - c->dst.val = c->src.val; register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); - c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]); - c->dst.addr.mem.seg = VCPU_SREG_SS; + addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); + addr.seg = VCPU_SREG_SS; + + /* Disable writeback. */ + c->dst.type = OP_NONE; + return segmented_write(ctxt, addr, &c->src.val, c->op_bytes); } static int emulate_pop(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, void *dest, int len) { struct decode_cache *c = &ctxt->decode; @@ -1130,7 +1410,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); addr.seg = VCPU_SREG_SS; - rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len); + rc = segmented_read(ctxt, addr, dest, len); if (rc != X86EMUL_CONTINUE) return rc; @@ -1138,6 +1418,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, return rc; } +static int em_pop(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + return emulate_pop(ctxt, &c->dst.val, c->op_bytes); +} + static int emulate_popf(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, void *dest, int len) @@ -1145,9 +1432,9 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, int rc; unsigned long val, change_mask; int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - int cpl = ops->cpl(ctxt->vcpu); + int cpl = ops->cpl(ctxt); - rc = emulate_pop(ctxt, ops, &val, len); + rc = emulate_pop(ctxt, &val, len); if (rc != X86EMUL_CONTINUE) return rc; @@ -1179,14 +1466,24 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, return rc; } -static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int seg) +static int em_popf(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; - c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); + c->dst.type = OP_REG; + c->dst.addr.reg = &ctxt->eflags; + c->dst.bytes = c->op_bytes; + return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); +} - emulate_push(ctxt, ops); +static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, int seg) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.val = get_segment_selector(ctxt, seg); + + return em_push(ctxt); } static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, @@ -1196,7 +1493,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, unsigned long selector; int rc; - rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); + rc = emulate_pop(ctxt, &selector, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1204,8 +1501,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, return rc; } -static int emulate_pusha(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_pusha(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; unsigned long old_esp = c->regs[VCPU_REGS_RSP]; @@ -1216,23 +1512,25 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt, (reg == VCPU_REGS_RSP) ? (c->src.val = old_esp) : (c->src.val = c->regs[reg]); - emulate_push(ctxt, ops); - - rc = writeback(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; ++reg; } - /* Disable writeback. */ - c->dst.type = OP_NONE; - return rc; } -static int emulate_popa(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_pushf(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.val = (unsigned long)ctxt->eflags; + return em_push(ctxt); +} + +static int em_popa(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; @@ -1245,7 +1543,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, --reg; } - rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); + rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes); if (rc != X86EMUL_CONTINUE) break; --reg; @@ -1265,37 +1563,32 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, /* TODO: Add limit checks */ c->src.val = ctxt->eflags; - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); - c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = c->eip; - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; - c->dst.type = OP_NONE; - - ops->get_idt(&dt, ctxt->vcpu); + ops->get_idt(ctxt, &dt); eip_addr = dt.address + (irq << 2); cs_addr = dt.address + (irq << 2) + 2; - rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception); + rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; - rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception); + rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; @@ -1339,7 +1632,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, /* TODO: Add stack limit check */ - rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes); + rc = emulate_pop(ctxt, &temp_eip, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1347,12 +1640,12 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, if (temp_eip & ~0xffff) return emulate_gp(ctxt, 0); - rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); + rc = emulate_pop(ctxt, &cs, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; - rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes); + rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1394,15 +1687,31 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt, } } -static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_jmp_far(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + unsigned short sel; + + memcpy(&sel, c->src.valptr + c->op_bytes, 2); + + rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS); + if (rc != X86EMUL_CONTINUE) + return rc; + + c->eip = 0; + memcpy(&c->eip, c->src.valptr, c->op_bytes); + return X86EMUL_CONTINUE; +} + +static int em_grp1a(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; - return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); + return emulate_pop(ctxt, &c->dst.val, c->dst.bytes); } -static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) +static int em_grp2(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; switch (c->modrm_reg) { @@ -1429,10 +1738,10 @@ static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags); break; } + return X86EMUL_CONTINUE; } -static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_grp3(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; unsigned long *rax = &c->regs[VCPU_REGS_RAX]; @@ -1471,10 +1780,10 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_grp45(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; + int rc = X86EMUL_CONTINUE; switch (c->modrm_reg) { case 0: /* inc */ @@ -1488,21 +1797,23 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, old_eip = c->eip; c->eip = c->src.val; c->src.val = old_eip; - emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } case 4: /* jmp abs */ c->eip = c->src.val; break; + case 5: /* jmp far */ + rc = em_jmp_far(ctxt); + break; case 6: /* push */ - emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } - return X86EMUL_CONTINUE; + return rc; } -static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_grp9(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; u64 old = c->dst.orig_val64; @@ -1528,12 +1839,12 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, int rc; unsigned long cs; - rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); + rc = emulate_pop(ctxt, &c->eip, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; if (c->op_bytes == 4) c->eip = (u32)c->eip; - rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); + rc = emulate_pop(ctxt, &cs, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); @@ -1562,8 +1873,10 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, struct desc_struct *cs, struct desc_struct *ss) { + u16 selector; + memset(cs, 0, sizeof(struct desc_struct)); - ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); + ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); memset(ss, 0, sizeof(struct desc_struct)); cs->l = 0; /* will be adjusted later */ @@ -1593,44 +1906,44 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; + u64 efer = 0; /* syscall is not available in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) return emulate_ud(ctxt); + ops->get_msr(ctxt, MSR_EFER, &efer); setup_syscalls_segments(ctxt, ops, &cs, &ss); - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); + ops->get_msr(ctxt, MSR_STAR, &msr_data); msr_data >>= 32; cs_sel = (u16)(msr_data & 0xfffc); ss_sel = (u16)(msr_data + 8); - if (is_long_mode(ctxt->vcpu)) { + if (efer & EFER_LMA) { cs.d = 0; cs.l = 1; } - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); c->regs[VCPU_REGS_RCX] = c->eip; - if (is_long_mode(ctxt->vcpu)) { + if (efer & EFER_LMA) { #ifdef CONFIG_X86_64 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; - ops->get_msr(ctxt->vcpu, + ops->get_msr(ctxt, ctxt->mode == X86EMUL_MODE_PROT64 ? MSR_LSTAR : MSR_CSTAR, &msr_data); c->eip = msr_data; - ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); + ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~(msr_data | EFLG_RF); #endif } else { /* legacy mode */ - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); + ops->get_msr(ctxt, MSR_STAR, &msr_data); c->eip = (u32)msr_data; ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); @@ -1646,7 +1959,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; + u64 efer = 0; + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); /* inject #GP if in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL) return emulate_gp(ctxt, 0); @@ -1659,7 +1974,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) setup_syscalls_segments(ctxt, ops, &cs, &ss); - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); switch (ctxt->mode) { case X86EMUL_MODE_PROT32: if ((msr_data & 0xfffc) == 0x0) @@ -1676,21 +1991,18 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel &= ~SELECTOR_RPL_MASK; ss_sel = cs_sel + 8; ss_sel &= ~SELECTOR_RPL_MASK; - if (ctxt->mode == X86EMUL_MODE_PROT64 - || is_long_mode(ctxt->vcpu)) { + if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) { cs.d = 0; cs.l = 1; } - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); c->eip = msr_data; - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); c->regs[VCPU_REGS_RSP] = msr_data; return X86EMUL_CONTINUE; @@ -1719,7 +2031,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.dpl = 3; ss.dpl = 3; - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); switch (usermode) { case X86EMUL_MODE_PROT32: cs_sel = (u16)(msr_data + 16); @@ -1739,10 +2051,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel |= SELECTOR_RPL_MASK; ss_sel |= SELECTOR_RPL_MASK; - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); c->eip = c->regs[VCPU_REGS_RDX]; c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; @@ -1759,7 +2069,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_VM86) return true; iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - return ops->cpl(ctxt->vcpu) > iopl; + return ops->cpl(ctxt) > iopl; } static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, @@ -1769,11 +2079,11 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, struct desc_struct tr_seg; u32 base3; int r; - u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; + u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7; unsigned mask = (1 << len) - 1; unsigned long base; - ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu); + ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); if (!tr_seg.p) return false; if (desc_limit_scaled(&tr_seg) < 103) @@ -1782,13 +2092,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, #ifdef CONFIG_X86_64 base |= ((u64)base3) << 32; #endif - r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); + r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL); if (r != X86EMUL_CONTINUE) return false; if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) return false; - r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, - NULL); + r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL); if (r != X86EMUL_CONTINUE) return false; if ((perm >> bit_idx) & mask) @@ -1829,11 +2138,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, tss->si = c->regs[VCPU_REGS_RSI]; tss->di = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); + tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, @@ -1858,11 +2167,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); + set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); /* * Now load segment descriptors. If fault happenes at this stage @@ -1896,7 +2205,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -1904,13 +2213,13 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, save_state_to_tss16(ctxt, ops, &tss_seg); - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -1919,10 +2228,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(new_tss_base, + ret = ops->write_std(ctxt, new_tss_base, &tss_seg.prev_task_link, sizeof tss_seg.prev_task_link, - ctxt->vcpu, &ctxt->exception); + &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; @@ -1937,7 +2246,7 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, { struct decode_cache *c = &ctxt->decode; - tss->cr3 = ops->get_cr(3, ctxt->vcpu); + tss->cr3 = ops->get_cr(ctxt, 3); tss->eip = c->eip; tss->eflags = ctxt->eflags; tss->eax = c->regs[VCPU_REGS_RAX]; @@ -1949,13 +2258,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, tss->esi = c->regs[VCPU_REGS_RSI]; tss->edi = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); - tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); - tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); + tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS); + tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS); + tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, @@ -1965,7 +2274,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; int ret; - if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) + if (ops->set_cr(ctxt, 3, tss->cr3)) return emulate_gp(ctxt, 0); c->eip = tss->eip; ctxt->eflags = tss->eflags | 2; @@ -1982,13 +2291,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); - ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); - ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); + set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); + set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); + set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); /* * Now load segment descriptors. If fault happenes at this stage @@ -2028,7 +2337,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2036,13 +2345,13 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, save_state_to_tss32(ctxt, ops, &tss_seg); - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2051,10 +2360,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(new_tss_base, + ret = ops->write_std(ctxt, new_tss_base, &tss_seg.prev_task_link, sizeof tss_seg.prev_task_link, - ctxt->vcpu, &ctxt->exception); + &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; @@ -2070,9 +2379,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, { struct desc_struct curr_tss_desc, next_tss_desc; int ret; - u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); + u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); ulong old_tss_base = - ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); + ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); u32 desc_limit; /* FIXME: old_tss_base == ~0 ? */ @@ -2088,7 +2397,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if (reason != TASK_SWITCH_IRET) { if ((tss_selector & 3) > next_tss_desc.dpl || - ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) + ops->cpl(ctxt) > next_tss_desc.dpl) return emulate_gp(ctxt, 0); } @@ -2132,9 +2441,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, &next_tss_desc); } - ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); - ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); - ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); + ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); + ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR); if (has_error_code) { struct decode_cache *c = &ctxt->decode; @@ -2142,7 +2450,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; c->lock_prefix = 0; c->src.val = (unsigned long) error_code; - emulate_push(ctxt, ops); + ret = em_push(ctxt); } return ret; @@ -2162,13 +2470,10 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, has_error_code, error_code); - if (rc == X86EMUL_CONTINUE) { - rc = writeback(ctxt, ops); - if (rc == X86EMUL_CONTINUE) - ctxt->eip = c->eip; - } + if (rc == X86EMUL_CONTINUE) + ctxt->eip = c->eip; - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, @@ -2182,12 +2487,6 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, op->addr.mem.seg = seg; } -static int em_push(struct x86_emulate_ctxt *ctxt) -{ - emulate_push(ctxt, ctxt->ops); - return X86EMUL_CONTINUE; -} - static int em_das(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -2234,7 +2533,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) ulong old_eip; int rc; - old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); + old_cs = get_segment_selector(ctxt, VCPU_SREG_CS); old_eip = c->eip; memcpy(&sel, c->src.valptr + c->op_bytes, 2); @@ -2245,20 +2544,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) memcpy(&c->eip, c->src.valptr, c->op_bytes); c->src.val = old_cs; - emulate_push(ctxt, ctxt->ops); - rc = writeback(ctxt, ctxt->ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = old_eip; - emulate_push(ctxt, ctxt->ops); - rc = writeback(ctxt, ctxt->ops); - if (rc != X86EMUL_CONTINUE) - return rc; - - c->dst.type = OP_NONE; - - return X86EMUL_CONTINUE; + return em_push(ctxt); } static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) @@ -2269,13 +2560,79 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) c->dst.type = OP_REG; c->dst.addr.reg = &c->eip; c->dst.bytes = c->op_bytes; - rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); + rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val); return X86EMUL_CONTINUE; } +static int em_add(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_or(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_adc(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_sbb(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_and(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_sub(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_xor(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_cmp(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + static int em_imul(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -2306,13 +2663,10 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) static int em_rdtsc(struct x86_emulate_ctxt *ctxt) { - unsigned cpl = ctxt->ops->cpl(ctxt->vcpu); struct decode_cache *c = &ctxt->decode; u64 tsc = 0; - if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) - return emulate_gp(ctxt, 0); - ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); + ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); c->regs[VCPU_REGS_RAX] = (u32)tsc; c->regs[VCPU_REGS_RDX] = tsc >> 32; return X86EMUL_CONTINUE; @@ -2325,22 +2679,375 @@ static int em_mov(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_movdqu(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes); + return X86EMUL_CONTINUE; +} + +static int em_invlpg(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + ulong linear; + + rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear); + if (rc == X86EMUL_CONTINUE) + ctxt->ops->invlpg(ctxt, linear); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_clts(struct x86_emulate_ctxt *ctxt) +{ + ulong cr0; + + cr0 = ctxt->ops->get_cr(ctxt, 0); + cr0 &= ~X86_CR0_TS; + ctxt->ops->set_cr(ctxt, 0, cr0); + return X86EMUL_CONTINUE; +} + +static int em_vmcall(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + + if (c->modrm_mod != 3 || c->modrm_rm != 1) + return X86EMUL_UNHANDLEABLE; + + rc = ctxt->ops->fix_hypercall(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + + /* Let the processor re-execute the fixed hypercall */ + c->eip = ctxt->eip; + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_lgdt(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_ptr desc_ptr; + int rc; + + rc = read_descriptor(ctxt, c->src.addr.mem, + &desc_ptr.size, &desc_ptr.address, + c->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->set_gdt(ctxt, &desc_ptr); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_vmmcall(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + + rc = ctxt->ops->fix_hypercall(ctxt); + + /* Disable writeback. */ + c->dst.type = OP_NONE; + return rc; +} + +static int em_lidt(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_ptr desc_ptr; + int rc; + + rc = read_descriptor(ctxt, c->src.addr.mem, + &desc_ptr.size, &desc_ptr.address, + c->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->set_idt(ctxt, &desc_ptr); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_smsw(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.bytes = 2; + c->dst.val = ctxt->ops->get_cr(ctxt, 0); + return X86EMUL_CONTINUE; +} + +static int em_lmsw(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul) + | (c->src.val & 0x0f)); + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static bool valid_cr(int nr) +{ + switch (nr) { + case 0: + case 2 ... 4: + case 8: + return true; + default: + return false; + } +} + +static int check_cr_read(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + if (!valid_cr(c->modrm_reg)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_cr_write(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + u64 new_val = c->src.val64; + int cr = c->modrm_reg; + u64 efer = 0; + + static u64 cr_reserved_bits[] = { + 0xffffffff00000000ULL, + 0, 0, 0, /* CR3 checked later */ + CR4_RESERVED_BITS, + 0, 0, 0, + CR8_RESERVED_BITS, + }; + + if (!valid_cr(cr)) + return emulate_ud(ctxt); + + if (new_val & cr_reserved_bits[cr]) + return emulate_gp(ctxt, 0); + + switch (cr) { + case 0: { + u64 cr4; + if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) || + ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) + return emulate_gp(ctxt, 0); + + cr4 = ctxt->ops->get_cr(ctxt, 4); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + + if ((new_val & X86_CR0_PG) && (efer & EFER_LME) && + !(cr4 & X86_CR4_PAE)) + return emulate_gp(ctxt, 0); + + break; + } + case 3: { + u64 rsvd = 0; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) + rsvd = CR3_L_MODE_RESERVED_BITS; + else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE) + rsvd = CR3_PAE_RESERVED_BITS; + else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG) + rsvd = CR3_NONPAE_RESERVED_BITS; + + if (new_val & rsvd) + return emulate_gp(ctxt, 0); + + break; + } + case 4: { + u64 cr4; + + cr4 = ctxt->ops->get_cr(ctxt, 4); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + + if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) + return emulate_gp(ctxt, 0); + + break; + } + } + + return X86EMUL_CONTINUE; +} + +static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) +{ + unsigned long dr7; + + ctxt->ops->get_dr(ctxt, 7, &dr7); + + /* Check if DR7.Global_Enable is set */ + return dr7 & (1 << 13); +} + +static int check_dr_read(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int dr = c->modrm_reg; + u64 cr4; + + if (dr > 7) + return emulate_ud(ctxt); + + cr4 = ctxt->ops->get_cr(ctxt, 4); + if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) + return emulate_ud(ctxt); + + if (check_dr7_gd(ctxt)) + return emulate_db(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_dr_write(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + u64 new_val = c->src.val64; + int dr = c->modrm_reg; + + if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL)) + return emulate_gp(ctxt, 0); + + return check_dr_read(ctxt); +} + +static int check_svme(struct x86_emulate_ctxt *ctxt) +{ + u64 efer; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + + if (!(efer & EFER_SVME)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_svme_pa(struct x86_emulate_ctxt *ctxt) +{ + u64 rax = ctxt->decode.regs[VCPU_REGS_RAX]; + + /* Valid physical address? */ + if (rax & 0xffff000000000000ULL) + return emulate_gp(ctxt, 0); + + return check_svme(ctxt); +} + +static int check_rdtsc(struct x86_emulate_ctxt *ctxt) +{ + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); + + if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_rdpmc(struct x86_emulate_ctxt *ctxt) +{ + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); + u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX]; + + if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || + (rcx > 3)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + +static int check_perm_in(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.bytes = min(c->dst.bytes, 4u); + if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + +static int check_perm_out(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.bytes = min(c->src.bytes, 4u); + if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } +#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } +#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ + .check_perm = (_p) } #define N D(0) +#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } -#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } +#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } +#define II(_f, _e, _i) \ + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } +#define IIP(_f, _e, _i, _p) \ + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \ + .check_perm = (_p) } +#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) +#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) -#define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \ - D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ - D2bv(((_f) & ~Lock) | DstAcc | SrcImm) +#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ + I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ + I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) +static struct opcode group7_rm1[] = { + DI(SrcNone | ModRM | Priv, monitor), + DI(SrcNone | ModRM | Priv, mwait), + N, N, N, N, N, N, +}; + +static struct opcode group7_rm3[] = { + DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), + II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall), + DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), + DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), + DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), +}; + +static struct opcode group7_rm7[] = { + N, + DIP(SrcNone | ModRM, rdtscp, check_rdtsc), + N, N, N, N, N, N, +}; static struct opcode group1[] = { - X7(D(Lock)), N + I(Lock, em_add), + I(Lock, em_or), + I(Lock, em_adc), + I(Lock, em_sbb), + I(Lock, em_and), + I(Lock, em_sub), + I(Lock, em_xor), + I(0, em_cmp), }; static struct opcode group1A[] = { @@ -2366,16 +3073,28 @@ static struct opcode group5[] = { D(SrcMem | ModRM | Stack), N, }; +static struct opcode group6[] = { + DI(ModRM | Prot, sldt), + DI(ModRM | Prot, str), + DI(ModRM | Prot | Priv, lldt), + DI(ModRM | Prot | Priv, ltr), + N, N, N, N, +}; + static struct group_dual group7 = { { - N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv), - D(SrcNone | ModRM | DstMem | Mov), N, - D(SrcMem16 | ModRM | Mov | Priv), - D(SrcMem | ModRM | ByteOp | Priv | NoAccess), + DI(ModRM | Mov | DstMem | Priv, sgdt), + DI(ModRM | Mov | DstMem | Priv, sidt), + II(ModRM | SrcMem | Priv, em_lgdt, lgdt), + II(ModRM | SrcMem | Priv, em_lidt, lidt), + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), + II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg), }, { - D(SrcNone | ModRM | Priv | VendorSpecific), N, - N, D(SrcNone | ModRM | Priv | VendorSpecific), - D(SrcNone | ModRM | DstMem | Mov), N, - D(SrcMem16 | ModRM | Mov | Priv), N, + I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall), + EXT(0, group7_rm1), + N, EXT(0, group7_rm3), + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), } }; static struct opcode group8[] = { @@ -2394,35 +3113,40 @@ static struct opcode group11[] = { I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), }; +static struct gprefix pfx_0f_6f_0f_7f = { + N, N, N, I(Sse, em_movdqu), +}; + static struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ - D6ALU(Lock), + I6ALU(Lock, em_add), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x08 - 0x0F */ - D6ALU(Lock), + I6ALU(Lock, em_or), D(ImplicitOps | Stack | No64), N, /* 0x10 - 0x17 */ - D6ALU(Lock), + I6ALU(Lock, em_adc), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x18 - 0x1F */ - D6ALU(Lock), + I6ALU(Lock, em_sbb), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x20 - 0x27 */ - D6ALU(Lock), N, N, + I6ALU(Lock, em_and), N, N, /* 0x28 - 0x2F */ - D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das), + I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), /* 0x30 - 0x37 */ - D6ALU(Lock), N, N, + I6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - D6ALU(0), N, N, + I6ALU(0, em_cmp), N, N, /* 0x40 - 0x4F */ X16(D(DstReg)), /* 0x50 - 0x57 */ X8(I(SrcReg | Stack, em_push)), /* 0x58 - 0x5F */ - X8(D(DstReg | Stack)), + X8(I(DstReg | Stack, em_pop)), /* 0x60 - 0x67 */ - D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), + I(ImplicitOps | Stack | No64, em_pusha), + I(ImplicitOps | Stack | No64, em_popa), N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , N, N, N, N, /* 0x68 - 0x6F */ @@ -2430,8 +3154,8 @@ static struct opcode opcode_table[256] = { I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), I(SrcImmByte | Mov | Stack, em_push), I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), - D2bv(DstDI | Mov | String), /* insb, insw/insd */ - D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */ + D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */ + D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ X16(D(SrcImmByte)), /* 0x80 - 0x87 */ @@ -2446,21 +3170,22 @@ static struct opcode opcode_table[256] = { D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg), D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), /* 0x90 - 0x97 */ - X8(D(SrcAcc | DstReg)), + DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), I(SrcImmFAddr | No64, em_call_far), N, - D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N, + II(ImplicitOps | Stack, em_pushf, pushf), + II(ImplicitOps | Stack, em_popf, popf), N, N, /* 0xA0 - 0xA7 */ I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), I2bv(SrcSI | DstDI | Mov | String, em_mov), - D2bv(SrcSI | DstDI | String), + I2bv(SrcSI | DstDI | String, em_cmp), /* 0xA8 - 0xAF */ D2bv(DstAcc | SrcImm), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - D2bv(SrcAcc | DstDI | String), + I2bv(SrcAcc | DstDI | String, em_cmp), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ @@ -2473,7 +3198,8 @@ static struct opcode opcode_table[256] = { G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ N, N, N, D(ImplicitOps | Stack), - D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps), + D(ImplicitOps), DI(SrcImmByte, intn), + D(ImplicitOps | No64), DI(ImplicitOps, iret), /* 0xD0 - 0xD7 */ D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), N, N, N, N, @@ -2481,14 +3207,17 @@ static struct opcode opcode_table[256] = { N, N, N, N, N, N, N, N, /* 0xE0 - 0xE7 */ X4(D(SrcImmByte)), - D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte), + D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), /* 0xE8 - 0xEF */ D(SrcImm | Stack), D(SrcImm | ImplicitOps), D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), - D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), + D2bvIP(SrcNone | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out), /* 0xF0 - 0xF7 */ - N, N, N, N, - D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3), + N, DI(ImplicitOps, icebp), N, N, + DI(ImplicitOps | Priv, hlt), D(ImplicitOps), + G(ByteOp, group3), G(0, group3), /* 0xF8 - 0xFF */ D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), @@ -2496,20 +3225,24 @@ static struct opcode opcode_table[256] = { static struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ - N, GD(0, &group7), N, N, - N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, - D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, + G(0, group6), GD(0, &group7), N, N, + N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N, + DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM), N, N, /* 0x10 - 0x1F */ N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, /* 0x20 - 0x2F */ - D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264), - D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264), + DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), + DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), + DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), + DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write), N, N, N, N, N, N, N, N, N, N, N, N, /* 0x30 - 0x3F */ - D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), - D(ImplicitOps | Priv), N, + DI(ImplicitOps | Priv, wrmsr), + IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), + DI(ImplicitOps | Priv, rdmsr), + DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), N, N, N, N, N, N, N, N, N, N, @@ -2518,21 +3251,27 @@ static struct opcode twobyte_table[256] = { /* 0x50 - 0x5F */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0x60 - 0x6F */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x70 - 0x7F */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x80 - 0x8F */ X16(D(SrcImm)), /* 0x90 - 0x9F */ X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), /* 0xA0 - 0xA7 */ D(ImplicitOps | Stack), D(ImplicitOps | Stack), - N, D(DstMem | SrcReg | ModRM | BitOp), + DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), D(DstMem | SrcReg | Src2ImmByte | ModRM), D(DstMem | SrcReg | Src2CL | ModRM), N, N, /* 0xA8 - 0xAF */ D(ImplicitOps | Stack), D(ImplicitOps | Stack), - N, D(DstMem | SrcReg | ModRM | BitOp | Lock), + DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), D(DstMem | SrcReg | Src2ImmByte | ModRM), D(DstMem | SrcReg | Src2CL | ModRM), D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), @@ -2564,10 +3303,13 @@ static struct opcode twobyte_table[256] = { #undef G #undef GD #undef I +#undef GP +#undef EXT #undef D2bv +#undef D2bvIP #undef I2bv -#undef D6ALU +#undef I6ALU static unsigned imm_size(struct decode_cache *c) { @@ -2625,8 +3367,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; - int def_op_bytes, def_ad_bytes, dual, goffset; - struct opcode opcode, *g_mod012, *g_mod3; + int def_op_bytes, def_ad_bytes, goffset, simd_prefix; + bool op_prefix = false; + struct opcode opcode; struct operand memop = { .type = OP_NONE }; c->eip = ctxt->eip; @@ -2634,7 +3377,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) c->fetch.end = c->fetch.start + insn_len; if (insn_len > 0) memcpy(c->fetch.data, insn, insn_len); - ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); switch (mode) { case X86EMUL_MODE_REAL: @@ -2662,6 +3404,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) for (;;) { switch (c->b = insn_fetch(u8, 1, c->eip)) { case 0x66: /* operand-size override */ + op_prefix = true; /* switch between 2/4 bytes */ c->op_bytes = def_op_bytes ^ 6; break; @@ -2692,10 +3435,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) c->lock_prefix = 1; break; case 0xf2: /* REPNE/REPNZ */ - c->rep_prefix = REPNE_PREFIX; - break; case 0xf3: /* REP/REPE/REPZ */ - c->rep_prefix = REPE_PREFIX; + c->rep_prefix = c->b; break; default: goto done_prefixes; @@ -2722,29 +3463,49 @@ done_prefixes: } c->d = opcode.flags; - if (c->d & Group) { - dual = c->d & GroupDual; - c->modrm = insn_fetch(u8, 1, c->eip); - --c->eip; - - if (c->d & GroupDual) { - g_mod012 = opcode.u.gdual->mod012; - g_mod3 = opcode.u.gdual->mod3; - } else - g_mod012 = g_mod3 = opcode.u.group; - - c->d &= ~(Group | GroupDual); - - goffset = (c->modrm >> 3) & 7; + while (c->d & GroupMask) { + switch (c->d & GroupMask) { + case Group: + c->modrm = insn_fetch(u8, 1, c->eip); + --c->eip; + goffset = (c->modrm >> 3) & 7; + opcode = opcode.u.group[goffset]; + break; + case GroupDual: + c->modrm = insn_fetch(u8, 1, c->eip); + --c->eip; + goffset = (c->modrm >> 3) & 7; + if ((c->modrm >> 6) == 3) + opcode = opcode.u.gdual->mod3[goffset]; + else + opcode = opcode.u.gdual->mod012[goffset]; + break; + case RMExt: + goffset = c->modrm & 7; + opcode = opcode.u.group[goffset]; + break; + case Prefix: + if (c->rep_prefix && op_prefix) + return X86EMUL_UNHANDLEABLE; + simd_prefix = op_prefix ? 0x66 : c->rep_prefix; + switch (simd_prefix) { + case 0x00: opcode = opcode.u.gprefix->pfx_no; break; + case 0x66: opcode = opcode.u.gprefix->pfx_66; break; + case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; + case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; + } + break; + default: + return X86EMUL_UNHANDLEABLE; + } - if ((c->modrm >> 6) == 3) - opcode = g_mod3[goffset]; - else - opcode = g_mod012[goffset]; + c->d &= ~GroupMask; c->d |= opcode.flags; } c->execute = opcode.u.execute; + c->check_perm = opcode.check_perm; + c->intercept = opcode.intercept; /* Unrecognised? */ if (c->d == 0 || (c->d & Undefined)) @@ -2763,6 +3524,9 @@ done_prefixes: c->op_bytes = 4; } + if (c->d & Sse) + c->op_bytes = 16; + /* ModRM and SIB bytes. */ if (c->d & ModRM) { rc = decode_modrm(ctxt, ops, &memop); @@ -2776,7 +3540,7 @@ done_prefixes: if (!c->has_seg_override) set_seg_override(c, VCPU_SREG_DS); - memop.addr.mem.seg = seg_override(ctxt, ops, c); + memop.addr.mem.seg = seg_override(ctxt, c); if (memop.type == OP_MEM && c->ad_bytes != 8) memop.addr.mem.ea = (u32)memop.addr.mem.ea; @@ -2792,7 +3556,7 @@ done_prefixes: case SrcNone: break; case SrcReg: - decode_register_operand(&c->src, c, 0); + decode_register_operand(ctxt, &c->src, c, 0); break; case SrcMem16: memop.bytes = 2; @@ -2836,7 +3600,7 @@ done_prefixes: c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->src.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSI]); - c->src.addr.mem.seg = seg_override(ctxt, ops, c), + c->src.addr.mem.seg = seg_override(ctxt, c); c->src.val = 0; break; case SrcImmFAddr: @@ -2883,7 +3647,7 @@ done_prefixes: /* Decode and fetch the destination operand: register or memory. */ switch (c->d & DstMask) { case DstReg: - decode_register_operand(&c->dst, c, + decode_register_operand(ctxt, &c->dst, c, c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); break; case DstImmUByte: @@ -2926,7 +3690,7 @@ done_prefixes: } done: - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) @@ -2979,12 +3743,51 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + if ((c->d & Sse) + && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) + || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { + rc = emulate_ud(ctxt); + goto done; + } + + if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { + rc = emulate_nm(ctxt); + goto done; + } + + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_PRE_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + /* Privileged instruction can be executed only in CPL=0 */ - if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { + if ((c->d & Priv) && ops->cpl(ctxt)) { rc = emulate_gp(ctxt, 0); goto done; } + /* Instruction can only be executed in protected mode */ + if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { + rc = emulate_ud(ctxt); + goto done; + } + + /* Do instruction specific permission checks */ + if (c->check_perm) { + rc = c->check_perm(ctxt); + if (rc != X86EMUL_CONTINUE) + goto done; + } + + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_POST_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (c->rep_prefix && (c->d & String)) { /* All REP prefixes have the same first termination condition */ if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { @@ -2994,16 +3797,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { - rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem), - c->src.valptr, c->src.bytes); + rc = segmented_read(ctxt, c->src.addr.mem, + c->src.valptr, c->src.bytes); if (rc != X86EMUL_CONTINUE) goto done; c->src.orig_val64 = c->src.val64; } if (c->src2.type == OP_MEM) { - rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem), - &c->src2.val, c->src2.bytes); + rc = segmented_read(ctxt, c->src2.addr.mem, + &c->src2.val, c->src2.bytes); if (rc != X86EMUL_CONTINUE) goto done; } @@ -3014,7 +3817,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { /* optimisation - avoid slow emulated read if Mov */ - rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem), + rc = segmented_read(ctxt, c->dst.addr.mem, &c->dst.val, c->dst.bytes); if (rc != X86EMUL_CONTINUE) goto done; @@ -3023,6 +3826,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_POST_MEMACCESS); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (c->execute) { rc = c->execute(ctxt); if (rc != X86EMUL_CONTINUE) @@ -3034,75 +3844,33 @@ special_insn: goto twobyte_insn; switch (c->b) { - case 0x00 ... 0x05: - add: /* add */ - emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); - break; case 0x06: /* push es */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); break; case 0x07: /* pop es */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); break; - case 0x08 ... 0x0d: - or: /* or */ - emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); - break; case 0x0e: /* push cs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); - break; - case 0x10 ... 0x15: - adc: /* adc */ - emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); break; case 0x16: /* push ss */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); break; case 0x17: /* pop ss */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); break; - case 0x18 ... 0x1d: - sbb: /* sbb */ - emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); - break; case 0x1e: /* push ds */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); break; case 0x1f: /* pop ds */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); break; - case 0x20 ... 0x25: - and: /* and */ - emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); - break; - case 0x28 ... 0x2d: - sub: /* sub */ - emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); - break; - case 0x30 ... 0x35: - xor: /* xor */ - emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); - break; - case 0x38 ... 0x3d: - cmp: /* cmp */ - emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); - break; case 0x40 ... 0x47: /* inc r16/r32 */ emulate_1op("inc", c->dst, ctxt->eflags); break; case 0x48 ... 0x4f: /* dec r16/r32 */ emulate_1op("dec", c->dst, ctxt->eflags); break; - case 0x58 ... 0x5f: /* pop reg */ - pop_instruction: - rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); - break; - case 0x60: /* pusha */ - rc = emulate_pusha(ctxt, ops); - break; - case 0x61: /* popa */ - rc = emulate_popa(ctxt, ops); - break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) goto cannot_emulate; @@ -3121,26 +3889,6 @@ special_insn: if (test_cc(c->b, ctxt->eflags)) jmp_rel(c, c->src.val); break; - case 0x80 ... 0x83: /* Grp1 */ - switch (c->modrm_reg) { - case 0: - goto add; - case 1: - goto or; - case 2: - goto adc; - case 3: - goto sbb; - case 4: - goto and; - case 5: - goto sub; - case 6: - goto xor; - case 7: - goto cmp; - } - break; case 0x84 ... 0x85: test: emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); @@ -3162,7 +3910,7 @@ special_insn: rc = emulate_ud(ctxt); goto done; } - c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); + c->dst.val = get_segment_selector(ctxt, c->modrm_reg); break; case 0x8d: /* lea r16/r32, m */ c->dst.val = c->src.addr.mem.ea; @@ -3187,7 +3935,7 @@ special_insn: break; } case 0x8f: /* pop (sole member of Grp1a) */ - rc = emulate_grp1a(ctxt, ops); + rc = em_grp1a(ctxt); break; case 0x90 ... 0x97: /* nop / xchg reg, rax */ if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) @@ -3200,31 +3948,17 @@ special_insn: case 8: c->dst.val = (s32)c->dst.val; break; } break; - case 0x9c: /* pushf */ - c->src.val = (unsigned long) ctxt->eflags; - emulate_push(ctxt, ops); - break; - case 0x9d: /* popf */ - c->dst.type = OP_REG; - c->dst.addr.reg = &ctxt->eflags; - c->dst.bytes = c->op_bytes; - rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); - break; - case 0xa6 ... 0xa7: /* cmps */ - c->dst.type = OP_NONE; /* Disable writeback. */ - goto cmp; case 0xa8 ... 0xa9: /* test ax, imm */ goto test; - case 0xae ... 0xaf: /* scas */ - goto cmp; case 0xc0 ... 0xc1: - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xc3: /* ret */ c->dst.type = OP_REG; c->dst.addr.reg = &c->eip; c->dst.bytes = c->op_bytes; - goto pop_instruction; + rc = em_pop(ctxt); + break; case 0xc4: /* les */ rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); break; @@ -3252,11 +3986,11 @@ special_insn: rc = emulate_iret(ctxt, ops); break; case 0xd0 ... 0xd1: /* Grp2 */ - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xd2 ... 0xd3: /* Grp2 */ c->src.val = c->regs[VCPU_REGS_RCX]; - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xe0 ... 0xe2: /* loop/loopz/loopnz */ register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); @@ -3278,23 +4012,14 @@ special_insn: long int rel = c->src.val; c->src.val = (unsigned long) c->eip; jmp_rel(c, rel); - emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } case 0xe9: /* jmp rel */ goto jmp; - case 0xea: { /* jmp far */ - unsigned short sel; - jump_far: - memcpy(&sel, c->src.valptr + c->op_bytes, 2); - - if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) - goto done; - - c->eip = 0; - memcpy(&c->eip, c->src.valptr, c->op_bytes); + case 0xea: /* jmp far */ + rc = em_jmp_far(ctxt); break; - } case 0xeb: jmp: /* jmp rel short */ jmp_rel(c, c->src.val); @@ -3304,11 +4029,6 @@ special_insn: case 0xed: /* in (e/r)ax,dx */ c->src.val = c->regs[VCPU_REGS_RDX]; do_io_in: - c->dst.bytes = min(c->dst.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { - rc = emulate_gp(ctxt, 0); - goto done; - } if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, &c->dst.val)) goto done; /* IO is needed */ @@ -3317,25 +4037,19 @@ special_insn: case 0xef: /* out dx,(e/r)ax */ c->dst.val = c->regs[VCPU_REGS_RDX]; do_io_out: - c->src.bytes = min(c->src.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->dst.val, - c->src.bytes)) { - rc = emulate_gp(ctxt, 0); - goto done; - } - ops->pio_out_emulated(c->src.bytes, c->dst.val, - &c->src.val, 1, ctxt->vcpu); + ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val, + &c->src.val, 1); c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf4: /* hlt */ - ctxt->vcpu->arch.halt_request = 1; + ctxt->ops->halt(ctxt); break; case 0xf5: /* cmc */ /* complement carry flag from eflags reg */ ctxt->eflags ^= EFLG_CF; break; case 0xf6 ... 0xf7: /* Grp3 */ - rc = emulate_grp3(ctxt, ops); + rc = em_grp3(ctxt); break; case 0xf8: /* clc */ ctxt->eflags &= ~EFLG_CF; @@ -3366,13 +4080,11 @@ special_insn: ctxt->eflags |= EFLG_DF; break; case 0xfe: /* Grp4 */ - grp45: - rc = emulate_grp45(ctxt, ops); + rc = em_grp45(ctxt); break; case 0xff: /* Grp5 */ - if (c->modrm_reg == 5) - goto jump_far; - goto grp45; + rc = em_grp45(ctxt); + break; default: goto cannot_emulate; } @@ -3381,7 +4093,7 @@ special_insn: goto done; writeback: - rc = writeback(ctxt, ops); + rc = writeback(ctxt); if (rc != X86EMUL_CONTINUE) goto done; @@ -3392,7 +4104,7 @@ writeback: c->dst.type = saved_dst_type; if ((c->d & SrcMask) == SrcSI) - string_addr_inc(ctxt, seg_override(ctxt, ops, c), + string_addr_inc(ctxt, seg_override(ctxt, c), VCPU_REGS_RSI, &c->src); if ((c->d & DstMask) == DstDI) @@ -3427,115 +4139,34 @@ writeback: done: if (rc == X86EMUL_PROPAGATE_FAULT) ctxt->have_exception = true; + if (rc == X86EMUL_INTERCEPTED) + return EMULATION_INTERCEPTED; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; twobyte_insn: switch (c->b) { - case 0x01: /* lgdt, lidt, lmsw */ - switch (c->modrm_reg) { - u16 size; - unsigned long address; - - case 0: /* vmcall */ - if (c->modrm_mod != 3 || c->modrm_rm != 1) - goto cannot_emulate; - - rc = kvm_fix_hypercall(ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - goto done; - - /* Let the processor re-execute the fixed hypercall */ - c->eip = ctxt->eip; - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - case 2: /* lgdt */ - rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &size, &address, c->op_bytes); - if (rc != X86EMUL_CONTINUE) - goto done; - realmode_lgdt(ctxt->vcpu, size, address); - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - case 3: /* lidt/vmmcall */ - if (c->modrm_mod == 3) { - switch (c->modrm_rm) { - case 1: - rc = kvm_fix_hypercall(ctxt->vcpu); - break; - default: - goto cannot_emulate; - } - } else { - rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &size, &address, - c->op_bytes); - if (rc != X86EMUL_CONTINUE) - goto done; - realmode_lidt(ctxt->vcpu, size, address); - } - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - case 4: /* smsw */ - c->dst.bytes = 2; - c->dst.val = ops->get_cr(0, ctxt->vcpu); - break; - case 6: /* lmsw */ - ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) | - (c->src.val & 0x0f), ctxt->vcpu); - c->dst.type = OP_NONE; - break; - case 5: /* not defined */ - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - case 7: /* invlpg*/ - emulate_invlpg(ctxt->vcpu, - linear(ctxt, c->src.addr.mem)); - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - default: - goto cannot_emulate; - } - break; case 0x05: /* syscall */ rc = emulate_syscall(ctxt, ops); break; case 0x06: - emulate_clts(ctxt->vcpu); + rc = em_clts(ctxt); break; case 0x09: /* wbinvd */ - kvm_emulate_wbinvd(ctxt->vcpu); + (ctxt->ops->wbinvd)(ctxt); break; case 0x08: /* invd */ case 0x0d: /* GrpP (prefetch) */ case 0x18: /* Grp16 (prefetch/nop) */ break; case 0x20: /* mov cr, reg */ - switch (c->modrm_reg) { - case 1: - case 5 ... 7: - case 9 ... 15: - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } - c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); + c->dst.val = ops->get_cr(ctxt, c->modrm_reg); break; case 0x21: /* mov from dr to reg */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } - ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); + ops->get_dr(ctxt, c->modrm_reg, &c->dst.val); break; case 0x22: /* mov reg, cr */ - if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { + if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -3543,16 +4174,9 @@ twobyte_insn: c->dst.type = OP_NONE; break; case 0x23: /* mov from reg to dr */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } - - if (ops->set_dr(c->modrm_reg, c->src.val & + if (ops->set_dr(ctxt, c->modrm_reg, c->src.val & ((ctxt->mode == X86EMUL_MODE_PROT64) ? - ~0ULL : ~0U), ctxt->vcpu) < 0) { + ~0ULL : ~0U)) < 0) { /* #UD condition is already handled by the code above */ emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; @@ -3565,7 +4189,7 @@ twobyte_insn: /* wrmsr */ msr_data = (u32)c->regs[VCPU_REGS_RAX] | ((u64)c->regs[VCPU_REGS_RDX] << 32); - if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { + if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -3574,7 +4198,7 @@ twobyte_insn: break; case 0x32: /* rdmsr */ - if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { + if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -3603,7 +4227,7 @@ twobyte_insn: c->dst.val = test_cc(c->b, ctxt->eflags); break; case 0xa0: /* push fs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); break; case 0xa1: /* pop fs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); @@ -3620,7 +4244,7 @@ twobyte_insn: emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); break; case 0xa8: /* push gs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); break; case 0xa9: /* pop gs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); @@ -3727,7 +4351,7 @@ twobyte_insn: (u64) c->src.val; break; case 0xc7: /* Grp9 (cmpxchg8b) */ - rc = emulate_grp9(ctxt, ops); + rc = em_grp9(ctxt); break; default: goto cannot_emulate; @@ -3739,5 +4363,5 @@ twobyte_insn: goto writeback; cannot_emulate: - return -1; + return EMULATION_FAILED; } diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 46d08ca0b48..51a97426e79 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -33,7 +33,6 @@ struct kvm_kpit_state { }; struct kvm_pit { - unsigned long base_addresss; struct kvm_io_device dev; struct kvm_io_device speaker_dev; struct kvm *kvm; @@ -51,7 +50,6 @@ struct kvm_pit { #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 #define KVM_PIT_CHANNEL_MASK 0x3 -void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); void kvm_free_pit(struct kvm *kvm); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ba910d14941..53e2d084bff 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -75,7 +75,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm); void kvm_destroy_pic(struct kvm *kvm); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); -void kvm_pic_clear_isr_ack(struct kvm *kvm); static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) { @@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_timers(struct kvm_vcpu *vcpu); -int pit_has_pending_timer(struct kvm_vcpu *vcpu); int apic_has_pending_timer(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 22fae7593ee..28418054b88 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) static void nonpaging_update_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, - const void *pte, unsigned long mmu_seq) + const void *pte) { WARN_ON(1); } @@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, } static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, - u64 *spte, - const void *new, unsigned long mmu_seq) + struct kvm_mmu_page *sp, u64 *spte, + const void *new) { if (sp->role.level != PT_PAGE_TABLE_LEVEL) { ++vcpu->kvm->stat.mmu_pde_zapped; @@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, } ++vcpu->kvm->stat.mmu_pte_updated; - vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); + vcpu->arch.mmu.update_pte(vcpu, sp, spte, new); } static bool need_remote_flush(u64 old, u64 new) @@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_mmu_page *sp; struct hlist_node *node; LIST_HEAD(invalid_list); - unsigned long mmu_seq; u64 entry, gentry, *spte; unsigned pte_size, page_offset, misaligned, quadrant, offset; int level, npte, invlpg_counter, r, flooded = 0; @@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, break; } - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - spin_lock(&vcpu->kvm->mmu_lock); if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) gentry = 0; @@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (gentry && !((sp->role.word ^ vcpu->arch.mmu.base_role.word) & mask.word)) - mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, - mmu_seq); + mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); if (!remote_flush && need_remote_flush(entry, *spte)) remote_flush = true; ++spte; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index c6397795d86..6c4dc010c4c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -78,15 +78,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; } -static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, - gfn_t table_gfn, unsigned index, - pt_element_t orig_pte, pt_element_t new_pte) +static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + pt_element_t __user *ptep_user, unsigned index, + pt_element_t orig_pte, pt_element_t new_pte) { + int npages; pt_element_t ret; pt_element_t *table; struct page *page; - page = gfn_to_page(kvm, table_gfn); + npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); + /* Check if the user is doing something meaningless. */ + if (unlikely(npages != 1)) + return -EFAULT; table = kmap_atomic(page, KM_USER0); ret = CMPXCHG(&table[index], orig_pte, new_pte); @@ -117,6 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gva_t addr, u32 access) { pt_element_t pte; + pt_element_t __user *ptep_user; gfn_t table_gfn; unsigned index, pt_access, uninitialized_var(pte_access); gpa_t pte_gpa; @@ -152,6 +157,9 @@ walk: pt_access = ACC_ALL; for (;;) { + gfn_t real_gfn; + unsigned long host_addr; + index = PT_INDEX(addr, walker->level); table_gfn = gpte_to_gfn(pte); @@ -160,43 +168,64 @@ walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte, - offset, sizeof(pte), - PFERR_USER_MASK|PFERR_WRITE_MASK)) { + real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), + PFERR_USER_MASK|PFERR_WRITE_MASK); + if (unlikely(real_gfn == UNMAPPED_GVA)) { + present = false; + break; + } + real_gfn = gpa_to_gfn(real_gfn); + + host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + if (unlikely(kvm_is_error_hva(host_addr))) { + present = false; + break; + } + + ptep_user = (pt_element_t __user *)((void *)host_addr + offset); + if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) { present = false; break; } trace_kvm_mmu_paging_element(pte, walker->level); - if (!is_present_gpte(pte)) { + if (unlikely(!is_present_gpte(pte))) { present = false; break; } - if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) { + if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, + walker->level))) { rsvd_fault = true; break; } - if (write_fault && !is_writable_pte(pte)) - if (user_fault || is_write_protection(vcpu)) - eperm = true; + if (unlikely(write_fault && !is_writable_pte(pte) + && (user_fault || is_write_protection(vcpu)))) + eperm = true; - if (user_fault && !(pte & PT_USER_MASK)) + if (unlikely(user_fault && !(pte & PT_USER_MASK))) eperm = true; #if PTTYPE == 64 - if (fetch_fault && (pte & PT64_NX_MASK)) + if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) eperm = true; #endif - if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { + if (!eperm && !rsvd_fault + && unlikely(!(pte & PT_ACCESSED_MASK))) { + int ret; trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); - if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, - index, pte, pte|PT_ACCESSED_MASK)) + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, + pte, pte|PT_ACCESSED_MASK); + if (unlikely(ret < 0)) { + present = false; + break; + } else if (ret) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_ACCESSED_MASK; } @@ -241,17 +270,21 @@ walk: --walker->level; } - if (!present || eperm || rsvd_fault) + if (unlikely(!present || eperm || rsvd_fault)) goto error; - if (write_fault && !is_dirty_gpte(pte)) { - bool ret; + if (write_fault && unlikely(!is_dirty_gpte(pte))) { + int ret; trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); - ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, - pte|PT_DIRTY_MASK); - if (ret) + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, + pte, pte|PT_DIRTY_MASK); + if (unlikely(ret < 0)) { + present = false; + goto error; + } else if (ret) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_DIRTY_MASK; walker->ptes[walker->level - 1] = pte; @@ -325,7 +358,7 @@ no_present: } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte, unsigned long mmu_seq) + u64 *spte, const void *pte) { pt_element_t gpte; unsigned pte_access; @@ -342,8 +375,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, kvm_release_pfn_clean(pfn); return; } - if (mmu_notifier_retry(vcpu, mmu_seq)) - return; /* * we call mmu_set_spte() with host_writable = true because that diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6bb15d583e4..506e4fe23ad 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -63,6 +63,10 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +#define TSC_RATIO_RSVD 0xffffff0000000000ULL +#define TSC_RATIO_MIN 0x0000000000000001ULL +#define TSC_RATIO_MAX 0x000000ffffffffffULL + static bool erratum_383_found __read_mostly; static const u32 host_save_user_msrs[] = { @@ -93,14 +97,6 @@ struct nested_state { /* A VMEXIT is required but not yet emulated */ bool exit_required; - /* - * If we vmexit during an instruction emulation we need this to restore - * the l1 guest rip after the emulation - */ - unsigned long vmexit_rip; - unsigned long vmexit_rsp; - unsigned long vmexit_rax; - /* cache for intercepts of the guest */ u32 intercept_cr; u32 intercept_dr; @@ -144,8 +140,13 @@ struct vcpu_svm { unsigned int3_injected; unsigned long int3_rip; u32 apf_reason; + + u64 tsc_ratio; }; +static DEFINE_PER_CPU(u64, current_tsc_ratio); +#define TSC_RATIO_DEFAULT 0x0100000000ULL + #define MSR_INVALID 0xffffffffU static struct svm_direct_access_msrs { @@ -190,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); +static u64 __scale_tsc(u64 ratio, u64 tsc); enum { VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, @@ -376,7 +378,6 @@ struct svm_cpu_data { }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); -static uint32_t svm_features; struct svm_init_data { int cpu; @@ -569,6 +570,10 @@ static int has_svm(void) static void svm_hardware_disable(void *garbage) { + /* Make sure we clean up behind us */ + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + cpu_svm_disable(); } @@ -610,6 +615,11 @@ static int svm_hardware_enable(void *garbage) wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; + } + svm_init_erratum_383(); return 0; @@ -791,6 +801,23 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) kvm_enable_efer_bits(EFER_FFXSR); + if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + u64 max; + + kvm_has_tsc_control = true; + + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines and a value of 0 is used to disable + * tsc-scaling for the vcpu. + */ + max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); + + kvm_max_guest_tsc_khz = max; + } + if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); @@ -802,8 +829,6 @@ static __init int svm_hardware_setup(void) goto err; } - svm_features = cpuid_edx(SVM_CPUID_FUNC); - if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; @@ -854,6 +879,64 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } +static u64 __scale_tsc(u64 ratio, u64 tsc) +{ + u64 mult, frac, _tsc; + + mult = ratio >> 32; + frac = ratio & ((1ULL << 32) - 1); + + _tsc = tsc; + _tsc *= mult; + _tsc += (tsc >> 32) * frac; + _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; + + return _tsc; +} + +static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 _tsc = tsc; + + if (svm->tsc_ratio != TSC_RATIO_DEFAULT) + _tsc = __scale_tsc(svm->tsc_ratio, tsc); + + return _tsc; +} + +static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 ratio; + u64 khz; + + /* TSC scaling supported? */ + if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) + return; + + /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ + if (user_tsc_khz == 0) { + vcpu->arch.virtual_tsc_khz = 0; + svm->tsc_ratio = TSC_RATIO_DEFAULT; + return; + } + + khz = user_tsc_khz; + + /* TSC scaling required - calculate ratio */ + ratio = khz << 32; + do_div(ratio, tsc_khz); + + if (ratio == 0 || ratio & TSC_RATIO_RSVD) { + WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); + return; + } + vcpu->arch.virtual_tsc_khz = user_tsc_khz; + svm->tsc_ratio = ratio; +} + static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -880,6 +963,15 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } +static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + u64 tsc; + + tsc = svm_scale_tsc(vcpu, native_read_tsc()); + + return target_tsc - tsc; +} + static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -975,7 +1067,7 @@ static void init_vmcb(struct vcpu_svm *svm) svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; save->dr7 = 0x400; - save->rflags = 2; + kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; @@ -1048,6 +1140,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } + svm->tsc_ratio = TSC_RATIO_DEFAULT; + err = kvm_vcpu_init(&svm->vcpu, kvm, id); if (err) goto free_svm; @@ -1141,6 +1235,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + + if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && + svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) { + __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio; + wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); + } } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -1365,31 +1465,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_guest_mode(vcpu)) { - /* - * We are here because we run in nested mode, the host kvm - * intercepts cr0 writes but the l1 hypervisor does not. - * But the L1 hypervisor may intercept selective cr0 writes. - * This needs to be checked here. - */ - unsigned long old, new; - - /* Remove bits that would trigger a real cr0 write intercept */ - old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; - new = cr0 & SVM_CR0_SELECTIVE_MASK; - - if (old == new) { - /* cr0 write with ts and mp unchanged */ - svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; - if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) { - svm->nested.vmexit_rip = kvm_rip_read(vcpu); - svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); - svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); - return; - } - } - } - #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { @@ -2127,7 +2202,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); nested_vmcb->save.cr2 = vmcb->save.cr2; nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; - nested_vmcb->save.rflags = vmcb->save.rflags; + nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); nested_vmcb->save.rip = vmcb->save.rip; nested_vmcb->save.rsp = vmcb->save.rsp; nested_vmcb->save.rax = vmcb->save.rax; @@ -2184,7 +2259,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.ds = hsave->save.ds; svm->vmcb->save.gdtr = hsave->save.gdtr; svm->vmcb->save.idtr = hsave->save.idtr; - svm->vmcb->save.rflags = hsave->save.rflags; + kvm_set_rflags(&svm->vcpu, hsave->save.rflags); svm_set_efer(&svm->vcpu, hsave->save.efer); svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); svm_set_cr4(&svm->vcpu, hsave->save.cr4); @@ -2312,7 +2387,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) hsave->save.efer = svm->vcpu.arch.efer; hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); hsave->save.cr4 = svm->vcpu.arch.cr4; - hsave->save.rflags = vmcb->save.rflags; + hsave->save.rflags = kvm_get_rflags(&svm->vcpu); hsave->save.rip = kvm_rip_read(&svm->vcpu); hsave->save.rsp = vmcb->save.rsp; hsave->save.rax = vmcb->save.rax; @@ -2323,7 +2398,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) copy_vmcb_control_area(hsave, vmcb); - if (svm->vmcb->save.rflags & X86_EFLAGS_IF) + if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) svm->vcpu.arch.hflags |= HF_HIF_MASK; else svm->vcpu.arch.hflags &= ~HF_HIF_MASK; @@ -2341,7 +2416,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->vmcb->save.ds = nested_vmcb->save.ds; svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; svm->vmcb->save.idtr = nested_vmcb->save.idtr; - svm->vmcb->save.rflags = nested_vmcb->save.rflags; + kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); @@ -2443,13 +2518,13 @@ static int vmload_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(nested_vmcb, svm->vmcb); nested_svm_unmap(page); @@ -2464,13 +2539,13 @@ static int vmsave_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(svm->vmcb, nested_vmcb); nested_svm_unmap(page); @@ -2676,6 +2751,29 @@ static int emulate_on_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } +bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) +{ + unsigned long cr0 = svm->vcpu.arch.cr0; + bool ret = false; + u64 intercept; + + intercept = svm->nested.intercept; + + if (!is_guest_mode(&svm->vcpu) || + (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) + return false; + + cr0 &= ~SVM_CR0_SELECTIVE_MASK; + val &= ~SVM_CR0_SELECTIVE_MASK; + + if (cr0 ^ val) { + svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; + ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); + } + + return ret; +} + #define CR_VALID (1ULL << 63) static int cr_interception(struct vcpu_svm *svm) @@ -2699,7 +2797,11 @@ static int cr_interception(struct vcpu_svm *svm) val = kvm_register_read(&svm->vcpu, reg); switch (cr) { case 0: - err = kvm_set_cr0(&svm->vcpu, val); + if (!check_selective_cr0_intercepted(svm, val)) + err = kvm_set_cr0(&svm->vcpu, val); + else + return 1; + break; case 3: err = kvm_set_cr3(&svm->vcpu, val); @@ -2744,23 +2846,6 @@ static int cr_interception(struct vcpu_svm *svm) return 1; } -static int cr0_write_interception(struct vcpu_svm *svm) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - int r; - - r = cr_interception(svm); - - if (svm->nested.vmexit_rip) { - kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); - kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp); - kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax); - svm->nested.vmexit_rip = 0; - } - - return r; -} - static int dr_interception(struct vcpu_svm *svm) { int reg, dr; @@ -2813,7 +2898,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_IA32_TSC: { struct vmcb *vmcb = get_host_vmcb(svm); - *data = vmcb->control.tsc_offset + native_read_tsc(); + *data = vmcb->control.tsc_offset + + svm_scale_tsc(vcpu, native_read_tsc()); + break; } case MSR_STAR: @@ -3048,7 +3135,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR4] = cr_interception, [SVM_EXIT_READ_CR8] = cr_interception, [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, - [SVM_EXIT_WRITE_CR0] = cr0_write_interception, + [SVM_EXIT_WRITE_CR0] = cr_interception, [SVM_EXIT_WRITE_CR3] = cr_interception, [SVM_EXIT_WRITE_CR4] = cr_interception, [SVM_EXIT_WRITE_CR8] = cr8_write_interception, @@ -3104,97 +3191,109 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_NPF] = pf_interception, }; -void dump_vmcb(struct kvm_vcpu *vcpu) +static void dump_vmcb(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; pr_err("VMCB Control Area:\n"); - pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); - pr_err("cr_write: %04x\n", control->intercept_cr >> 16); - pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); - pr_err("dr_write: %04x\n", control->intercept_dr >> 16); - pr_err("exceptions: %08x\n", control->intercept_exceptions); - pr_err("intercepts: %016llx\n", control->intercept); - pr_err("pause filter count: %d\n", control->pause_filter_count); - pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); - pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); - pr_err("tsc_offset: %016llx\n", control->tsc_offset); - pr_err("asid: %d\n", control->asid); - pr_err("tlb_ctl: %d\n", control->tlb_ctl); - pr_err("int_ctl: %08x\n", control->int_ctl); - pr_err("int_vector: %08x\n", control->int_vector); - pr_err("int_state: %08x\n", control->int_state); - pr_err("exit_code: %08x\n", control->exit_code); - pr_err("exit_info1: %016llx\n", control->exit_info_1); - pr_err("exit_info2: %016llx\n", control->exit_info_2); - pr_err("exit_int_info: %08x\n", control->exit_int_info); - pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); - pr_err("nested_ctl: %lld\n", control->nested_ctl); - pr_err("nested_cr3: %016llx\n", control->nested_cr3); - pr_err("event_inj: %08x\n", control->event_inj); - pr_err("event_inj_err: %08x\n", control->event_inj_err); - pr_err("lbr_ctl: %lld\n", control->lbr_ctl); - pr_err("next_rip: %016llx\n", control->next_rip); + pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); + pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); + pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); + pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); + pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); + pr_err("%-20s%016llx\n", "intercepts:", control->intercept); + pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); + pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); + pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); + pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); + pr_err("%-20s%d\n", "asid:", control->asid); + pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); + pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); + pr_err("%-20s%08x\n", "int_vector:", control->int_vector); + pr_err("%-20s%08x\n", "int_state:", control->int_state); + pr_err("%-20s%08x\n", "exit_code:", control->exit_code); + pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); + pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); + pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); + pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); + pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); + pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); + pr_err("%-20s%08x\n", "event_inj:", control->event_inj); + pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); + pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); + pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); pr_err("VMCB State Save Area:\n"); - pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", - save->es.selector, save->es.attrib, - save->es.limit, save->es.base); - pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", - save->cs.selector, save->cs.attrib, - save->cs.limit, save->cs.base); - pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", - save->ss.selector, save->ss.attrib, - save->ss.limit, save->ss.base); - pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", - save->ds.selector, save->ds.attrib, - save->ds.limit, save->ds.base); - pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", - save->fs.selector, save->fs.attrib, - save->fs.limit, save->fs.base); - pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", - save->gs.selector, save->gs.attrib, - save->gs.limit, save->gs.base); - pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->gdtr.selector, save->gdtr.attrib, - save->gdtr.limit, save->gdtr.base); - pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->ldtr.selector, save->ldtr.attrib, - save->ldtr.limit, save->ldtr.base); - pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->idtr.selector, save->idtr.attrib, - save->idtr.limit, save->idtr.base); - pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", - save->tr.selector, save->tr.attrib, - save->tr.limit, save->tr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "es:", + save->es.selector, save->es.attrib, + save->es.limit, save->es.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "cs:", + save->cs.selector, save->cs.attrib, + save->cs.limit, save->cs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ss:", + save->ss.selector, save->ss.attrib, + save->ss.limit, save->ss.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ds:", + save->ds.selector, save->ds.attrib, + save->ds.limit, save->ds.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "fs:", + save->fs.selector, save->fs.attrib, + save->fs.limit, save->fs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gs:", + save->gs.selector, save->gs.attrib, + save->gs.limit, save->gs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gdtr:", + save->gdtr.selector, save->gdtr.attrib, + save->gdtr.limit, save->gdtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ldtr:", + save->ldtr.selector, save->ldtr.attrib, + save->ldtr.limit, save->ldtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "idtr:", + save->idtr.selector, save->idtr.attrib, + save->idtr.limit, save->idtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "tr:", + save->tr.selector, save->tr.attrib, + save->tr.limit, save->tr.base); pr_err("cpl: %d efer: %016llx\n", save->cpl, save->efer); - pr_err("cr0: %016llx cr2: %016llx\n", - save->cr0, save->cr2); - pr_err("cr3: %016llx cr4: %016llx\n", - save->cr3, save->cr4); - pr_err("dr6: %016llx dr7: %016llx\n", - save->dr6, save->dr7); - pr_err("rip: %016llx rflags: %016llx\n", - save->rip, save->rflags); - pr_err("rsp: %016llx rax: %016llx\n", - save->rsp, save->rax); - pr_err("star: %016llx lstar: %016llx\n", - save->star, save->lstar); - pr_err("cstar: %016llx sfmask: %016llx\n", - save->cstar, save->sfmask); - pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", - save->kernel_gs_base, save->sysenter_cs); - pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", - save->sysenter_esp, save->sysenter_eip); - pr_err("gpat: %016llx dbgctl: %016llx\n", - save->g_pat, save->dbgctl); - pr_err("br_from: %016llx br_to: %016llx\n", - save->br_from, save->br_to); - pr_err("excp_from: %016llx excp_to: %016llx\n", - save->last_excp_from, save->last_excp_to); - + pr_err("%-15s %016llx %-13s %016llx\n", + "cr0:", save->cr0, "cr2:", save->cr2); + pr_err("%-15s %016llx %-13s %016llx\n", + "cr3:", save->cr3, "cr4:", save->cr4); + pr_err("%-15s %016llx %-13s %016llx\n", + "dr6:", save->dr6, "dr7:", save->dr7); + pr_err("%-15s %016llx %-13s %016llx\n", + "rip:", save->rip, "rflags:", save->rflags); + pr_err("%-15s %016llx %-13s %016llx\n", + "rsp:", save->rsp, "rax:", save->rax); + pr_err("%-15s %016llx %-13s %016llx\n", + "star:", save->star, "lstar:", save->lstar); + pr_err("%-15s %016llx %-13s %016llx\n", + "cstar:", save->cstar, "sfmask:", save->sfmask); + pr_err("%-15s %016llx %-13s %016llx\n", + "kernel_gs_base:", save->kernel_gs_base, + "sysenter_cs:", save->sysenter_cs); + pr_err("%-15s %016llx %-13s %016llx\n", + "sysenter_esp:", save->sysenter_esp, + "sysenter_eip:", save->sysenter_eip); + pr_err("%-15s %016llx %-13s %016llx\n", + "gpat:", save->g_pat, "dbgctl:", save->dbgctl); + pr_err("%-15s %016llx %-13s %016llx\n", + "br_from:", save->br_from, "br_to:", save->br_to); + pr_err("%-15s %016llx %-13s %016llx\n", + "excp_from:", save->last_excp_from, + "excp_to:", save->last_excp_to); } static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) @@ -3384,7 +3483,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) return 0; - ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); + ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); if (is_guest_mode(vcpu)) return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); @@ -3871,6 +3970,186 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) update_cr0_intercept(svm); } +#define PRE_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_PRE_EXCEPT, } +#define POST_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_EXCEPT, } +#define POST_MEM(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_MEMACCESS, } + +static struct __x86_intercept { + u32 exit_code; + enum x86_intercept_stage stage; +} x86_intercept_map[] = { + [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), + [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), + [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ), + [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ), + [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE), + [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE), + [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ), + [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), + [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE), + [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), + [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN), + [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL), + [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD), + [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE), + [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI), + [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), + [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), + [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA), + [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), + [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), + [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), + [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG), + [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD), + [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD), + [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC), + [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), + [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), + [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), + [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE), + [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF), + [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF), + [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT), + [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), + [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), + [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), + [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), +}; + +#undef PRE_EX +#undef POST_EX +#undef POST_MEM + +static int svm_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + struct vcpu_svm *svm = to_svm(vcpu); + int vmexit, ret = X86EMUL_CONTINUE; + struct __x86_intercept icpt_info; + struct vmcb *vmcb = svm->vmcb; + + if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) + goto out; + + icpt_info = x86_intercept_map[info->intercept]; + + if (stage != icpt_info.stage) + goto out; + + switch (icpt_info.exit_code) { + case SVM_EXIT_READ_CR0: + if (info->intercept == x86_intercept_cr_read) + icpt_info.exit_code += info->modrm_reg; + break; + case SVM_EXIT_WRITE_CR0: { + unsigned long cr0, val; + u64 intercept; + + if (info->intercept == x86_intercept_cr_write) + icpt_info.exit_code += info->modrm_reg; + + if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) + break; + + intercept = svm->nested.intercept; + + if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) + break; + + cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; + val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; + + if (info->intercept == x86_intercept_lmsw) { + cr0 &= 0xfUL; + val &= 0xfUL; + /* lmsw can't clear PE - catch this here */ + if (cr0 & X86_CR0_PE) + val |= X86_CR0_PE; + } + + if (cr0 ^ val) + icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; + + break; + } + case SVM_EXIT_READ_DR0: + case SVM_EXIT_WRITE_DR0: + icpt_info.exit_code += info->modrm_reg; + break; + case SVM_EXIT_MSR: + if (info->intercept == x86_intercept_wrmsr) + vmcb->control.exit_info_1 = 1; + else + vmcb->control.exit_info_1 = 0; + break; + case SVM_EXIT_PAUSE: + /* + * We get this for NOP only, but pause + * is rep not, check this here + */ + if (info->rep_prefix != REPE_PREFIX) + goto out; + case SVM_EXIT_IOIO: { + u64 exit_info; + u32 bytes; + + exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + exit_info |= SVM_IOIO_TYPE_MASK; + bytes = info->src_bytes; + } else { + bytes = info->dst_bytes; + } + + if (info->intercept == x86_intercept_outs || + info->intercept == x86_intercept_ins) + exit_info |= SVM_IOIO_STR_MASK; + + if (info->rep_prefix) + exit_info |= SVM_IOIO_REP_MASK; + + bytes = min(bytes, 4u); + + exit_info |= bytes << SVM_IOIO_SIZE_SHIFT; + + exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1); + + vmcb->control.exit_info_1 = exit_info; + vmcb->control.exit_info_2 = info->next_rip; + + break; + } + default: + break; + } + + vmcb->control.next_rip = info->next_rip; + vmcb->control.exit_code = icpt_info.exit_code; + vmexit = nested_svm_exit_handled(svm); + + ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED + : X86EMUL_CONTINUE; + +out: + return ret; +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -3952,10 +4231,14 @@ static struct kvm_x86_ops svm_x86_ops = { .has_wbinvd_exit = svm_has_wbinvd_exit, + .set_tsc_khz = svm_set_tsc_khz, .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, + .compute_tsc_offset = svm_compute_tsc_offset, .set_tdp_cr3 = set_tdp_cr3, + + .check_intercept = svm_check_intercept, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5b4cdcbd154..4c3fa0f6746 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -128,8 +128,11 @@ struct vcpu_vmx { unsigned long host_rsp; int launched; u8 fail; + u8 cpl; + bool nmi_known_unmasked; u32 exit_intr_info; u32 idt_vectoring_info; + ulong rflags; struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; @@ -159,6 +162,10 @@ struct vcpu_vmx { u32 ar; } tr, es, ds, fs, gs; } rmode; + struct { + u32 bitmask; /* 4 bits per segment (1 bit per field) */ + struct kvm_save_segment seg[8]; + } segment_cache; int vpid; bool emulation_required; @@ -171,6 +178,15 @@ struct vcpu_vmx { bool rdtscp_enabled; }; +enum segment_cache_field { + SEG_FIELD_SEL = 0, + SEG_FIELD_BASE = 1, + SEG_FIELD_LIMIT = 2, + SEG_FIELD_AR = 3, + + SEG_FIELD_NR = 4 +}; + static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_vmx, vcpu); @@ -643,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) vmcs_writel(field, vmcs_readl(field) | mask); } +static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +{ + vmx->segment_cache.bitmask = 0; +} + +static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, + unsigned field) +{ + bool ret; + u32 mask = 1 << (seg * SEG_FIELD_NR + field); + + if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { + vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); + vmx->segment_cache.bitmask = 0; + } + ret = vmx->segment_cache.bitmask & mask; + vmx->segment_cache.bitmask |= mask; + return ret; +} + +static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) +{ + u16 *p = &vmx->segment_cache.seg[seg].selector; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) + *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); + return *p; +} + +static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) +{ + ulong *p = &vmx->segment_cache.seg[seg].base; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) + *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); + return *p; +} + +static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) +{ + u32 *p = &vmx->segment_cache.seg[seg].limit; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); + return *p; +} + +static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) +{ + u32 *p = &vmx->segment_cache.seg[seg].ar; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); + return *p; +} + static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; @@ -970,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { unsigned long rflags, save_rflags; - rflags = vmcs_readl(GUEST_RFLAGS); - if (to_vmx(vcpu)->rmode.vm86_active) { - rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - save_rflags = to_vmx(vcpu)->rmode.save_rflags; - rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + rflags = vmcs_readl(GUEST_RFLAGS); + if (to_vmx(vcpu)->rmode.vm86_active) { + rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + save_rflags = to_vmx(vcpu)->rmode.save_rflags; + rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + } + to_vmx(vcpu)->rflags = rflags; } - return rflags; + return to_vmx(vcpu)->rflags; } static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); + to_vmx(vcpu)->rflags = rflags; if (to_vmx(vcpu)->rmode.vm86_active) { to_vmx(vcpu)->rmode.save_rflags = rflags; rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; @@ -1053,7 +1132,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, } if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE) + int inc_eip = 0; + if (kvm_exception_is_soft(nr)) + inc_eip = vcpu->arch.event_exit_inst_len; + if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -1151,6 +1233,16 @@ static u64 guest_read_tsc(void) } /* + * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ + * ioctl. In this case the call-back should update internal vmx state to make + * the changes effective. + */ +static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) +{ + /* Nothing to do here */ +} + +/* * writes 'offset' into guest's timestamp counter offset register */ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) @@ -1164,6 +1256,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) vmcs_write64(TSC_OFFSET, offset + adjustment); } +static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + return target_tsc - native_read_tsc(); +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -1243,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) break; #ifdef CONFIG_X86_64 case MSR_FS_BASE: + vmx_segment_cache_clear(vmx); vmcs_writel(GUEST_FS_BASE, data); break; case MSR_GS_BASE: + vmx_segment_cache_clear(vmx); vmcs_writel(GUEST_GS_BASE, data); break; case MSR_KERNEL_GS_BASE: @@ -1689,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx->emulation_required = 1; vmx->rmode.vm86_active = 0; + vmx_segment_cache_clear(vmx); + vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); @@ -1712,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); + vmx_segment_cache_clear(vmx); + vmcs_write16(GUEST_SS_SELECTOR, 0); vmcs_write32(GUEST_SS_AR_BYTES, 0x93); @@ -1775,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); } + vmx_segment_cache_clear(vmx); + vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); @@ -1851,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) { u32 guest_tr_ar; + vmx_segment_cache_clear(to_vmx(vcpu)); + guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { printk(KERN_DEBUG "%s: tss fixup for long mode. \n", @@ -1998,6 +2105,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static u64 construct_eptp(unsigned long root_hpa) @@ -2053,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; struct kvm_save_segment *save; u32 ar; @@ -2075,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->limit = save->limit; ar = save->ar; if (seg == VCPU_SREG_TR - || var->selector == vmcs_read16(sf->selector)) + || var->selector == vmx_read_guest_seg_selector(vmx, seg)) goto use_saved_rmode_seg; } - var->base = vmcs_readl(sf->base); - var->limit = vmcs_read32(sf->limit); - var->selector = vmcs_read16(sf->selector); - ar = vmcs_read32(sf->ar_bytes); + var->base = vmx_read_guest_seg_base(vmx, seg); + var->limit = vmx_read_guest_seg_limit(vmx, seg); + var->selector = vmx_read_guest_seg_selector(vmx, seg); + ar = vmx_read_guest_seg_ar(vmx, seg); use_saved_rmode_seg: if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) ar = 0; @@ -2098,27 +2205,37 @@ use_saved_rmode_seg: static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; struct kvm_segment s; if (to_vmx(vcpu)->rmode.vm86_active) { vmx_get_segment(vcpu, &s, seg); return s.base; } - return vmcs_readl(sf->base); + return vmx_read_guest_seg_base(to_vmx(vcpu), seg); } -static int vmx_get_cpl(struct kvm_vcpu *vcpu) +static int __vmx_get_cpl(struct kvm_vcpu *vcpu) { if (!is_protmode(vcpu)) return 0; - if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ + if (!is_long_mode(vcpu) + && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ return 3; - return vmcs_read16(GUEST_CS_SELECTOR) & 3; + return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; } +static int vmx_get_cpl(struct kvm_vcpu *vcpu) +{ + if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { + __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); + to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu); + } + return to_vmx(vcpu)->cpl; +} + + static u32 vmx_segment_access_rights(struct kvm_segment *var) { u32 ar; @@ -2148,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; u32 ar; + vmx_segment_cache_clear(vmx); + if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { vmcs_write16(sf->selector, var->selector); vmx->rmode.tr.selector = var->selector; @@ -2184,11 +2303,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, ar |= 0x1; /* Accessed */ vmcs_write32(sf->ar_bytes, ar); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) { - u32 ar = vmcs_read32(GUEST_CS_AR_BYTES); + u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); *db = (ar >> 14) & 1; *l = (ar >> 13) & 1; @@ -2775,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) if (ret != 0) goto out; + vmx_segment_cache_clear(vmx); + seg_setup(VCPU_SREG_CS); /* * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode @@ -2904,7 +3026,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) ++vcpu->stat.irq_injections; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE) + int inc_eip = 0; + if (vcpu->arch.interrupt.soft) + inc_eip = vcpu->arch.event_exit_inst_len; + if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -2937,8 +3062,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) } ++vcpu->stat.nmi_injections; + vmx->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE) + if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -2961,6 +3087,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) { if (!cpu_has_virtual_nmis()) return to_vmx(vcpu)->soft_vnmi_blocked; + if (to_vmx(vcpu)->nmi_known_unmasked) + return false; return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; } @@ -2974,6 +3102,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) vmx->vnmi_blocked_time = 0; } } else { + vmx->nmi_known_unmasked = !masked; if (masked) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); @@ -3091,7 +3220,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) enum emulation_result er; vect_info = vmx->idt_vectoring_info; - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + intr_info = vmx->exit_intr_info; if (is_machine_check(intr_info)) return handle_machine_check(vcpu); @@ -3122,7 +3251,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) } error_code = 0; - rip = kvm_rip_read(vcpu); if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { @@ -3169,6 +3297,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) vmx->vcpu.arch.event_exit_inst_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); kvm_run->exit_reason = KVM_EXIT_DEBUG; + rip = kvm_rip_read(vcpu); kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; kvm_run->debug.arch.exception = ex_no; break; @@ -3505,9 +3634,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) switch (type) { case INTR_TYPE_NMI_INTR: vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); + vmx_set_nmi_mask(vcpu, true); break; case INTR_TYPE_EXT_INTR: case INTR_TYPE_SOFT_INTR: @@ -3867,12 +3994,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { - u32 exit_intr_info = vmx->exit_intr_info; + u32 exit_intr_info; + + if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY + || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) + return; + + vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + exit_intr_info = vmx->exit_intr_info; /* Handle machine checks before interrupts are enabled */ - if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) - || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI - && is_machine_check(exit_intr_info))) + if (is_machine_check(exit_intr_info)) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ @@ -3886,7 +4018,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { - u32 exit_intr_info = vmx->exit_intr_info; + u32 exit_intr_info; bool unblock_nmi; u8 vector; bool idtv_info_valid; @@ -3894,6 +4026,13 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; if (cpu_has_virtual_nmis()) { + if (vmx->nmi_known_unmasked) + return; + /* + * Can't use vmx->exit_intr_info since we're not sure what + * the exit reason is. + */ + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; vector = exit_intr_info & INTR_INFO_VECTOR_MASK; /* @@ -3910,6 +4049,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) vector != DF_VECTOR && !idtv_info_valid) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + else + vmx->nmi_known_unmasked = + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) + & GUEST_INTR_STATE_NMI); } else if (unlikely(vmx->soft_vnmi_blocked)) vmx->vnmi_blocked_time += ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); @@ -3946,8 +4089,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, * Clear bit "block by NMI" before VM entry if a NMI * delivery faulted. */ - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); + vmx_set_nmi_mask(&vmx->vcpu, false); break; case INTR_TYPE_SOFT_EXCEPTION: vmx->vcpu.arch.event_exit_inst_len = @@ -4124,7 +4266,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ); vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) + | (1 << VCPU_EXREG_RFLAGS) + | (1 << VCPU_EXREG_CPL) | (1 << VCPU_EXREG_PDPTR) + | (1 << VCPU_EXREG_SEGMENTS) | (1 << VCPU_EXREG_CR3)); vcpu->arch.regs_dirty = 0; @@ -4134,7 +4279,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); vmx_complete_atomic_exit(vmx); vmx_recover_nmi_blocking(vmx); @@ -4195,8 +4339,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vcpu; vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + err = -ENOMEM; if (!vmx->guest_msrs) { - err = -ENOMEM; goto uninit_vcpu; } @@ -4215,7 +4359,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_vmcs; if (vm_need_virtualize_apic_accesses(kvm)) - if (alloc_apic_access_page(kvm) != 0) + err = alloc_apic_access_page(kvm); + if (err) goto free_vmcs; if (enable_ept) { @@ -4368,6 +4513,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { } +static int vmx_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + return X86EMUL_CONTINUE; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -4449,10 +4601,14 @@ static struct kvm_x86_ops vmx_x86_ops = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, + .set_tsc_khz = vmx_set_tsc_khz, .write_tsc_offset = vmx_write_tsc_offset, .adjust_tsc_offset = vmx_adjust_tsc_offset, + .compute_tsc_offset = vmx_compute_tsc_offset, .set_tdp_cr3 = vmx_set_cr3, + + .check_intercept = vmx_check_intercept, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 934b4c6b0bf..77c9d8673dc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -60,22 +60,12 @@ #include <asm/div64.h> #define MAX_IO_MSRS 256 -#define CR0_RESERVED_BITS \ - (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ - | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ - | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR4_RESERVED_BITS \ - (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ - | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXSAVE \ - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) - -#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) - #define KVM_MAX_MCE_BANKS 32 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) +#define emul_to_vcpu(ctxt) \ + container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) + /* EFER defaults: * - enable syscall per default because its emulated by KVM * - enable LME and LMA per default on 64 bit KVM @@ -100,6 +90,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); int ignore_msrs = 0; module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); +bool kvm_has_tsc_control; +EXPORT_SYMBOL_GPL(kvm_has_tsc_control); +u32 kvm_max_guest_tsc_khz; +EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); + #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -157,6 +152,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { u64 __read_mostly host_xcr0; +int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); + static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; @@ -361,8 +358,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) void kvm_inject_nmi(struct kvm_vcpu *vcpu) { - kvm_make_request(KVM_REQ_NMI, vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu); + vcpu->arch.nmi_pending = 1; } EXPORT_SYMBOL_GPL(kvm_inject_nmi); @@ -982,7 +979,15 @@ static inline int kvm_tsc_changes_freq(void) return ret; } -static inline u64 nsec_to_cycles(u64 nsec) +static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.virtual_tsc_khz) + return vcpu->arch.virtual_tsc_khz; + else + return __this_cpu_read(cpu_tsc_khz); +} + +static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) { u64 ret; @@ -990,25 +995,24 @@ static inline u64 nsec_to_cycles(u64 nsec) if (kvm_tsc_changes_freq()) printk_once(KERN_WARNING "kvm: unreliable cycle conversion on adjustable rate TSC\n"); - ret = nsec * __this_cpu_read(cpu_tsc_khz); + ret = nsec * vcpu_tsc_khz(vcpu); do_div(ret, USEC_PER_SEC); return ret; } -static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz) +static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz) { /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, - &kvm->arch.virtual_tsc_shift, - &kvm->arch.virtual_tsc_mult); - kvm->arch.virtual_tsc_khz = this_tsc_khz; + &vcpu->arch.tsc_catchup_shift, + &vcpu->arch.tsc_catchup_mult); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) { u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, - vcpu->kvm->arch.virtual_tsc_mult, - vcpu->kvm->arch.virtual_tsc_shift); + vcpu->arch.tsc_catchup_mult, + vcpu->arch.tsc_catchup_shift); tsc += vcpu->arch.last_tsc_write; return tsc; } @@ -1021,7 +1025,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) s64 sdiff; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); - offset = data - native_read_tsc(); + offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; sdiff = data - kvm->arch.last_tsc_write; @@ -1037,13 +1041,13 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) * In that case, for a reliable TSC, we can match TSC offsets, * or make a best guest using elapsed value. */ - if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) && + if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && elapsed < 5ULL * NSEC_PER_SEC) { if (!check_tsc_unstable()) { offset = kvm->arch.last_tsc_offset; pr_debug("kvm: matched tsc offset for %llu\n", data); } else { - u64 delta = nsec_to_cycles(elapsed); + u64 delta = nsec_to_cycles(vcpu, elapsed); offset += delta; pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } @@ -1075,8 +1079,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) local_irq_save(flags); kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); kernel_ns = get_kernel_ns(); - this_tsc_khz = __this_cpu_read(cpu_tsc_khz); - + this_tsc_khz = vcpu_tsc_khz(v); if (unlikely(this_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); @@ -1993,6 +1996,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_X86_ROBUST_SINGLESTEP: case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: + case KVM_CAP_GET_TSC_KHZ: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2019,6 +2023,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_XCRS: r = cpu_has_xsave; break; + case KVM_CAP_TSC_CONTROL: + r = kvm_has_tsc_control; + break; default: r = 0; break; @@ -2120,8 +2127,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { /* Make sure TSC doesn't go backwards */ - s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : - native_read_tsc() - vcpu->arch.last_host_tsc; + s64 tsc_delta; + u64 tsc; + + kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); + tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : + tsc - vcpu->arch.last_guest_tsc; + if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { @@ -2139,7 +2151,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - vcpu->arch.last_host_tsc = native_read_tsc(); + kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); } static int is_efer_nx(void) @@ -2324,6 +2336,12 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0xC0000001.edx */ + const u32 kvm_supported_word5_x86_features = + F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | + F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | + F(PMM) | F(PMM_EN); + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); do_cpuid_1_ent(entry, function, index); @@ -2418,6 +2436,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | (1 << KVM_FEATURE_NOP_IO_DELAY) | (1 << KVM_FEATURE_CLOCKSOURCE2) | + (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); entry->ebx = 0; entry->ecx = 0; @@ -2432,6 +2451,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= kvm_supported_word6_x86_features; cpuid_mask(&entry->ecx, 6); break; + /*Add support for Centaur's CPUID instruction*/ + case 0xC0000000: + /*Just support up to 0xC0000004 now*/ + entry->eax = min(entry->eax, 0xC0000004); + break; + case 0xC0000001: + entry->edx &= kvm_supported_word5_x86_features; + cpuid_mask(&entry->edx, 5); + break; + case 0xC0000002: + case 0xC0000003: + case 0xC0000004: + /*Now nothing to do, reserved for the future*/ + break; } kvm_x86_ops->set_supported_cpuid(function, entry); @@ -2478,6 +2511,26 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, if (nent >= cpuid->nent) goto out_free; + /* Add support for Centaur's CPUID instruction. */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { + do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, + &nent, cpuid->nent); + + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + + limit = cpuid_entries[nent - 1].eax; + for (func = 0xC0000001; + func <= limit && nent < cpuid->nent; ++func) + do_cpuid_ent(&cpuid_entries[nent], func, 0, + &nent, cpuid->nent); + + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + } + do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, cpuid->nent); @@ -3046,6 +3099,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); break; } + case KVM_SET_TSC_KHZ: { + u32 user_tsc_khz; + + r = -EINVAL; + if (!kvm_has_tsc_control) + break; + + user_tsc_khz = (u32)arg; + + if (user_tsc_khz >= kvm_max_guest_tsc_khz) + goto out; + + kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); + + r = 0; + goto out; + } + case KVM_GET_TSC_KHZ: { + r = -EIO; + if (check_tsc_unstable()) + goto out; + + r = vcpu_tsc_khz(vcpu); + + goto out; + } default: r = -EINVAL; } @@ -3595,20 +3674,43 @@ static void kvm_init_msr_list(void) static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, const void *v) { - if (vcpu->arch.apic && - !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) - return 0; + int handled = 0; + int n; - return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); + do { + n = min(len, 8); + if (!(vcpu->arch.apic && + !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + break; + handled += n; + addr += n; + len -= n; + v += n; + } while (len); + + return handled; } static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) { - if (vcpu->arch.apic && - !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) - return 0; + int handled = 0; + int n; + + do { + n = min(len, 8); + if (!(vcpu->arch.apic && + !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + break; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + handled += n; + addr += n; + len -= n; + v += n; + } while (len); - return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); + return handled; } static void kvm_set_segment(struct kvm_vcpu *vcpu, @@ -3703,37 +3805,43 @@ out: } /* used for instruction fetching */ -static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access | PFERR_FETCH_MASK, exception); } -static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } -static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); } -static int kvm_write_guest_virt_system(gva_t addr, void *val, +static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); void *data = val; int r = X86EMUL_CONTINUE; @@ -3761,13 +3869,15 @@ out: return r; } -static int emulator_read_emulated(unsigned long addr, +static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; + int handled; if (vcpu->mmio_read_completed) { memcpy(val, vcpu->mmio_data, bytes); @@ -3786,7 +3896,7 @@ static int emulator_read_emulated(unsigned long addr, if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) goto mmio; - if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception) + if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) == X86EMUL_CONTINUE) return X86EMUL_CONTINUE; @@ -3794,18 +3904,24 @@ mmio: /* * Is this MMIO handled locally? */ - if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); + handled = vcpu_mmio_read(vcpu, gpa, bytes, val); + + if (handled == bytes) return X86EMUL_CONTINUE; - } + + gpa += handled; + bytes -= handled; + val += handled; trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); vcpu->mmio_needed = 1; vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; + vcpu->mmio_index = 0; return X86EMUL_IO_NEEDED; } @@ -3829,6 +3945,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, struct kvm_vcpu *vcpu) { gpa_t gpa; + int handled; gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); @@ -3847,25 +3964,35 @@ mmio: /* * Is this MMIO handled locally? */ - if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) + handled = vcpu_mmio_write(vcpu, gpa, bytes, val); + if (handled == bytes) return X86EMUL_CONTINUE; + gpa += handled; + bytes -= handled; + val += handled; + vcpu->mmio_needed = 1; + memcpy(vcpu->mmio_data, val, bytes); vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; - memcpy(vcpu->run->mmio.data, val, bytes); + memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); + vcpu->mmio_index = 0; return X86EMUL_CONTINUE; } -int emulator_write_emulated(unsigned long addr, +int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *val, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + /* Crossing a page boundary? */ if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { int rc, now; @@ -3893,13 +4020,14 @@ int emulator_write_emulated(unsigned long addr, (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) #endif -static int emulator_cmpxchg_emulated(unsigned long addr, +static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *old, const void *new, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; struct page *page; char *kaddr; @@ -3955,7 +4083,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, emul_write: printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); - return emulator_write_emulated(addr, new, bytes, exception, vcpu); + return emulator_write_emulated(ctxt, addr, new, bytes, exception); } static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) @@ -3974,9 +4102,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) } -static int emulator_pio_in_emulated(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu) +static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, void *val, + unsigned int count) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + if (vcpu->arch.pio.count) goto data_avail; @@ -4004,10 +4135,12 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val, return 0; } -static int emulator_pio_out_emulated(int size, unsigned short port, - const void *val, unsigned int count, - struct kvm_vcpu *vcpu) +static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, + const void *val, unsigned int count) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + trace_kvm_pio(1, port, size, count); vcpu->arch.pio.port = port; @@ -4037,10 +4170,9 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) return kvm_x86_ops->get_segment_base(vcpu, seg); } -int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) +static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) { - kvm_mmu_invlpg(vcpu, address); - return X86EMUL_CONTINUE; + kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); } int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) @@ -4062,22 +4194,20 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); -int emulate_clts(struct kvm_vcpu *vcpu) +static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) { - kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); - kvm_x86_ops->fpu_activate(vcpu); - return X86EMUL_CONTINUE; + kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); } -int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) +int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { - return _kvm_get_dr(vcpu, dr, dest); + return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); } -int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) +int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) { - return __kvm_set_dr(vcpu, dr, value); + return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); } static u64 mk_cr_64(u64 curr_cr, u32 new_val) @@ -4085,8 +4215,9 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val) return (curr_cr & ~((1ULL << 32) - 1)) | new_val; } -static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) +static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); unsigned long value; switch (cr) { @@ -4113,8 +4244,9 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) return value; } -static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) +static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int res = 0; switch (cr) { @@ -4141,33 +4273,45 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) return res; } -static int emulator_get_cpl(struct kvm_vcpu *vcpu) +static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) +{ + return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); +} + +static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) +{ + kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt); +} + +static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - return kvm_x86_ops->get_cpl(vcpu); + kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); } -static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) +static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - kvm_x86_ops->get_gdt(vcpu, dt); + kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt); } -static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) +static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - kvm_x86_ops->get_idt(vcpu, dt); + kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt); } -static unsigned long emulator_get_cached_segment_base(int seg, - struct kvm_vcpu *vcpu) +static unsigned long emulator_get_cached_segment_base( + struct x86_emulate_ctxt *ctxt, int seg) { - return get_segment_base(vcpu, seg); + return get_segment_base(emul_to_vcpu(ctxt), seg); } -static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, - int seg, struct kvm_vcpu *vcpu) +static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, + struct desc_struct *desc, u32 *base3, + int seg) { struct kvm_segment var; - kvm_get_segment(vcpu, &var, seg); + kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); + *selector = var.selector; if (var.unusable) return false; @@ -4192,14 +4336,14 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, return true; } -static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, - int seg, struct kvm_vcpu *vcpu) +static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, + struct desc_struct *desc, u32 base3, + int seg) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); struct kvm_segment var; - /* needed to preserve selector */ - kvm_get_segment(vcpu, &var, seg); - + var.selector = selector; var.base = get_desc_base(desc); #ifdef CONFIG_X86_64 var.base |= ((u64)base3) << 32; @@ -4223,22 +4367,44 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, return; } -static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) +static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 *pdata) { - struct kvm_segment kvm_seg; + return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); +} - kvm_get_segment(vcpu, &kvm_seg, seg); - return kvm_seg.selector; +static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 data) +{ + return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); } -static void emulator_set_segment_selector(u16 sel, int seg, - struct kvm_vcpu *vcpu) +static void emulator_halt(struct x86_emulate_ctxt *ctxt) { - struct kvm_segment kvm_seg; + emul_to_vcpu(ctxt)->arch.halt_request = 1; +} - kvm_get_segment(vcpu, &kvm_seg, seg); - kvm_seg.selector = sel; - kvm_set_segment(vcpu, &kvm_seg, seg); +static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) +{ + preempt_disable(); + kvm_load_guest_fpu(emul_to_vcpu(ctxt)); + /* + * CR0.TS may reference the host fpu state, not the guest fpu state, + * so it may be clear at this point. + */ + clts(); +} + +static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) +{ + preempt_enable(); +} + +static int emulator_intercept(struct x86_emulate_ctxt *ctxt, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); } static struct x86_emulate_ops emulate_ops = { @@ -4248,22 +4414,29 @@ static struct x86_emulate_ops emulate_ops = { .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, + .invlpg = emulator_invlpg, .pio_in_emulated = emulator_pio_in_emulated, .pio_out_emulated = emulator_pio_out_emulated, - .get_cached_descriptor = emulator_get_cached_descriptor, - .set_cached_descriptor = emulator_set_cached_descriptor, - .get_segment_selector = emulator_get_segment_selector, - .set_segment_selector = emulator_set_segment_selector, + .get_segment = emulator_get_segment, + .set_segment = emulator_set_segment, .get_cached_segment_base = emulator_get_cached_segment_base, .get_gdt = emulator_get_gdt, .get_idt = emulator_get_idt, + .set_gdt = emulator_set_gdt, + .set_idt = emulator_set_idt, .get_cr = emulator_get_cr, .set_cr = emulator_set_cr, .cpl = emulator_get_cpl, .get_dr = emulator_get_dr, .set_dr = emulator_set_dr, - .set_msr = kvm_set_msr, - .get_msr = kvm_get_msr, + .set_msr = emulator_set_msr, + .get_msr = emulator_get_msr, + .halt = emulator_halt, + .wbinvd = emulator_wbinvd, + .fix_hypercall = emulator_fix_hypercall, + .get_fpu = emulator_get_fpu, + .put_fpu = emulator_put_fpu, + .intercept = emulator_intercept, }; static void cache_all_regs(struct kvm_vcpu *vcpu) @@ -4305,12 +4478,17 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; int cs_db, cs_l; + /* + * TODO: fix emulate.c to use guest_read/write_register + * instead of direct ->regs accesses, can save hundred cycles + * on Intel for instructions that don't read/change RSP, for + * for example. + */ cache_all_regs(vcpu); kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - vcpu->arch.emulate_ctxt.vcpu = vcpu; - vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); + vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); vcpu->arch.emulate_ctxt.mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : @@ -4318,11 +4496,13 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) ? X86EMUL_MODE_VM86 : cs_l ? X86EMUL_MODE_PROT64 : cs_db ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); memset(c, 0, sizeof(struct decode_cache)); memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) +int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) { struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; int ret; @@ -4331,7 +4511,8 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) vcpu->arch.emulate_ctxt.decode.op_bytes = 2; vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; - vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip; + vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip + + inc_eip; ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); if (ret != X86EMUL_CONTINUE) @@ -4340,7 +4521,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) vcpu->arch.emulate_ctxt.eip = c->eip; memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); if (irq == NMI_VECTOR) vcpu->arch.nmi_pending = false; @@ -4402,16 +4583,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, { int r; struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; + bool writeback = true; kvm_clear_exception_queue(vcpu); - vcpu->arch.mmio_fault_cr2 = cr2; - /* - * TODO: fix emulate.c to use guest_read/write_register - * instead of direct ->regs accesses, can save hundred cycles - * on Intel for instructions that don't read/change RSP, for - * for example. - */ - cache_all_regs(vcpu); if (!(emulation_type & EMULTYPE_NO_DECODE)) { init_emulate_ctxt(vcpu); @@ -4442,13 +4616,19 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DONE; } - /* this is needed for vmware backdor interface to work since it + /* this is needed for vmware backdoor interface to work since it changes registers values during IO operation */ - memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; + memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + } restart: r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); + if (r == EMULATION_INTERCEPTED) + return EMULATE_DONE; + if (r == EMULATION_FAILED) { if (reexecute_instruction(vcpu, cr2)) return EMULATE_DONE; @@ -4462,21 +4642,28 @@ restart: } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) vcpu->arch.pio.count = 0; + else + writeback = false; r = EMULATE_DO_MMIO; } else if (vcpu->mmio_needed) { - if (vcpu->mmio_is_write) - vcpu->mmio_needed = 0; + if (!vcpu->mmio_is_write) + writeback = false; r = EMULATE_DO_MMIO; } else if (r == EMULATION_RESTART) goto restart; else r = EMULATE_DONE; - toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); - kvm_make_request(KVM_REQ_EVENT, vcpu); - memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); - kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); + if (writeback) { + toggle_interruptibility(vcpu, + vcpu->arch.emulate_ctxt.interruptibility); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_make_request(KVM_REQ_EVENT, vcpu); + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); + } else + vcpu->arch.emulate_regs_need_sync_to_vcpu = true; return r; } @@ -4485,7 +4672,8 @@ EXPORT_SYMBOL_GPL(x86_emulate_instruction); int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) { unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); - int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); + int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, + size, port, &val, 1); /* do not return to emulator after return from userspace */ vcpu->arch.pio.count = 0; return ret; @@ -4879,8 +5067,9 @@ out: } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); -int kvm_fix_hypercall(struct kvm_vcpu *vcpu) +int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); char instruction[3]; unsigned long rip = kvm_rip_read(vcpu); @@ -4893,21 +5082,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) kvm_x86_ops->patch_hypercall(vcpu, instruction); - return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); -} - -void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -{ - struct desc_ptr dt = { limit, base }; - - kvm_x86_ops->set_gdt(vcpu, &dt); -} - -void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -{ - struct desc_ptr dt = { limit, base }; - - kvm_x86_ops->set_idt(vcpu, &dt); + return emulator_write_emulated(&vcpu->arch.emulate_ctxt, + rip, instruction, 3, NULL); } static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) @@ -5170,6 +5346,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; + bool nmi_pending; bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && vcpu->run->request_interrupt_window; @@ -5207,19 +5384,25 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 1; goto out; } - if (kvm_check_request(KVM_REQ_NMI, vcpu)) - vcpu->arch.nmi_pending = true; } r = kvm_mmu_reload(vcpu); if (unlikely(r)) goto out; + /* + * An NMI can be injected between local nmi_pending read and + * vcpu->arch.nmi_pending read inside inject_pending_event(). + * But in that case, KVM_REQ_EVENT will be set, which makes + * the race described above benign. + */ + nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); + if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { inject_pending_event(vcpu); /* enable NMI/IRQ window open exits if needed */ - if (vcpu->arch.nmi_pending) + if (nmi_pending) kvm_x86_ops->enable_nmi_window(vcpu); else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) kvm_x86_ops->enable_irq_window(vcpu); @@ -5399,6 +5582,41 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return r; } +static int complete_mmio(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int r; + + if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) + return 1; + + if (vcpu->mmio_needed) { + vcpu->mmio_needed = 0; + if (!vcpu->mmio_is_write) + memcpy(vcpu->mmio_data + vcpu->mmio_index, + run->mmio.data, 8); + vcpu->mmio_index += 8; + if (vcpu->mmio_index < vcpu->mmio_size) { + run->exit_reason = KVM_EXIT_MMIO; + run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; + memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); + run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); + run->mmio.is_write = vcpu->mmio_is_write; + vcpu->mmio_needed = 1; + return 0; + } + if (vcpu->mmio_is_write) + return 1; + vcpu->mmio_read_completed = 1; + } + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (r != EMULATE_DONE) + return 0; + return 1; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -5425,20 +5643,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } - if (vcpu->arch.pio.count || vcpu->mmio_needed) { - if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); - vcpu->mmio_read_completed = 1; - vcpu->mmio_needed = 0; - } - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (r != EMULATE_DONE) { - r = 0; - goto out; - } - } + r = complete_mmio(vcpu); + if (r <= 0) + goto out; + if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) kvm_register_write(vcpu, VCPU_REGS_RAX, kvm_run->hypercall.ret); @@ -5455,6 +5663,18 @@ out: int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { + /* + * We are here if userspace calls get_regs() in the middle of + * instruction emulation. Registers state needs to be copied + * back from emulation context to vcpu. Usrapace shouldn't do + * that usually, but some bad designed PV devices (vmware + * backdoor interface) need this to work + */ + struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + } regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); @@ -5482,6 +5702,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu->arch.emulate_regs_need_sync_from_vcpu = true; + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); @@ -5592,7 +5815,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); return EMULATE_DONE; } @@ -5974,8 +6197,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) } vcpu->arch.pio_data = page_address(page); - if (!kvm->arch.virtual_tsc_khz) - kvm_arch_set_tsc_khz(kvm, max_tsc_khz); + kvm_init_tsc_catchup(vcpu, max_tsc_khz); r = kvm_mmu_create(vcpu); if (r < 0) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c600da830ce..e407ed3df81 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -77,7 +77,7 @@ static inline u32 bit(int bitno) void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); +int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 20e3f8702d1..bcb394dfbb3 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -12,6 +12,7 @@ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ #include <linux/perf_event.h> /* perf_sw_event */ #include <linux/hugetlb.h> /* hstate_index_to_shift */ +#include <linux/prefetch.h> /* prefetchw */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/pgalloc.h> /* pgd_*(), ... */ diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile new file mode 100644 index 00000000000..90568c33ddb --- /dev/null +++ b/arch/x86/net/Makefile @@ -0,0 +1,4 @@ +# +# Arch-specific network modules +# +obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S new file mode 100644 index 00000000000..66870223f8c --- /dev/null +++ b/arch/x86/net/bpf_jit.S @@ -0,0 +1,140 @@ +/* bpf_jit.S : BPF JIT helper functions + * + * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/linkage.h> +#include <asm/dwarf2.h> + +/* + * Calling convention : + * rdi : skb pointer + * esi : offset of byte(s) to fetch in skb (can be scratched) + * r8 : copy of skb->data + * r9d : hlen = skb->len - skb->data_len + */ +#define SKBDATA %r8 + +sk_load_word_ind: + .globl sk_load_word_ind + + add %ebx,%esi /* offset += X */ +# test %esi,%esi /* if (offset < 0) goto bpf_error; */ + js bpf_error + +sk_load_word: + .globl sk_load_word + + mov %r9d,%eax # hlen + sub %esi,%eax # hlen - offset + cmp $3,%eax + jle bpf_slow_path_word + mov (SKBDATA,%rsi),%eax + bswap %eax /* ntohl() */ + ret + + +sk_load_half_ind: + .globl sk_load_half_ind + + add %ebx,%esi /* offset += X */ + js bpf_error + +sk_load_half: + .globl sk_load_half + + mov %r9d,%eax + sub %esi,%eax # hlen - offset + cmp $1,%eax + jle bpf_slow_path_half + movzwl (SKBDATA,%rsi),%eax + rol $8,%ax # ntohs() + ret + +sk_load_byte_ind: + .globl sk_load_byte_ind + add %ebx,%esi /* offset += X */ + js bpf_error + +sk_load_byte: + .globl sk_load_byte + + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ + jle bpf_slow_path_byte + movzbl (SKBDATA,%rsi),%eax + ret + +/** + * sk_load_byte_msh - BPF_S_LDX_B_MSH helper + * + * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) + * Must preserve A accumulator (%eax) + * Inputs : %esi is the offset value, already known positive + */ +ENTRY(sk_load_byte_msh) + CFI_STARTPROC + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ + jle bpf_slow_path_byte_msh + movzbl (SKBDATA,%rsi),%ebx + and $15,%bl + shl $2,%bl + ret + CFI_ENDPROC +ENDPROC(sk_load_byte_msh) + +bpf_error: +# force a return 0 from jit handler + xor %eax,%eax + mov -8(%rbp),%rbx + leaveq + ret + +/* rsi contains offset and can be scratched */ +#define bpf_slow_path_common(LEN) \ + push %rdi; /* save skb */ \ + push %r9; \ + push SKBDATA; \ +/* rsi already has offset */ \ + mov $LEN,%ecx; /* len */ \ + lea -12(%rbp),%rdx; \ + call skb_copy_bits; \ + test %eax,%eax; \ + pop SKBDATA; \ + pop %r9; \ + pop %rdi + + +bpf_slow_path_word: + bpf_slow_path_common(4) + js bpf_error + mov -12(%rbp),%eax + bswap %eax + ret + +bpf_slow_path_half: + bpf_slow_path_common(2) + js bpf_error + mov -12(%rbp),%ax + rol $8,%ax + movzwl %ax,%eax + ret + +bpf_slow_path_byte: + bpf_slow_path_common(1) + js bpf_error + movzbl -12(%rbp),%eax + ret + +bpf_slow_path_byte_msh: + xchg %eax,%ebx /* dont lose A , X is about to be scratched */ + bpf_slow_path_common(1) + js bpf_error + movzbl -12(%rbp),%eax + and $15,%al + shl $2,%al + xchg %eax,%ebx + ret diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c new file mode 100644 index 00000000000..bfab3fa10ed --- /dev/null +++ b/arch/x86/net/bpf_jit_comp.c @@ -0,0 +1,654 @@ +/* bpf_jit_comp.c : BPF JIT compiler + * + * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/moduleloader.h> +#include <asm/cacheflush.h> +#include <linux/netdevice.h> +#include <linux/filter.h> + +/* + * Conventions : + * EAX : BPF A accumulator + * EBX : BPF X accumulator + * RDI : pointer to skb (first argument given to JIT function) + * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) + * ECX,EDX,ESI : scratch registers + * r9d : skb->len - skb->data_len (headlen) + * r8 : skb->data + * -8(RBP) : saved RBX value + * -16(RBP)..-80(RBP) : BPF_MEMWORDS values + */ +int bpf_jit_enable __read_mostly; + +/* + * assembly code in arch/x86/net/bpf_jit.S + */ +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; + +static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) +{ + if (len == 1) + *ptr = bytes; + else if (len == 2) + *(u16 *)ptr = bytes; + else { + *(u32 *)ptr = bytes; + barrier(); + } + return ptr + len; +} + +#define EMIT(bytes, len) do { prog = emit_code(prog, bytes, len); } while (0) + +#define EMIT1(b1) EMIT(b1, 1) +#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) +#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) +#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) +#define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0) + +#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */ +#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */ + +static inline bool is_imm8(int value) +{ + return value <= 127 && value >= -128; +} + +static inline bool is_near(int offset) +{ + return offset <= 127 && offset >= -128; +} + +#define EMIT_JMP(offset) \ +do { \ + if (offset) { \ + if (is_near(offset)) \ + EMIT2(0xeb, offset); /* jmp .+off8 */ \ + else \ + EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \ + } \ +} while (0) + +/* list of x86 cond jumps opcodes (. + s8) + * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) + */ +#define X86_JB 0x72 +#define X86_JAE 0x73 +#define X86_JE 0x74 +#define X86_JNE 0x75 +#define X86_JBE 0x76 +#define X86_JA 0x77 + +#define EMIT_COND_JMP(op, offset) \ +do { \ + if (is_near(offset)) \ + EMIT2(op, offset); /* jxx .+off8 */ \ + else { \ + EMIT2(0x0f, op + 0x10); \ + EMIT(offset, 4); /* jxx .+off32 */ \ + } \ +} while (0) + +#define COND_SEL(CODE, TOP, FOP) \ + case CODE: \ + t_op = TOP; \ + f_op = FOP; \ + goto cond_branch + + +#define SEEN_DATAREF 1 /* might call external helpers */ +#define SEEN_XREG 2 /* ebx is used */ +#define SEEN_MEM 4 /* use mem[] for temporary storage */ + +static inline void bpf_flush_icache(void *start, void *end) +{ + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + smp_wmb(); + flush_icache_range((unsigned long)start, (unsigned long)end); + set_fs(old_fs); +} + + +void bpf_jit_compile(struct sk_filter *fp) +{ + u8 temp[64]; + u8 *prog; + unsigned int proglen, oldproglen = 0; + int ilen, i; + int t_offset, f_offset; + u8 t_op, f_op, seen = 0, pass; + u8 *image = NULL; + u8 *func; + int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */ + unsigned int cleanup_addr; /* epilogue code offset */ + unsigned int *addrs; + const struct sock_filter *filter = fp->insns; + int flen = fp->len; + + if (!bpf_jit_enable) + return; + + addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL); + if (addrs == NULL) + return; + + /* Before first pass, make a rough estimation of addrs[] + * each bpf instruction is translated to less than 64 bytes + */ + for (proglen = 0, i = 0; i < flen; i++) { + proglen += 64; + addrs[i] = proglen; + } + cleanup_addr = proglen; /* epilogue address */ + + for (pass = 0; pass < 10; pass++) { + /* no prologue/epilogue for trivial filters (RET something) */ + proglen = 0; + prog = temp; + + if (seen) { + EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ + EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ + /* note : must save %rbx in case bpf_error is hit */ + if (seen & (SEEN_XREG | SEEN_DATAREF)) + EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ + if (seen & SEEN_XREG) + CLEAR_X(); /* make sure we dont leek kernel memory */ + + /* + * If this filter needs to access skb data, + * loads r9 and r8 with : + * r9 = skb->len - skb->data_len + * r8 = skb->data + */ + if (seen & SEEN_DATAREF) { + if (offsetof(struct sk_buff, len) <= 127) + /* mov off8(%rdi),%r9d */ + EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); + else { + /* mov off32(%rdi),%r9d */ + EMIT3(0x44, 0x8b, 0x8f); + EMIT(offsetof(struct sk_buff, len), 4); + } + if (is_imm8(offsetof(struct sk_buff, data_len))) + /* sub off8(%rdi),%r9d */ + EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len)); + else { + EMIT3(0x44, 0x2b, 0x8f); + EMIT(offsetof(struct sk_buff, data_len), 4); + } + + if (is_imm8(offsetof(struct sk_buff, data))) + /* mov off8(%rdi),%r8 */ + EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data)); + else { + /* mov off32(%rdi),%r8 */ + EMIT3(0x4c, 0x8b, 0x87); + EMIT(offsetof(struct sk_buff, data), 4); + } + } + } + + switch (filter[0].code) { + case BPF_S_RET_K: + case BPF_S_LD_W_LEN: + case BPF_S_ANC_PROTOCOL: + case BPF_S_ANC_IFINDEX: + case BPF_S_ANC_MARK: + case BPF_S_ANC_RXHASH: + case BPF_S_ANC_CPU: + case BPF_S_ANC_QUEUE: + case BPF_S_LD_W_ABS: + case BPF_S_LD_H_ABS: + case BPF_S_LD_B_ABS: + /* first instruction sets A register (or is RET 'constant') */ + break; + default: + /* make sure we dont leak kernel information to user */ + CLEAR_A(); /* A = 0 */ + } + + for (i = 0; i < flen; i++) { + unsigned int K = filter[i].k; + + switch (filter[i].code) { + case BPF_S_ALU_ADD_X: /* A += X; */ + seen |= SEEN_XREG; + EMIT2(0x01, 0xd8); /* add %ebx,%eax */ + break; + case BPF_S_ALU_ADD_K: /* A += K; */ + if (!K) + break; + if (is_imm8(K)) + EMIT3(0x83, 0xc0, K); /* add imm8,%eax */ + else + EMIT1_off32(0x05, K); /* add imm32,%eax */ + break; + case BPF_S_ALU_SUB_X: /* A -= X; */ + seen |= SEEN_XREG; + EMIT2(0x29, 0xd8); /* sub %ebx,%eax */ + break; + case BPF_S_ALU_SUB_K: /* A -= K */ + if (!K) + break; + if (is_imm8(K)) + EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */ + else + EMIT1_off32(0x2d, K); /* sub imm32,%eax */ + break; + case BPF_S_ALU_MUL_X: /* A *= X; */ + seen |= SEEN_XREG; + EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */ + break; + case BPF_S_ALU_MUL_K: /* A *= K */ + if (is_imm8(K)) + EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */ + else { + EMIT2(0x69, 0xc0); /* imul imm32,%eax */ + EMIT(K, 4); + } + break; + case BPF_S_ALU_DIV_X: /* A /= X; */ + seen |= SEEN_XREG; + EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ + if (pc_ret0 != -1) + EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4)); + else { + EMIT_COND_JMP(X86_JNE, 2 + 5); + CLEAR_A(); + EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ + } + EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ + break; + case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ + EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ + EMIT(K, 4); + EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */ + break; + case BPF_S_ALU_AND_X: + seen |= SEEN_XREG; + EMIT2(0x21, 0xd8); /* and %ebx,%eax */ + break; + case BPF_S_ALU_AND_K: + if (K >= 0xFFFFFF00) { + EMIT2(0x24, K & 0xFF); /* and imm8,%al */ + } else if (K >= 0xFFFF0000) { + EMIT2(0x66, 0x25); /* and imm16,%ax */ + EMIT2(K, 2); + } else { + EMIT1_off32(0x25, K); /* and imm32,%eax */ + } + break; + case BPF_S_ALU_OR_X: + seen |= SEEN_XREG; + EMIT2(0x09, 0xd8); /* or %ebx,%eax */ + break; + case BPF_S_ALU_OR_K: + if (is_imm8(K)) + EMIT3(0x83, 0xc8, K); /* or imm8,%eax */ + else + EMIT1_off32(0x0d, K); /* or imm32,%eax */ + break; + case BPF_S_ALU_LSH_X: /* A <<= X; */ + seen |= SEEN_XREG; + EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ + break; + case BPF_S_ALU_LSH_K: + if (K == 0) + break; + else if (K == 1) + EMIT2(0xd1, 0xe0); /* shl %eax */ + else + EMIT3(0xc1, 0xe0, K); + break; + case BPF_S_ALU_RSH_X: /* A >>= X; */ + seen |= SEEN_XREG; + EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */ + break; + case BPF_S_ALU_RSH_K: /* A >>= K; */ + if (K == 0) + break; + else if (K == 1) + EMIT2(0xd1, 0xe8); /* shr %eax */ + else + EMIT3(0xc1, 0xe8, K); + break; + case BPF_S_ALU_NEG: + EMIT2(0xf7, 0xd8); /* neg %eax */ + break; + case BPF_S_RET_K: + if (!K) { + if (pc_ret0 == -1) + pc_ret0 = i; + CLEAR_A(); + } else { + EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ + } + /* fallinto */ + case BPF_S_RET_A: + if (seen) { + if (i != flen - 1) { + EMIT_JMP(cleanup_addr - addrs[i]); + break; + } + if (seen & SEEN_XREG) + EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ + EMIT1(0xc9); /* leaveq */ + } + EMIT1(0xc3); /* ret */ + break; + case BPF_S_MISC_TAX: /* X = A */ + seen |= SEEN_XREG; + EMIT2(0x89, 0xc3); /* mov %eax,%ebx */ + break; + case BPF_S_MISC_TXA: /* A = X */ + seen |= SEEN_XREG; + EMIT2(0x89, 0xd8); /* mov %ebx,%eax */ + break; + case BPF_S_LD_IMM: /* A = K */ + if (!K) + CLEAR_A(); + else + EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ + break; + case BPF_S_LDX_IMM: /* X = K */ + seen |= SEEN_XREG; + if (!K) + CLEAR_X(); + else + EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */ + break; + case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */ + seen |= SEEN_MEM; + EMIT3(0x8b, 0x45, 0xf0 - K*4); + break; + case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */ + seen |= SEEN_XREG | SEEN_MEM; + EMIT3(0x8b, 0x5d, 0xf0 - K*4); + break; + case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */ + seen |= SEEN_MEM; + EMIT3(0x89, 0x45, 0xf0 - K*4); + break; + case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ + seen |= SEEN_XREG | SEEN_MEM; + EMIT3(0x89, 0x5d, 0xf0 - K*4); + break; + case BPF_S_LD_W_LEN: /* A = skb->len; */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + if (is_imm8(offsetof(struct sk_buff, len))) + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len)); + else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, len), 4); + } + break; + case BPF_S_LDX_W_LEN: /* X = skb->len; */ + seen |= SEEN_XREG; + if (is_imm8(offsetof(struct sk_buff, len))) + /* mov off8(%rdi),%ebx */ + EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len)); + else { + EMIT2(0x8b, 0x9f); + EMIT(offsetof(struct sk_buff, len), 4); + } + break; + case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + if (is_imm8(offsetof(struct sk_buff, protocol))) { + /* movzwl off8(%rdi),%eax */ + EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol)); + } else { + EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ + EMIT(offsetof(struct sk_buff, protocol), 4); + } + EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */ + break; + case BPF_S_ANC_IFINDEX: + if (is_imm8(offsetof(struct sk_buff, dev))) { + /* movq off8(%rdi),%rax */ + EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev)); + } else { + EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */ + EMIT(offsetof(struct sk_buff, dev), 4); + } + EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */ + EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6)); + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */ + EMIT(offsetof(struct net_device, ifindex), 4); + break; + case BPF_S_ANC_MARK: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + if (is_imm8(offsetof(struct sk_buff, mark))) { + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark)); + } else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, mark), 4); + } + break; + case BPF_S_ANC_RXHASH: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); + if (is_imm8(offsetof(struct sk_buff, rxhash))) { + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash)); + } else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, rxhash), 4); + } + break; + case BPF_S_ANC_QUEUE: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + if (is_imm8(offsetof(struct sk_buff, queue_mapping))) { + /* movzwl off8(%rdi),%eax */ + EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping)); + } else { + EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ + EMIT(offsetof(struct sk_buff, queue_mapping), 4); + } + break; + case BPF_S_ANC_CPU: +#ifdef CONFIG_SMP + EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */ + EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */ +#else + CLEAR_A(); +#endif + break; + case BPF_S_LD_W_ABS: + func = sk_load_word; +common_load: seen |= SEEN_DATAREF; + if ((int)K < 0) + goto out; + t_offset = func - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + EMIT1_off32(0xe8, t_offset); /* call */ + break; + case BPF_S_LD_H_ABS: + func = sk_load_half; + goto common_load; + case BPF_S_LD_B_ABS: + func = sk_load_byte; + goto common_load; + case BPF_S_LDX_B_MSH: + if ((int)K < 0) { + if (pc_ret0 != -1) { + EMIT_JMP(addrs[pc_ret0] - addrs[i]); + break; + } + CLEAR_A(); + EMIT_JMP(cleanup_addr - addrs[i]); + break; + } + seen |= SEEN_DATAREF | SEEN_XREG; + t_offset = sk_load_byte_msh - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ + break; + case BPF_S_LD_W_IND: + func = sk_load_word_ind; +common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; + t_offset = func - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ + break; + case BPF_S_LD_H_IND: + func = sk_load_half_ind; + goto common_load_ind; + case BPF_S_LD_B_IND: + func = sk_load_byte_ind; + goto common_load_ind; + case BPF_S_JMP_JA: + t_offset = addrs[i + K] - addrs[i]; + EMIT_JMP(t_offset); + break; + COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE); + COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB); + COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE); + COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE); + COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE); + COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB); + COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE); + COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE); + +cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; + t_offset = addrs[i + filter[i].jt] - addrs[i]; + + /* same targets, can avoid doing the test :) */ + if (filter[i].jt == filter[i].jf) { + EMIT_JMP(t_offset); + break; + } + + switch (filter[i].code) { + case BPF_S_JMP_JGT_X: + case BPF_S_JMP_JGE_X: + case BPF_S_JMP_JEQ_X: + seen |= SEEN_XREG; + EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */ + break; + case BPF_S_JMP_JSET_X: + seen |= SEEN_XREG; + EMIT2(0x85, 0xd8); /* test %ebx,%eax */ + break; + case BPF_S_JMP_JEQ_K: + if (K == 0) { + EMIT2(0x85, 0xc0); /* test %eax,%eax */ + break; + } + case BPF_S_JMP_JGT_K: + case BPF_S_JMP_JGE_K: + if (K <= 127) + EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */ + else + EMIT1_off32(0x3d, K); /* cmp imm32,%eax */ + break; + case BPF_S_JMP_JSET_K: + if (K <= 0xFF) + EMIT2(0xa8, K); /* test imm8,%al */ + else if (!(K & 0xFFFF00FF)) + EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */ + else if (K <= 0xFFFF) { + EMIT2(0x66, 0xa9); /* test imm16,%ax */ + EMIT(K, 2); + } else { + EMIT1_off32(0xa9, K); /* test imm32,%eax */ + } + break; + } + if (filter[i].jt != 0) { + if (filter[i].jf) + t_offset += is_near(f_offset) ? 2 : 6; + EMIT_COND_JMP(t_op, t_offset); + if (filter[i].jf) + EMIT_JMP(f_offset); + break; + } + EMIT_COND_JMP(f_op, f_offset); + break; + default: + /* hmm, too complex filter, give up with jit compiler */ + goto out; + } + ilen = prog - temp; + if (image) { + if (unlikely(proglen + ilen > oldproglen)) { + pr_err("bpb_jit_compile fatal error\n"); + kfree(addrs); + module_free(NULL, image); + return; + } + memcpy(image + proglen, temp, ilen); + } + proglen += ilen; + addrs[i] = proglen; + prog = temp; + } + /* last bpf instruction is always a RET : + * use it to give the cleanup instruction(s) addr + */ + cleanup_addr = proglen - 1; /* ret */ + if (seen) + cleanup_addr -= 1; /* leaveq */ + if (seen & SEEN_XREG) + cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ + + if (image) { + WARN_ON(proglen != oldproglen); + break; + } + if (proglen == oldproglen) { + image = module_alloc(max_t(unsigned int, + proglen, + sizeof(struct work_struct))); + if (!image) + goto out; + } + oldproglen = proglen; + } + if (bpf_jit_enable > 1) + pr_err("flen=%d proglen=%u pass=%d image=%p\n", + flen, proglen, pass, image); + + if (image) { + if (bpf_jit_enable > 1) + print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, + 16, 1, image, proglen, false); + + bpf_flush_icache(image, image + proglen); + + fp->bpf_func = (void *)image; + } +out: + kfree(addrs); + return; +} + +static void jit_free_defer(struct work_struct *arg) +{ + module_free(NULL, arg); +} + +/* run from softirq, we must use a work_struct to call + * module_free() from process context + */ +void bpf_jit_free(struct sk_filter *fp) +{ + if (fp->bpf_func != sk_run_filter) { + struct work_struct *work = (struct work_struct *)fp->bpf_func; + + INIT_WORK(work, jit_free_defer); + schedule_work(work); + } +} |