summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/vmlinux.lds.S1
-rw-r--r--arch/ia64/hp/common/sba_iommu.c1
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S1
-rw-r--r--arch/ia64/kvm/vti.h26
-rw-r--r--arch/ia64/mm/fault.c1
-rw-r--r--arch/ia64/oprofile/backtrace.c2
-rw-r--r--arch/m32r/kernel/vmlinux.lds.S1
-rw-r--r--arch/m68k/kernel/vmlinux-std.lds2
-rw-r--r--arch/m68k/kernel/vmlinux-sun3.lds1
-rw-r--r--arch/mips/kernel/vmlinux.lds.S1
-rw-r--r--arch/mips/txx9/generic/setup_tx4939.c21
-rw-r--r--arch/parisc/include/asm/cacheflush.h5
-rw-r--r--arch/parisc/include/asm/pgtable.h9
-rw-r--r--arch/parisc/include/asm/unistd.h10
-rw-r--r--arch/parisc/kernel/cache.c13
-rw-r--r--arch/parisc/kernel/entry.S3
-rw-r--r--arch/parisc/kernel/head.S5
-rw-r--r--arch/parisc/kernel/module.c10
-rw-r--r--arch/parisc/kernel/pacache.S6
-rw-r--r--arch/parisc/kernel/sys_parisc32.c8
-rw-r--r--arch/parisc/kernel/syscall_table.S6
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S4
-rw-r--r--arch/parisc/mm/init.c260
-rw-r--r--arch/powerpc/Kconfig6
-rw-r--r--arch/powerpc/Kconfig.debug5
-rw-r--r--arch/powerpc/boot/Makefile6
-rw-r--r--arch/powerpc/boot/crt0.S116
-rw-r--r--arch/powerpc/boot/dts/p1020rdb.dts332
-rw-r--r--arch/powerpc/boot/dts/p1020rdb_camp_core0.dts213
-rw-r--r--arch/powerpc/boot/dts/p1020rdb_camp_core1.dts148
-rw-r--r--arch/powerpc/boot/dts/p1020si.dtsi377
-rw-r--r--arch/powerpc/boot/dts/p1022ds.dts106
-rw-r--r--arch/powerpc/boot/dts/p2020ds.dts374
-rw-r--r--arch/powerpc/boot/dts/p2020rdb.dts378
-rw-r--r--arch/powerpc/boot/dts/p2020rdb_camp_core0.dts245
-rw-r--r--arch/powerpc/boot/dts/p2020rdb_camp_core1.dts150
-rw-r--r--arch/powerpc/boot/dts/p2020si.dtsi382
-rw-r--r--arch/powerpc/boot/dts/p4080ds.dts86
-rw-r--r--arch/powerpc/boot/epapr.c66
-rwxr-xr-xarch/powerpc/boot/wrapper19
-rw-r--r--arch/powerpc/boot/zImage.coff.lds.S6
-rw-r--r--arch/powerpc/boot/zImage.lds.S57
-rw-r--r--arch/powerpc/configs/83xx/mpc8313_rdb_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc8315_rdb_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/mpc8540_ads_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/mpc8560_ads_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/mpc85xx_cds_defconfig1
-rw-r--r--arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig1
-rw-r--r--arch/powerpc/configs/c2k_defconfig4
-rw-r--r--arch/powerpc/configs/e55xx_smp_defconfig39
-rw-r--r--arch/powerpc/configs/mpc85xx_defconfig1
-rw-r--r--arch/powerpc/configs/mpc85xx_smp_defconfig1
-rw-r--r--arch/powerpc/configs/mpc86xx_defconfig1
-rw-r--r--arch/powerpc/configs/pmac32_defconfig4
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig4
-rw-r--r--arch/powerpc/configs/ps3_defconfig4
-rw-r--r--arch/powerpc/configs/pseries_defconfig8
-rw-r--r--arch/powerpc/include/asm/cputable.h55
-rw-r--r--arch/powerpc/include/asm/cputhreads.h12
-rw-r--r--arch/powerpc/include/asm/dbell.h3
-rw-r--r--arch/powerpc/include/asm/emulated_ops.h4
-rw-r--r--arch/powerpc/include/asm/exception-64s.h113
-rw-r--r--arch/powerpc/include/asm/feature-fixups.h15
-rw-r--r--arch/powerpc/include/asm/firmware.h3
-rw-r--r--arch/powerpc/include/asm/hvcall.h12
-rw-r--r--arch/powerpc/include/asm/io-workarounds.h (renamed from arch/powerpc/platforms/cell/io-workarounds.h)1
-rw-r--r--arch/powerpc/include/asm/io.h33
-rw-r--r--arch/powerpc/include/asm/io_event_irq.h54
-rw-r--r--arch/powerpc/include/asm/irq.h18
-rw-r--r--arch/powerpc/include/asm/kexec.h2
-rw-r--r--arch/powerpc/include/asm/kvm.h184
-rw-r--r--arch/powerpc/include/asm/kvm_44x.h1
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_e500.h2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h5
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h9
-rw-r--r--arch/powerpc/include/asm/lppaca.h2
-rw-r--r--arch/powerpc/include/asm/machdep.h22
-rw-r--r--arch/powerpc/include/asm/mmu-book3e.h20
-rw-r--r--arch/powerpc/include/asm/mmu-hash64.h6
-rw-r--r--arch/powerpc/include/asm/mmu.h52
-rw-r--r--arch/powerpc/include/asm/mmu_context.h12
-rw-r--r--arch/powerpc/include/asm/mpic.h5
-rw-r--r--arch/powerpc/include/asm/pSeries_reconfig.h5
-rw-r--r--arch/powerpc/include/asm/paca.h11
-rw-r--r--arch/powerpc/include/asm/page_64.h21
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64.h13
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h35
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h1
-rw-r--r--arch/powerpc/include/asm/processor.h4
-rw-r--r--arch/powerpc/include/asm/reg.h104
-rw-r--r--arch/powerpc/include/asm/reg_a2.h165
-rw-r--r--arch/powerpc/include/asm/reg_booke.h10
-rw-r--r--arch/powerpc/include/asm/rtas.h45
-rw-r--r--arch/powerpc/include/asm/scom.h156
-rw-r--r--arch/powerpc/include/asm/smp.h38
-rw-r--r--arch/powerpc/include/asm/systbl.h1
-rw-r--r--arch/powerpc/include/asm/system.h2
-rw-r--r--arch/powerpc/include/asm/tlbflush.h2
-rw-r--r--arch/powerpc/include/asm/udbg.h1
-rw-r--r--arch/powerpc/include/asm/unistd.h3
-rw-r--r--arch/powerpc/include/asm/wsp.h14
-rw-r--r--arch/powerpc/include/asm/xics.h142
-rw-r--r--arch/powerpc/kernel/Makefile6
-rw-r--r--arch/powerpc/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/kernel/cpu_setup_a2.S114
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S3
-rw-r--r--arch/powerpc/kernel/cpu_setup_power7.S91
-rw-r--r--arch/powerpc/kernel/cputable.c66
-rw-r--r--arch/powerpc/kernel/crash.c91
-rw-r--r--arch/powerpc/kernel/dbell.c65
-rw-r--r--arch/powerpc/kernel/entry_64.S27
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S202
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S216
-rw-r--r--arch/powerpc/kernel/head_32.S22
-rw-r--r--arch/powerpc/kernel/head_64.S49
-rw-r--r--arch/powerpc/kernel/idle_power7.S97
-rw-r--r--arch/powerpc/kernel/io-workarounds.c (renamed from arch/powerpc/platforms/cell/io-workarounds.c)31
-rw-r--r--arch/powerpc/kernel/irq.c166
-rw-r--r--arch/powerpc/kernel/kgdb.c2
-rw-r--r--arch/powerpc/kernel/lparcfg.c53
-rw-r--r--arch/powerpc/kernel/misc_32.S11
-rw-r--r--arch/powerpc/kernel/misc_64.S13
-rw-r--r--arch/powerpc/kernel/paca.c30
-rw-r--r--arch/powerpc/kernel/pci_dn.c3
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c5
-rw-r--r--arch/powerpc/kernel/process.c20
-rw-r--r--arch/powerpc/kernel/prom.c64
-rw-r--r--arch/powerpc/kernel/prom_init.c30
-rw-r--r--arch/powerpc/kernel/rtas.c4
-rw-r--r--arch/powerpc/kernel/setup-common.c22
-rw-r--r--arch/powerpc/kernel/setup_32.c1
-rw-r--r--arch/powerpc/kernel/setup_64.c44
-rw-r--r--arch/powerpc/kernel/signal_64.c4
-rw-r--r--arch/powerpc/kernel/smp.c138
-rw-r--r--arch/powerpc/kernel/sysfs.c38
-rw-r--r--arch/powerpc/kernel/traps.c28
-rw-r--r--arch/powerpc/kernel/udbg.c2
-rw-r--r--arch/powerpc/kernel/udbg_16550.c51
-rw-r--r--arch/powerpc/kernel/vector.S2
-rw-r--r--arch/powerpc/kvm/44x.c10
-rw-r--r--arch/powerpc/kvm/44x_emulate.c2
-rw-r--r--arch/powerpc/kvm/book3s.c2
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S13
-rw-r--r--arch/powerpc/kvm/book3s_segment.S12
-rw-r--r--arch/powerpc/kvm/booke.c154
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S1
-rw-r--r--arch/powerpc/kvm/e500.c76
-rw-r--r--arch/powerpc/kvm/e500_emulate.c7
-rw-r--r--arch/powerpc/kvm/e500_tlb.c13
-rw-r--r--arch/powerpc/kvm/emulate.c15
-rw-r--r--arch/powerpc/kvm/powerpc.c21
-rw-r--r--arch/powerpc/kvm/timing.c31
-rw-r--r--arch/powerpc/lib/alloc.c8
-rw-r--r--arch/powerpc/lib/copypage_64.S7
-rw-r--r--arch/powerpc/lib/devres.c6
-rw-r--r--arch/powerpc/lib/sstep.c62
-rw-r--r--arch/powerpc/mm/hash_low_64.S8
-rw-r--r--arch/powerpc/mm/hash_native_64.c18
-rw-r--r--arch/powerpc/mm/hash_utils_64.c62
-rw-r--r--arch/powerpc/mm/hugetlbpage.c2
-rw-r--r--arch/powerpc/mm/mmu_context_hash64.c214
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c18
-rw-r--r--arch/powerpc/mm/numa.c17
-rw-r--r--arch/powerpc/mm/pgtable_32.c12
-rw-r--r--arch/powerpc/mm/pgtable_64.c15
-rw-r--r--arch/powerpc/mm/slb.c10
-rw-r--r--arch/powerpc/mm/slb_low.S8
-rw-r--r--arch/powerpc/mm/stab.c2
-rw-r--r--arch/powerpc/platforms/44x/iss4xx.c6
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_ads_cpld.c10
-rw-r--r--arch/powerpc/platforms/52xx/media5200.c4
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pic.c83
-rw-r--r--arch/powerpc/platforms/82xx/pq2ads-pci-pic.c12
-rw-r--r--arch/powerpc/platforms/85xx/smp.c12
-rw-r--r--arch/powerpc/platforms/85xx/socrates_fpga_pic.c26
-rw-r--r--arch/powerpc/platforms/86xx/gef_pic.c10
-rw-r--r--arch/powerpc/platforms/86xx/mpc8610_hpcd.c99
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_smp.c6
-rw-r--r--arch/powerpc/platforms/8xx/m8xx_setup.c2
-rw-r--r--arch/powerpc/platforms/Kconfig31
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype24
-rw-r--r--arch/powerpc/platforms/Makefile1
-rw-r--r--arch/powerpc/platforms/cell/Kconfig4
-rw-r--r--arch/powerpc/platforms/cell/Makefile9
-rw-r--r--arch/powerpc/platforms/cell/axon_msi.c3
-rw-r--r--arch/powerpc/platforms/cell/beat_interrupt.c27
-rw-r--r--arch/powerpc/platforms/cell/beat_interrupt.h3
-rw-r--r--arch/powerpc/platforms/cell/beat_smp.c124
-rw-r--r--arch/powerpc/platforms/cell/cbe_regs.c11
-rw-r--r--arch/powerpc/platforms/cell/celleb_pci.c25
-rw-r--r--arch/powerpc/platforms/cell/celleb_pci.h3
-rw-r--r--arch/powerpc/platforms/cell/celleb_setup.c4
-rw-r--r--arch/powerpc/platforms/cell/interrupt.c16
-rw-r--r--arch/powerpc/platforms/cell/qpace_setup.c1
-rw-r--r--arch/powerpc/platforms/cell/setup.c4
-rw-r--r--arch/powerpc/platforms/cell/smp.c37
-rw-r--r--arch/powerpc/platforms/cell/spider-pci.c3
-rw-r--r--arch/powerpc/platforms/cell/spider-pic.c21
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c2
-rw-r--r--arch/powerpc/platforms/chrp/smp.c4
-rw-r--r--arch/powerpc/platforms/embedded6xx/flipper-pic.c15
-rw-r--r--arch/powerpc/platforms/embedded6xx/hlwd-pic.c15
-rw-r--r--arch/powerpc/platforms/iseries/Kconfig4
-rw-r--r--arch/powerpc/platforms/iseries/exception.S62
-rw-r--r--arch/powerpc/platforms/iseries/irq.c13
-rw-r--r--arch/powerpc/platforms/iseries/setup.c9
-rw-r--r--arch/powerpc/platforms/iseries/smp.c45
-rw-r--r--arch/powerpc/platforms/iseries/smp.h6
-rw-r--r--arch/powerpc/platforms/powermac/Kconfig11
-rw-r--r--arch/powerpc/platforms/powermac/pic.c25
-rw-r--r--arch/powerpc/platforms/powermac/pic.h11
-rw-r--r--arch/powerpc/platforms/powermac/pmac.h1
-rw-r--r--arch/powerpc/platforms/powermac/smp.c97
-rw-r--r--arch/powerpc/platforms/ps3/interrupt.c8
-rw-r--r--arch/powerpc/platforms/ps3/smp.c22
-rw-r--r--arch/powerpc/platforms/ps3/spu.c4
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig23
-rw-r--r--arch/powerpc/platforms/pseries/Makefile2
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c20
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c82
-rw-r--r--arch/powerpc/platforms/pseries/eeh_driver.c22
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c5
-rw-r--r--arch/powerpc/platforms/pseries/io_event_irq.c231
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c117
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c5
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c48
-rw-r--r--arch/powerpc/platforms/pseries/plpar_wrappers.h27
-rw-r--r--arch/powerpc/platforms/pseries/ras.c6
-rw-r--r--arch/powerpc/platforms/pseries/setup.c50
-rw-r--r--arch/powerpc/platforms/pseries/smp.c24
-rw-r--r--arch/powerpc/platforms/pseries/xics.c949
-rw-r--r--arch/powerpc/platforms/pseries/xics.h23
-rw-r--r--arch/powerpc/platforms/wsp/Kconfig28
-rw-r--r--arch/powerpc/platforms/wsp/Makefile6
-rw-r--r--arch/powerpc/platforms/wsp/ics.c712
-rw-r--r--arch/powerpc/platforms/wsp/ics.h20
-rw-r--r--arch/powerpc/platforms/wsp/opb_pic.c332
-rw-r--r--arch/powerpc/platforms/wsp/psr2.c95
-rw-r--r--arch/powerpc/platforms/wsp/scom_smp.c427
-rw-r--r--arch/powerpc/platforms/wsp/scom_wsp.c77
-rw-r--r--arch/powerpc/platforms/wsp/setup.c36
-rw-r--r--arch/powerpc/platforms/wsp/smp.c88
-rw-r--r--arch/powerpc/platforms/wsp/wsp.h17
-rw-r--r--arch/powerpc/sysdev/Kconfig10
-rw-r--r--arch/powerpc/sysdev/Makefile6
-rw-r--r--arch/powerpc/sysdev/axonram.c2
-rw-r--r--arch/powerpc/sysdev/cpm1.c8
-rw-r--r--arch/powerpc/sysdev/cpm2_pic.c10
-rw-r--r--arch/powerpc/sysdev/fsl_85xx_cache_sram.c4
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c10
-rw-r--r--arch/powerpc/sysdev/i8259.c13
-rw-r--r--arch/powerpc/sysdev/ipic.c16
-rw-r--r--arch/powerpc/sysdev/mmio_nvram.c2
-rw-r--r--arch/powerpc/sysdev/mpc8xx_pic.c10
-rw-r--r--arch/powerpc/sysdev/mpc8xxx_gpio.c12
-rw-r--r--arch/powerpc/sysdev/mpic.c209
-rw-r--r--arch/powerpc/sysdev/mv64x60_pic.c14
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe_ic.c6
-rw-r--r--arch/powerpc/sysdev/scom.c192
-rw-r--r--arch/powerpc/sysdev/uic.c12
-rw-r--r--arch/powerpc/sysdev/xics/Kconfig13
-rw-r--r--arch/powerpc/sysdev/xics/Makefile6
-rw-r--r--arch/powerpc/sysdev/xics/icp-hv.c164
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c293
-rw-r--r--arch/powerpc/sysdev/xics/ics-rtas.c240
-rw-r--r--arch/powerpc/sysdev/xics/xics-common.c443
-rw-r--r--arch/powerpc/sysdev/xilinx_intc.c8
-rw-r--r--arch/powerpc/xmon/xmon.c38
-rw-r--r--arch/s390/crypto/Makefile1
-rw-r--r--arch/s390/crypto/aes_s390.c383
-rw-r--r--arch/s390/crypto/crypt_s390.h112
-rw-r--r--arch/s390/crypto/des_check_key.c132
-rw-r--r--arch/s390/crypto/des_s390.c370
-rw-r--r--arch/s390/crypto/ghash_s390.c162
-rw-r--r--arch/s390/crypto/prng.c2
-rw-r--r--arch/s390/crypto/sha1_s390.c2
-rw-r--r--arch/s390/crypto/sha256_s390.c2
-rw-r--r--arch/s390/crypto/sha512_s390.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/sq.c1
-rw-r--r--arch/sparc/Kconfig7
-rw-r--r--arch/sparc/include/asm/cpudata_32.h5
-rw-r--r--arch/sparc/include/asm/floppy_32.h40
-rw-r--r--arch/sparc/include/asm/io.h13
-rw-r--r--arch/sparc/include/asm/irq_32.h6
-rw-r--r--arch/sparc/include/asm/leon.h41
-rw-r--r--arch/sparc/include/asm/pcic.h12
-rw-r--r--arch/sparc/include/asm/pgtable_32.h6
-rw-r--r--arch/sparc/include/asm/pgtable_64.h3
-rw-r--r--arch/sparc/include/asm/setup.h12
-rw-r--r--arch/sparc/include/asm/smp_32.h37
-rw-r--r--arch/sparc/include/asm/smp_64.h4
-rw-r--r--arch/sparc/include/asm/spinlock_32.h1
-rw-r--r--arch/sparc/include/asm/system_32.h5
-rw-r--r--arch/sparc/include/asm/system_64.h4
-rw-r--r--arch/sparc/include/asm/unistd.h3
-rw-r--r--arch/sparc/include/asm/winmacro.h9
-rw-r--r--arch/sparc/kernel/Makefile4
-rw-r--r--arch/sparc/kernel/cpu.c139
-rw-r--r--arch/sparc/kernel/cpumap.c4
-rw-r--r--arch/sparc/kernel/devices.c4
-rw-r--r--arch/sparc/kernel/ds.c14
-rw-r--r--arch/sparc/kernel/entry.S41
-rw-r--r--arch/sparc/kernel/head_32.S51
-rw-r--r--arch/sparc/kernel/ioport.c42
-rw-r--r--arch/sparc/kernel/irq.h51
-rw-r--r--arch/sparc/kernel/irq_32.c513
-rw-r--r--arch/sparc/kernel/irq_64.c6
-rw-r--r--arch/sparc/kernel/kernel.h5
-rw-r--r--arch/sparc/kernel/leon_kernel.c365
-rw-r--r--arch/sparc/kernel/leon_smp.c148
-rw-r--r--arch/sparc/kernel/mdesc.c2
-rw-r--r--arch/sparc/kernel/of_device_64.c3
-rw-r--r--arch/sparc/kernel/pci_msi.c3
-rw-r--r--arch/sparc/kernel/pcic.c83
-rw-r--r--arch/sparc/kernel/perf_event.c1
-rw-r--r--arch/sparc/kernel/process_32.c12
-rw-r--r--arch/sparc/kernel/prom_32.c1
-rw-r--r--arch/sparc/kernel/setup_32.c87
-rw-r--r--arch/sparc/kernel/setup_64.c78
-rw-r--r--arch/sparc/kernel/smp_32.c103
-rw-r--r--arch/sparc/kernel/smp_64.c58
-rw-r--r--arch/sparc/kernel/sun4c_irq.c150
-rw-r--r--arch/sparc/kernel/sun4d_irq.c494
-rw-r--r--arch/sparc/kernel/sun4d_smp.c93
-rw-r--r--arch/sparc/kernel/sun4m_irq.c179
-rw-r--r--arch/sparc/kernel/sun4m_smp.c51
-rw-r--r--arch/sparc/kernel/sysfs.c3
-rw-r--r--arch/sparc/kernel/systbls_32.S2
-rw-r--r--arch/sparc/kernel/systbls_64.S4
-rw-r--r--arch/sparc/kernel/time_32.c10
-rw-r--r--arch/sparc/kernel/us2e_cpufreq.c4
-rw-r--r--arch/sparc/kernel/us3_cpufreq.c4
-rw-r--r--arch/sparc/lib/Makefile1
-rw-r--r--arch/sparc/lib/rwsem_32.S204
-rw-r--r--arch/sparc/mm/init_64.c14
-rw-r--r--arch/x86/Kbuild1
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/crypto/Makefile4
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c9
-rw-r--r--arch/x86/crypto/fpu.c10
-rw-r--r--arch/x86/ia32/ia32entry.S1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h193
-rw-r--r--arch/x86/include/asm/kvm_host.h55
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/uaccess.h1
-rw-r--r--arch/x86/include/asm/uaccess_32.h1
-rw-r--r--arch/x86/include/asm/uaccess_64.h1
-rw-r--r--arch/x86/include/asm/unistd_32.h3
-rw-r--r--arch/x86/include/asm/unistd_64.h2
-rw-r--r--arch/x86/kernel/signal.c14
-rw-r--r--arch/x86/kernel/syscall_table_32.S1
-rw-r--r--arch/x86/kvm/emulate.c1754
-rw-r--r--arch/x86/kvm/i8254.h2
-rw-r--r--arch/x86/kvm/irq.h2
-rw-r--r--arch/x86/kvm/mmu.c16
-rw-r--r--arch/x86/kvm/paging_tmpl.h83
-rw-r--r--arch/x86/kvm/svm.c585
-rw-r--r--arch/x86/kvm/vmx.c228
-rw-r--r--arch/x86/kvm/x86.c570
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/mm/fault.c1
-rw-r--r--arch/x86/net/Makefile4
-rw-r--r--arch/x86/net/bpf_jit.S140
-rw-r--r--arch/x86/net/bpf_jit_comp.c654
366 files changed, 15306 insertions, 7229 deletions
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index 433be2a24f3..3d890a98a08 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -46,6 +46,7 @@ SECTIONS
__init_end = .;
/* Freed after init ends here */
+ _sdata = .; /* Start of rw data section */
_data = .;
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 4ce8d1358fe..c04dd576f33 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -37,6 +37,7 @@
#include <linux/crash_dump.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
#include <asm/delay.h> /* ia64_get_itc() */
#include <asm/io.h>
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 787de4a77d8..53c0ba004e9 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -209,6 +209,7 @@ SECTIONS {
data : {
} :data
.data : AT(ADDR(.data) - LOAD_OFFSET) {
+ _sdata = .;
INIT_TASK_DATA(PAGE_SIZE)
CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
READ_MOSTLY_DATA(SMP_CACHE_BYTES)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
index f6c5617e16a..b214b5b0432 100644
--- a/arch/ia64/kvm/vti.h
+++ b/arch/ia64/kvm/vti.h
@@ -83,13 +83,13 @@
union vac {
unsigned long value;
struct {
- int a_int:1;
- int a_from_int_cr:1;
- int a_to_int_cr:1;
- int a_from_psr:1;
- int a_from_cpuid:1;
- int a_cover:1;
- int a_bsw:1;
+ unsigned int a_int:1;
+ unsigned int a_from_int_cr:1;
+ unsigned int a_to_int_cr:1;
+ unsigned int a_from_psr:1;
+ unsigned int a_from_cpuid:1;
+ unsigned int a_cover:1;
+ unsigned int a_bsw:1;
long reserved:57;
};
};
@@ -97,12 +97,12 @@ union vac {
union vdc {
unsigned long value;
struct {
- int d_vmsw:1;
- int d_extint:1;
- int d_ibr_dbr:1;
- int d_pmc:1;
- int d_to_pmd:1;
- int d_itm:1;
+ unsigned int d_vmsw:1;
+ unsigned int d_extint:1;
+ unsigned int d_ibr_dbr:1;
+ unsigned int d_pmc:1;
+ unsigned int d_to_pmd:1;
+ unsigned int d_itm:1;
long reserved:58;
};
};
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 0799fea4c58..20b35937612 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -10,6 +10,7 @@
#include <linux/interrupt.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
+#include <linux/prefetch.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c
index 5cdd7e4a597..f7b798993ce 100644
--- a/arch/ia64/oprofile/backtrace.c
+++ b/arch/ia64/oprofile/backtrace.c
@@ -29,7 +29,7 @@ typedef struct
unsigned int depth;
struct pt_regs *regs;
struct unw_frame_info frame;
- u64 *prev_pfs_loc; /* state for WAR for old spinlock ool code */
+ unsigned long *prev_pfs_loc; /* state for WAR for old spinlock ool code */
} ia64_backtrace_t;
/* Returns non-zero if the PC is in the Interrupt Vector Table */
diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S
index c194d64cdbb..cf95aec7746 100644
--- a/arch/m32r/kernel/vmlinux.lds.S
+++ b/arch/m32r/kernel/vmlinux.lds.S
@@ -44,6 +44,7 @@ SECTIONS
EXCEPTION_TABLE(16)
NOTES
+ _sdata = .; /* Start of data section */
RODATA
RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
_edata = .; /* End of data section */
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 878be5f38ca..d0993594f55 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -25,6 +25,8 @@ SECTIONS
EXCEPTION_TABLE(16)
+ _sdata = .; /* Start of data section */
+
RODATA
RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE)
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index 1ad6b7ad2c1..8080469ee6c 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -25,6 +25,7 @@ SECTIONS
_etext = .; /* End of text section */
EXCEPTION_TABLE(16) :data
+ _sdata = .; /* Start of rw data section */
RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE) :data
/* End of data goes *here* so that freeing init code works properly. */
_edata = .;
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index cd2ca544454..01af3876cf9 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -65,6 +65,7 @@ SECTIONS
NOTES :text :note
.dummy : { *(.dummy) } :text
+ _sdata = .; /* Start of data section */
RODATA
/* writeable */
diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c
index 3dc19f48295..e9f95dcde37 100644
--- a/arch/mips/txx9/generic/setup_tx4939.c
+++ b/arch/mips/txx9/generic/setup_tx4939.c
@@ -318,19 +318,15 @@ void __init tx4939_sio_init(unsigned int sclk, unsigned int cts_mask)
}
#if defined(CONFIG_TC35815) || defined(CONFIG_TC35815_MODULE)
-static int tx4939_get_eth_speed(struct net_device *dev)
+static u32 tx4939_get_eth_speed(struct net_device *dev)
{
- struct ethtool_cmd cmd = { ETHTOOL_GSET };
- int speed = 100; /* default 100Mbps */
- int err;
- if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
- return speed;
- err = dev->ethtool_ops->get_settings(dev, &cmd);
- if (err < 0)
- return speed;
- speed = cmd.speed == SPEED_100 ? 100 : 10;
- return speed;
+ struct ethtool_cmd cmd;
+ if (dev_ethtool_get_settings(dev, &cmd))
+ return 100; /* default 100Mbps */
+
+ return ethtool_cmd_speed(&cmd);
}
+
static int tx4939_netdev_event(struct notifier_block *this,
unsigned long event,
void *ptr)
@@ -343,8 +339,7 @@ static int tx4939_netdev_event(struct notifier_block *this,
else if (dev->irq == TXX9_IRQ_BASE + TX4939_IR_ETH(1))
bit = TX4939_PCFG_SPEED1;
if (bit) {
- int speed = tx4939_get_eth_speed(dev);
- if (speed == 100)
+ if (tx4939_get_eth_speed(dev) == 100)
txx9_set64(&tx4939_ccfgptr->pcfg, bit);
else
txx9_clear64(&tx4939_ccfgptr->pcfg, bit);
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index d18328b3f93..da601dd34c0 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -3,6 +3,7 @@
#include <linux/mm.h>
#include <linux/uaccess.h>
+#include <asm/tlbflush.h>
/* The usual comment is "Caches aren't brain-dead on the <architecture>".
* Unfortunately, that doesn't apply to PA-RISC. */
@@ -112,8 +113,10 @@ void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
static inline void
flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
{
- if (PageAnon(page))
+ if (PageAnon(page)) {
+ flush_tlb_page(vma, vmaddr);
flush_dcache_page_asm(page_to_phys(page), vmaddr);
+ }
}
#ifdef CONFIG_DEBUG_RODATA
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 5d7b8ce9fdf..22dadeb5869 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -177,7 +177,10 @@ struct vm_area_struct;
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _PAGE_KERNEL (_PAGE_PRESENT | _PAGE_EXEC | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define _PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define _PAGE_KERNEL_EXEC (_PAGE_KERNEL_RO | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX (_PAGE_KERNEL_EXEC | _PAGE_WRITE)
+#define _PAGE_KERNEL (_PAGE_KERNEL_RO | _PAGE_WRITE)
/* The pgd/pmd contains a ptr (in phys addr space); since all pgds/pmds
* are page-aligned, we don't care about the PAGE_OFFSET bits, except
@@ -208,7 +211,9 @@ struct vm_area_struct;
#define PAGE_COPY PAGE_EXECREAD
#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED)
#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
-#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RWX __pgprot(_PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO)
#define PAGE_KERNEL_UNC __pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE)
#define PAGE_GATEWAY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_GATEWAY| _PAGE_READ)
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 3eb82c2a5ec..9cbc2c3bf63 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -814,8 +814,14 @@
#define __NR_recvmmsg (__NR_Linux + 319)
#define __NR_accept4 (__NR_Linux + 320)
#define __NR_prlimit64 (__NR_Linux + 321)
-
-#define __NR_Linux_syscalls (__NR_prlimit64 + 1)
+#define __NR_fanotify_init (__NR_Linux + 322)
+#define __NR_fanotify_mark (__NR_Linux + 323)
+#define __NR_clock_adjtime (__NR_Linux + 324)
+#define __NR_name_to_handle_at (__NR_Linux + 325)
+#define __NR_open_by_handle_at (__NR_Linux + 326)
+#define __NR_syncfs (__NR_Linux + 327)
+
+#define __NR_Linux_syscalls (__NR_syncfs + 1)
#define __IGNORE_select /* newselect */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 3f11331c277..83335f3da5f 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -304,10 +304,20 @@ void flush_dcache_page(struct page *page)
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
addr = mpnt->vm_start + offset;
+ /* The TLB is the engine of coherence on parisc: The
+ * CPU is entitled to speculate any page with a TLB
+ * mapping, so here we kill the mapping then flush the
+ * page along a special flush only alias mapping.
+ * This guarantees that the page is no-longer in the
+ * cache for any process and nor may it be
+ * speculatively read in (until the user or kernel
+ * specifically accesses it, of course) */
+
+ flush_tlb_page(mpnt, addr);
if (old_addr == 0 || (old_addr & (SHMLBA - 1)) != (addr & (SHMLBA - 1))) {
__flush_cache_page(mpnt, addr, page_to_phys(page));
if (old_addr)
- printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %s\n", old_addr, addr, mpnt->vm_file ? mpnt->vm_file->f_path.dentry->d_name.name : "(null)");
+ printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %s\n", old_addr, addr, mpnt->vm_file ? (char *)mpnt->vm_file->f_path.dentry->d_name.name : "(null)");
old_addr = addr;
}
}
@@ -499,6 +509,7 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
{
BUG_ON(!vma->vm_mm->context);
+ flush_tlb_page(vma, vmaddr);
__flush_cache_page(vma, vmaddr, page_to_phys(pfn_to_page(pfn)));
}
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index ead8d2a1034..6f059443914 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -692,6 +692,9 @@ ENTRY(fault_vector_11)
END(fault_vector_11)
#endif
+ /* Fault vector is separately protected and *must* be on its own page */
+ .align PAGE_SIZE
+ENTRY(end_fault_vector)
.import handle_interruption,code
.import do_cpu_irq_mask,code
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index 145c5e4caaa..37aabd772fb 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -106,8 +106,9 @@ $bss_loop:
#endif
- /* Now initialize the PTEs themselves */
- ldo 0+_PAGE_KERNEL(%r0),%r3 /* Hardwired 0 phys addr start */
+ /* Now initialize the PTEs themselves. We use RWX for
+ * everything ... it will get remapped correctly later */
+ ldo 0+_PAGE_KERNEL_RWX(%r0),%r3 /* Hardwired 0 phys addr start */
ldi (1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */
load32 PA(pg0),%r1
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 6e81bb596e5..cedbbb8b18d 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -61,8 +61,10 @@
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/bug.h>
+#include <linux/mm.h>
#include <linux/slab.h>
+#include <asm/pgtable.h>
#include <asm/unwind.h>
#if 0
@@ -214,7 +216,13 @@ void *module_alloc(unsigned long size)
{
if (size == 0)
return NULL;
- return vmalloc(size);
+ /* using RWX means less protection for modules, but it's
+ * easier than trying to map the text, data, init_text and
+ * init_data correctly */
+ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL | __GFP_HIGHMEM,
+ PAGE_KERNEL_RWX, -1,
+ __builtin_return_address(0));
}
#ifndef CONFIG_64BIT
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index a85823668cb..93ff3d90edd 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -817,10 +817,7 @@ ENTRY(purge_kernel_dcache_page)
.procend
ENDPROC(purge_kernel_dcache_page)
-
- .export flush_user_dcache_range_asm
-
-flush_user_dcache_range_asm:
+ENTRY(flush_user_dcache_range_asm)
.proc
.callinfo NO_CALLS
.entry
@@ -839,6 +836,7 @@ flush_user_dcache_range_asm:
.exit
.procend
+ENDPROC(flush_user_dcache_range_asm)
ENTRY(flush_kernel_dcache_range_asm)
.proc
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 88a0ad14a9c..dc9a6246232 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -228,3 +228,11 @@ asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
((loff_t)lenhi << 32) | lenlo);
}
+
+asmlinkage long compat_sys_fanotify_mark(int fan_fd, int flags, u32 mask_hi,
+ u32 mask_lo, int fd,
+ const char __user *pathname)
+{
+ return sys_fanotify_mark(fan_fd, flags, ((u64)mask_hi << 32) | mask_lo,
+ fd, pathname);
+}
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 4be85ee10b8..a5b02ce4d41 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -420,6 +420,12 @@
ENTRY_COMP(recvmmsg)
ENTRY_SAME(accept4) /* 320 */
ENTRY_SAME(prlimit64)
+ ENTRY_SAME(fanotify_init)
+ ENTRY_COMP(fanotify_mark)
+ ENTRY_COMP(clock_adjtime)
+ ENTRY_SAME(name_to_handle_at) /* 325 */
+ ENTRY_COMP(open_by_handle_at)
+ ENTRY_SAME(syncfs)
/* Nothing yet */
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 8f1e4efd143..e1a55849bfa 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -69,6 +69,9 @@ SECTIONS
/* End of text section */
_etext = .;
+ /* Start of data section */
+ _sdata = .;
+
RODATA
/* writeable */
@@ -134,6 +137,7 @@ SECTIONS
. = ALIGN(16384);
__init_begin = .;
INIT_TEXT_SECTION(16384)
+ . = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(16)
/* we have to discard exit text and such at runtime, not link time */
.exit.text :
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index b1d126258de..5fa1e273006 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -371,24 +371,158 @@ static void __init setup_bootmem(void)
request_resource(&sysram_resources[0], &pdcdata_resource);
}
+static void __init map_pages(unsigned long start_vaddr,
+ unsigned long start_paddr, unsigned long size,
+ pgprot_t pgprot, int force)
+{
+ pgd_t *pg_dir;
+ pmd_t *pmd;
+ pte_t *pg_table;
+ unsigned long end_paddr;
+ unsigned long start_pmd;
+ unsigned long start_pte;
+ unsigned long tmp1;
+ unsigned long tmp2;
+ unsigned long address;
+ unsigned long vaddr;
+ unsigned long ro_start;
+ unsigned long ro_end;
+ unsigned long fv_addr;
+ unsigned long gw_addr;
+ extern const unsigned long fault_vector_20;
+ extern void * const linux_gateway_page;
+
+ ro_start = __pa((unsigned long)_text);
+ ro_end = __pa((unsigned long)&data_start);
+ fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK;
+ gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK;
+
+ end_paddr = start_paddr + size;
+
+ pg_dir = pgd_offset_k(start_vaddr);
+
+#if PTRS_PER_PMD == 1
+ start_pmd = 0;
+#else
+ start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
+#endif
+ start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
+
+ address = start_paddr;
+ vaddr = start_vaddr;
+ while (address < end_paddr) {
+#if PTRS_PER_PMD == 1
+ pmd = (pmd_t *)__pa(pg_dir);
+#else
+ pmd = (pmd_t *)pgd_address(*pg_dir);
+
+ /*
+ * pmd is physical at this point
+ */
+
+ if (!pmd) {
+ pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE << PMD_ORDER);
+ pmd = (pmd_t *) __pa(pmd);
+ }
+
+ pgd_populate(NULL, pg_dir, __va(pmd));
+#endif
+ pg_dir++;
+
+ /* now change pmd to kernel virtual addresses */
+
+ pmd = (pmd_t *)__va(pmd) + start_pmd;
+ for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++, pmd++) {
+
+ /*
+ * pg_table is physical at this point
+ */
+
+ pg_table = (pte_t *)pmd_address(*pmd);
+ if (!pg_table) {
+ pg_table = (pte_t *)
+ alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE);
+ pg_table = (pte_t *) __pa(pg_table);
+ }
+
+ pmd_populate_kernel(NULL, pmd, __va(pg_table));
+
+ /* now change pg_table to kernel virtual addresses */
+
+ pg_table = (pte_t *) __va(pg_table) + start_pte;
+ for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++, pg_table++) {
+ pte_t pte;
+
+ /*
+ * Map the fault vector writable so we can
+ * write the HPMC checksum.
+ */
+ if (force)
+ pte = __mk_pte(address, pgprot);
+ else if (core_kernel_text(vaddr) &&
+ address != fv_addr)
+ pte = __mk_pte(address, PAGE_KERNEL_EXEC);
+ else
+#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
+ if (address >= ro_start && address < ro_end
+ && address != fv_addr
+ && address != gw_addr)
+ pte = __mk_pte(address, PAGE_KERNEL_RO);
+ else
+#endif
+ pte = __mk_pte(address, pgprot);
+
+ if (address >= end_paddr) {
+ if (force)
+ break;
+ else
+ pte_val(pte) = 0;
+ }
+
+ set_pte(pg_table, pte);
+
+ address += PAGE_SIZE;
+ vaddr += PAGE_SIZE;
+ }
+ start_pte = 0;
+
+ if (address >= end_paddr)
+ break;
+ }
+ start_pmd = 0;
+ }
+}
+
void free_initmem(void)
{
unsigned long addr;
unsigned long init_begin = (unsigned long)__init_begin;
unsigned long init_end = (unsigned long)__init_end;
-#ifdef CONFIG_DEBUG_KERNEL
+ /* The init text pages are marked R-X. We have to
+ * flush the icache and mark them RW-
+ *
+ * This is tricky, because map_pages is in the init section.
+ * Do a dummy remap of the data section first (the data
+ * section is already PAGE_KERNEL) to pull in the TLB entries
+ * for map_kernel */
+ map_pages(init_begin, __pa(init_begin), init_end - init_begin,
+ PAGE_KERNEL_RWX, 1);
+ /* now remap at PAGE_KERNEL since the TLB is pre-primed to execute
+ * map_pages */
+ map_pages(init_begin, __pa(init_begin), init_end - init_begin,
+ PAGE_KERNEL, 1);
+
+ /* force the kernel to see the new TLB entries */
+ __flush_tlb_range(0, init_begin, init_end);
/* Attempt to catch anyone trying to execute code here
* by filling the page with BRK insns.
*/
memset((void *)init_begin, 0x00, init_end - init_begin);
+ /* finally dump all the instructions which were cached, since the
+ * pages are no-longer executable */
flush_icache_range(init_begin, init_end);
-#endif
- /* align __init_begin and __init_end to page size,
- ignoring linker script where we might have tried to save RAM */
- init_begin = PAGE_ALIGN(init_begin);
- init_end = PAGE_ALIGN(init_end);
for (addr = init_begin; addr < init_end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
@@ -618,114 +752,6 @@ void show_mem(unsigned int filter)
#endif
}
-
-static void __init map_pages(unsigned long start_vaddr, unsigned long start_paddr, unsigned long size, pgprot_t pgprot)
-{
- pgd_t *pg_dir;
- pmd_t *pmd;
- pte_t *pg_table;
- unsigned long end_paddr;
- unsigned long start_pmd;
- unsigned long start_pte;
- unsigned long tmp1;
- unsigned long tmp2;
- unsigned long address;
- unsigned long ro_start;
- unsigned long ro_end;
- unsigned long fv_addr;
- unsigned long gw_addr;
- extern const unsigned long fault_vector_20;
- extern void * const linux_gateway_page;
-
- ro_start = __pa((unsigned long)_text);
- ro_end = __pa((unsigned long)&data_start);
- fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK;
- gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK;
-
- end_paddr = start_paddr + size;
-
- pg_dir = pgd_offset_k(start_vaddr);
-
-#if PTRS_PER_PMD == 1
- start_pmd = 0;
-#else
- start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
-#endif
- start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
-
- address = start_paddr;
- while (address < end_paddr) {
-#if PTRS_PER_PMD == 1
- pmd = (pmd_t *)__pa(pg_dir);
-#else
- pmd = (pmd_t *)pgd_address(*pg_dir);
-
- /*
- * pmd is physical at this point
- */
-
- if (!pmd) {
- pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE << PMD_ORDER);
- pmd = (pmd_t *) __pa(pmd);
- }
-
- pgd_populate(NULL, pg_dir, __va(pmd));
-#endif
- pg_dir++;
-
- /* now change pmd to kernel virtual addresses */
-
- pmd = (pmd_t *)__va(pmd) + start_pmd;
- for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++,pmd++) {
-
- /*
- * pg_table is physical at this point
- */
-
- pg_table = (pte_t *)pmd_address(*pmd);
- if (!pg_table) {
- pg_table = (pte_t *)
- alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE);
- pg_table = (pte_t *) __pa(pg_table);
- }
-
- pmd_populate_kernel(NULL, pmd, __va(pg_table));
-
- /* now change pg_table to kernel virtual addresses */
-
- pg_table = (pte_t *) __va(pg_table) + start_pte;
- for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++,pg_table++) {
- pte_t pte;
-
- /*
- * Map the fault vector writable so we can
- * write the HPMC checksum.
- */
-#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
- if (address >= ro_start && address < ro_end
- && address != fv_addr
- && address != gw_addr)
- pte = __mk_pte(address, PAGE_KERNEL_RO);
- else
-#endif
- pte = __mk_pte(address, pgprot);
-
- if (address >= end_paddr)
- pte_val(pte) = 0;
-
- set_pte(pg_table, pte);
-
- address += PAGE_SIZE;
- }
- start_pte = 0;
-
- if (address >= end_paddr)
- break;
- }
- start_pmd = 0;
- }
-}
-
/*
* pagetable_init() sets up the page tables
*
@@ -750,14 +776,14 @@ static void __init pagetable_init(void)
size = pmem_ranges[range].pages << PAGE_SHIFT;
map_pages((unsigned long)__va(start_paddr), start_paddr,
- size, PAGE_KERNEL);
+ size, PAGE_KERNEL, 0);
}
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_end && initrd_end > mem_limit) {
printk(KERN_INFO "initrd: mapping %08lx-%08lx\n", initrd_start, initrd_end);
map_pages(initrd_start, __pa(initrd_start),
- initrd_end - initrd_start, PAGE_KERNEL);
+ initrd_end - initrd_start, PAGE_KERNEL, 0);
}
#endif
@@ -782,7 +808,7 @@ static void __init gateway_init(void)
*/
map_pages(linux_gateway_page_addr, __pa(&linux_gateway_page),
- PAGE_SIZE, PAGE_GATEWAY);
+ PAGE_SIZE, PAGE_GATEWAY, 1);
}
#ifdef CONFIG_HPUX
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8f4d50b0adf..a3128ca0fe1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -193,6 +193,12 @@ config SYS_SUPPORTS_APM_EMULATION
default y if PMAC_APM_EMU
bool
+config EPAPR_BOOT
+ bool
+ help
+ Used to allow a board to specify it wants an ePAPR compliant wrapper.
+ default n
+
config DEFAULT_UIMAGE
bool
help
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 2d38a50e66b..a597dd77b90 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -267,6 +267,11 @@ config PPC_EARLY_DEBUG_USBGECKO
Select this to enable early debugging for Nintendo GameCube/Wii
consoles via an external USB Gecko adapter.
+config PPC_EARLY_DEBUG_WSP
+ bool "Early debugging via WSP's internal UART"
+ depends on PPC_WSP
+ select PPC_UDBG_16550
+
endchoice
config PPC_EARLY_DEBUG_44x_PHYSLOW
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 89178164af5..c26200b40a4 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -69,7 +69,8 @@ src-wlib := string.S crt0.S crtsavres.S stdio.c main.c \
cpm-serial.c stdlib.c mpc52xx-psc.c planetcore.c uartlite.c \
fsl-soc.c mpc8xx.c pq2.c ugecon.c
src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c \
- cuboot-ebony.c cuboot-hotfoot.c treeboot-ebony.c prpmc2800.c \
+ cuboot-ebony.c cuboot-hotfoot.c epapr.c treeboot-ebony.c \
+ prpmc2800.c \
ps3-head.S ps3-hvcall.S ps3.c treeboot-bamboo.c cuboot-8xx.c \
cuboot-pq2.c cuboot-sequoia.c treeboot-walnut.c \
cuboot-bamboo.c cuboot-mpc7448hpc2.c cuboot-taishan.c \
@@ -127,7 +128,7 @@ quiet_cmd_bootas = BOOTAS $@
cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
quiet_cmd_bootar = BOOTAR $@
- cmd_bootar = $(CROSS32AR) -cr $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
+ cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
$(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE
$(call if_changed_dep,bootcc)
@@ -182,6 +183,7 @@ image-$(CONFIG_PPC_HOLLY) += dtbImage.holly
image-$(CONFIG_PPC_PRPMC2800) += dtbImage.prpmc2800
image-$(CONFIG_PPC_ISERIES) += zImage.iseries
image-$(CONFIG_DEFAULT_UIMAGE) += uImage
+image-$(CONFIG_EPAPR_BOOT) += zImage.epapr
#
# Targets which embed a device tree blob
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index f1c4dfc635b..0f7428a37ef 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -6,16 +6,28 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
+ * NOTE: this code runs in 32 bit mode, is position-independent,
+ * and is packaged as ELF32.
*/
#include "ppc_asm.h"
.text
- /* a procedure descriptor used when booting this as a COFF file */
+ /* A procedure descriptor used when booting this as a COFF file.
+ * When making COFF, this comes first in the link and we're
+ * linked at 0x500000.
+ */
.globl _zimage_start_opd
_zimage_start_opd:
- .long _zimage_start, 0, 0, 0
+ .long 0x500000, 0, 0, 0
+
+p_start: .long _start
+p_etext: .long _etext
+p_bss_start: .long __bss_start
+p_end: .long _end
+
+ .weak _platform_stack_top
+p_pstack: .long _platform_stack_top
.weak _zimage_start
.globl _zimage_start
@@ -24,37 +36,65 @@ _zimage_start:
_zimage_start_lib:
/* Work out the offset between the address we were linked at
and the address where we're running. */
- bl 1f
-1: mflr r0
- lis r9,1b@ha
- addi r9,r9,1b@l
- subf. r0,r9,r0
- beq 3f /* if running at same address as linked */
+ bl .+4
+p_base: mflr r10 /* r10 now points to runtime addr of p_base */
+ /* grab the link address of the dynamic section in r11 */
+ addis r11,r10,(_GLOBAL_OFFSET_TABLE_-p_base)@ha
+ lwz r11,(_GLOBAL_OFFSET_TABLE_-p_base)@l(r11)
+ cmpwi r11,0
+ beq 3f /* if not linked -pie */
+ /* get the runtime address of the dynamic section in r12 */
+ .weak __dynamic_start
+ addis r12,r10,(__dynamic_start-p_base)@ha
+ addi r12,r12,(__dynamic_start-p_base)@l
+ subf r11,r11,r12 /* runtime - linktime offset */
+
+ /* The dynamic section contains a series of tagged entries.
+ * We need the RELA and RELACOUNT entries. */
+RELA = 7
+RELACOUNT = 0x6ffffff9
+ li r9,0
+ li r0,0
+9: lwz r8,0(r12) /* get tag */
+ cmpwi r8,0
+ beq 10f /* end of list */
+ cmpwi r8,RELA
+ bne 11f
+ lwz r9,4(r12) /* get RELA pointer in r9 */
+ b 12f
+11: addis r8,r8,(-RELACOUNT)@ha
+ cmpwi r8,RELACOUNT@l
+ bne 12f
+ lwz r0,4(r12) /* get RELACOUNT value in r0 */
+12: addi r12,r12,8
+ b 9b
- /* The .got2 section contains a list of addresses, so add
- the address offset onto each entry. */
- lis r9,__got2_start@ha
- addi r9,r9,__got2_start@l
- lis r8,__got2_end@ha
- addi r8,r8,__got2_end@l
- subf. r8,r9,r8
+ /* The relocation section contains a list of relocations.
+ * We now do the R_PPC_RELATIVE ones, which point to words
+ * which need to be initialized with addend + offset.
+ * The R_PPC_RELATIVE ones come first and there are RELACOUNT
+ * of them. */
+10: /* skip relocation if we don't have both */
+ cmpwi r0,0
beq 3f
- srwi. r8,r8,2
- mtctr r8
- add r9,r0,r9
-2: lwz r8,0(r9)
- add r8,r8,r0
- stw r8,0(r9)
- addi r9,r9,4
+ cmpwi r9,0
+ beq 3f
+
+ add r9,r9,r11 /* Relocate RELA pointer */
+ mtctr r0
+2: lbz r0,4+3(r9) /* ELF32_R_INFO(reloc->r_info) */
+ cmpwi r0,22 /* R_PPC_RELATIVE */
+ bne 3f
+ lwz r12,0(r9) /* reloc->r_offset */
+ lwz r0,8(r9) /* reloc->r_addend */
+ add r0,r0,r11
+ stwx r0,r11,r12
+ addi r9,r9,12
bdnz 2b
/* Do a cache flush for our text, in case the loader didn't */
-3: lis r9,_start@ha
- addi r9,r9,_start@l
- add r9,r0,r9
- lis r8,_etext@ha
- addi r8,r8,_etext@l
- add r8,r0,r8
+3: lwz r9,p_start-p_base(r10) /* note: these are relocated now */
+ lwz r8,p_etext-p_base(r10)
4: dcbf r0,r9
icbi r0,r9
addi r9,r9,0x20
@@ -64,27 +104,19 @@ _zimage_start_lib:
isync
/* Clear the BSS */
- lis r9,__bss_start@ha
- addi r9,r9,__bss_start@l
- add r9,r0,r9
- lis r8,_end@ha
- addi r8,r8,_end@l
- add r8,r0,r8
- li r10,0
-5: stw r10,0(r9)
+ lwz r9,p_bss_start-p_base(r10)
+ lwz r8,p_end-p_base(r10)
+ li r0,0
+5: stw r0,0(r9)
addi r9,r9,4
cmplw cr0,r9,r8
blt 5b
/* Possibly set up a custom stack */
-.weak _platform_stack_top
- lis r8,_platform_stack_top@ha
- addi r8,r8,_platform_stack_top@l
+ lwz r8,p_pstack-p_base(r10)
cmpwi r8,0
beq 6f
- add r8,r0,r8
lwz r1,0(r8)
- add r1,r0,r1
li r0,0
stwu r0,-16(r1) /* establish a stack frame */
6:
diff --git a/arch/powerpc/boot/dts/p1020rdb.dts b/arch/powerpc/boot/dts/p1020rdb.dts
index e0668f87779..d6a8ae45813 100644
--- a/arch/powerpc/boot/dts/p1020rdb.dts
+++ b/arch/powerpc/boot/dts/p1020rdb.dts
@@ -9,12 +9,11 @@
* option) any later version.
*/
-/dts-v1/;
+/include/ "p1020si.dtsi"
+
/ {
- model = "fsl,P1020";
+ model = "fsl,P1020RDB";
compatible = "fsl,P1020RDB";
- #address-cells = <2>;
- #size-cells = <2>;
aliases {
serial0 = &serial0;
@@ -26,34 +25,11 @@
pci1 = &pci1;
};
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,P1020@0 {
- device_type = "cpu";
- reg = <0x0>;
- next-level-cache = <&L2>;
- };
-
- PowerPC,P1020@1 {
- device_type = "cpu";
- reg = <0x1>;
- next-level-cache = <&L2>;
- };
- };
-
memory {
device_type = "memory";
};
localbus@ffe05000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus";
- reg = <0 0xffe05000 0 0x1000>;
- interrupts = <19 2>;
- interrupt-parent = <&mpic>;
/* NOR, NAND Flashes and Vitesse 5 port L2 switch */
ranges = <0x0 0x0 0x0 0xef000000 0x01000000
@@ -165,88 +141,14 @@
};
soc@ffe00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "fsl,p1020-immr", "simple-bus";
- ranges = <0x0 0x0 0xffe00000 0x100000>;
- bus-frequency = <0>; // Filled out by uboot.
-
- ecm-law@0 {
- compatible = "fsl,ecm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <12>;
- };
-
- ecm@1000 {
- compatible = "fsl,p1020-ecm", "fsl,ecm";
- reg = <0x1000 0x1000>;
- interrupts = <16 2>;
- interrupt-parent = <&mpic>;
- };
-
- memory-controller@2000 {
- compatible = "fsl,p1020-memory-controller";
- reg = <0x2000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <16 2>;
- };
-
i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
rtc@68 {
compatible = "dallas,ds1339";
reg = <0x68>;
};
};
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
spi@7000 {
- cell-index = <0>;
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,espi";
- reg = <0x7000 0x1000>;
- interrupts = <59 0x2>;
- interrupt-parent = <&mpic>;
- mode = "cpu";
fsl_m25p80@0 {
#address-cells = <1>;
@@ -294,66 +196,7 @@
};
};
- gpio: gpio-controller@f000 {
- #gpio-cells = <2>;
- compatible = "fsl,mpc8572-gpio";
- reg = <0xf000 0x100>;
- interrupts = <47 0x2>;
- interrupt-parent = <&mpic>;
- gpio-controller;
- };
-
- L2: l2-cache-controller@20000 {
- compatible = "fsl,p1020-l2-cache-controller";
- reg = <0x20000 0x1000>;
- cache-line-size = <32>; // 32 bytes
- cache-size = <0x40000>; // L2,256K
- interrupt-parent = <&mpic>;
- interrupts = <16 2>;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
mdio@24000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,etsec2-mdio";
- reg = <0x24000 0x1000 0xb0030 0x4>;
phy0: ethernet-phy@0 {
interrupt-parent = <&mpic>;
@@ -369,10 +212,6 @@
};
mdio@25000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,etsec2-tbi";
- reg = <0x25000 0x1000 0xb1030 0x4>;
tbi0: tbi-phy@11 {
reg = <0x11>;
@@ -381,97 +220,25 @@
};
enet0: ethernet@b0000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "network";
- model = "eTSEC";
- compatible = "fsl,etsec2";
- fsl,num_rx_queues = <0x8>;
- fsl,num_tx_queues = <0x8>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupt-parent = <&mpic>;
fixed-link = <1 1 1000 0 0>;
phy-connection-type = "rgmii-id";
- queue-group@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0xb0000 0x1000>;
- interrupts = <29 2 30 2 34 2>;
- };
-
- queue-group@1 {
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0xb4000 0x1000>;
- interrupts = <17 2 18 2 24 2>;
- };
};
enet1: ethernet@b1000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "network";
- model = "eTSEC";
- compatible = "fsl,etsec2";
- fsl,num_rx_queues = <0x8>;
- fsl,num_tx_queues = <0x8>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupt-parent = <&mpic>;
phy-handle = <&phy0>;
tbi-handle = <&tbi0>;
phy-connection-type = "sgmii";
- queue-group@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0xb1000 0x1000>;
- interrupts = <35 2 36 2 40 2>;
- };
-
- queue-group@1 {
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0xb5000 0x1000>;
- interrupts = <51 2 52 2 67 2>;
- };
};
enet2: ethernet@b2000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "network";
- model = "eTSEC";
- compatible = "fsl,etsec2";
- fsl,num_rx_queues = <0x8>;
- fsl,num_tx_queues = <0x8>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupt-parent = <&mpic>;
phy-handle = <&phy1>;
phy-connection-type = "rgmii-id";
- queue-group@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0xb2000 0x1000>;
- interrupts = <31 2 32 2 33 2>;
- };
-
- queue-group@1 {
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0xb6000 0x1000>;
- interrupts = <25 2 26 2 27 2>;
- };
};
usb@22000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl-usb2-dr";
- reg = <0x22000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <28 0x2>;
phy_type = "ulpi";
};
@@ -481,82 +248,23 @@
it enables USB2. OTOH, U-Boot does create a new node
when there isn't any. So, just comment it out.
usb@23000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl-usb2-dr";
- reg = <0x23000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <46 0x2>;
phy_type = "ulpi";
};
*/
- sdhci@2e000 {
- compatible = "fsl,p1020-esdhc", "fsl,esdhc";
- reg = <0x2e000 0x1000>;
- interrupts = <72 0x2>;
- interrupt-parent = <&mpic>;
- /* Filled in by U-Boot */
- clock-frequency = <0>;
- };
-
- crypto@30000 {
- compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
- "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <45 2 58 2>;
- interrupt-parent = <&mpic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0xbfe>;
- fsl,descriptor-types-mask = <0x3ab0ebf>;
- };
-
- mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
-
- msi@41600 {
- compatible = "fsl,p1020-msi", "fsl,mpic-msi";
- reg = <0x41600 0x80>;
- msi-available-ranges = <0 0x100>;
- interrupts = <
- 0xe0 0
- 0xe1 0
- 0xe2 0
- 0xe3 0
- 0xe4 0
- 0xe5 0
- 0xe6 0
- 0xe7 0>;
- interrupt-parent = <&mpic>;
- };
-
- global-utilities@e0000 { //global utilities block
- compatible = "fsl,p1020-guts";
- reg = <0xe0000 0x1000>;
- fsl,has-rstcr;
- };
};
pci0: pcie@ffe09000 {
- compatible = "fsl,mpc8548-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0 0xffe09000 0 0x1000>;
- bus-range = <0 255>;
ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <16 2>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0x0 0x0 0x1 &mpic 0x4 0x1
+ 0000 0x0 0x0 0x2 &mpic 0x5 0x1
+ 0000 0x0 0x0 0x3 &mpic 0x6 0x1
+ 0000 0x0 0x0 0x4 &mpic 0x7 0x1
+ >;
pcie@0 {
reg = <0x0 0x0 0x0 0x0 0x0>;
#size-cells = <2>;
@@ -573,18 +281,16 @@
};
pci1: pcie@ffe0a000 {
- compatible = "fsl,mpc8548-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0 0xffe0a000 0 0x1000>;
- bus-range = <0 255>;
ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <16 2>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0x0 0x0 0x1 &mpic 0x0 0x1
+ 0000 0x0 0x0 0x2 &mpic 0x1 0x1
+ 0000 0x0 0x0 0x3 &mpic 0x2 0x1
+ 0000 0x0 0x0 0x4 &mpic 0x3 0x1
+ >;
pcie@0 {
reg = <0x0 0x0 0x0 0x0 0x0>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts
new file mode 100644
index 00000000000..f0bf7f42f09
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts
@@ -0,0 +1,213 @@
+/*
+ * P1020 RDB Core0 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts file allows core0 to have memory, l2, i2c, spi, gpio, tdm, dma, usb,
+ * eth1, eth2, sdhc, crypto, global-util, message, pci0, pci1, msi.
+ *
+ * Please note to add "-b 0" for core0's dts compiling.
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+/include/ "p1020si.dtsi"
+
+/ {
+ model = "fsl,P1020RDB";
+ compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP";
+
+ aliases {
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ serial0 = &serial0;
+ pci0 = &pci0;
+ pci1 = &pci1;
+ };
+
+ cpus {
+ PowerPC,P1020@1 {
+ status = "disabled";
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ };
+
+ localbus@ffe05000 {
+ status = "disabled";
+ };
+
+ soc@ffe00000 {
+ i2c@3000 {
+ rtc@68 {
+ compatible = "dallas,ds1339";
+ reg = <0x68>;
+ };
+ };
+
+ serial1: serial@4600 {
+ status = "disabled";
+ };
+
+ spi@7000 {
+ fsl_m25p80@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,espi-flash";
+ reg = <0>;
+ linux,modalias = "fsl_m25p80";
+ spi-max-frequency = <40000000>;
+
+ partition@0 {
+ /* 512KB for u-boot Bootloader Image */
+ reg = <0x0 0x00080000>;
+ label = "SPI (RO) U-Boot Image";
+ read-only;
+ };
+
+ partition@80000 {
+ /* 512KB for DTB Image */
+ reg = <0x00080000 0x00080000>;
+ label = "SPI (RO) DTB Image";
+ read-only;
+ };
+
+ partition@100000 {
+ /* 4MB for Linux Kernel Image */
+ reg = <0x00100000 0x00400000>;
+ label = "SPI (RO) Linux Kernel Image";
+ read-only;
+ };
+
+ partition@500000 {
+ /* 4MB for Compressed RFS Image */
+ reg = <0x00500000 0x00400000>;
+ label = "SPI (RO) Compressed RFS Image";
+ read-only;
+ };
+
+ partition@900000 {
+ /* 7MB for JFFS2 based RFS */
+ reg = <0x00900000 0x00700000>;
+ label = "SPI (RW) JFFS2 RFS";
+ };
+ };
+ };
+
+ mdio@24000 {
+ phy0: ethernet-phy@0 {
+ interrupt-parent = <&mpic>;
+ interrupts = <3 1>;
+ reg = <0x0>;
+ };
+ phy1: ethernet-phy@1 {
+ interrupt-parent = <&mpic>;
+ interrupts = <2 1>;
+ reg = <0x1>;
+ };
+ };
+
+ mdio@25000 {
+ tbi0: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ enet0: ethernet@b0000 {
+ status = "disabled";
+ };
+
+ enet1: ethernet@b1000 {
+ phy-handle = <&phy0>;
+ tbi-handle = <&tbi0>;
+ phy-connection-type = "sgmii";
+ };
+
+ enet2: ethernet@b2000 {
+ phy-handle = <&phy1>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ usb@22000 {
+ phy_type = "ulpi";
+ };
+
+ /* USB2 is shared with localbus, so it must be disabled
+ by default. We can't put 'status = "disabled";' here
+ since U-Boot doesn't clear the status property when
+ it enables USB2. OTOH, U-Boot does create a new node
+ when there isn't any. So, just comment it out.
+ usb@23000 {
+ phy_type = "ulpi";
+ };
+ */
+
+ mpic: pic@40000 {
+ protected-sources = <
+ 42 29 30 34 /* serial1, enet0-queue-group0 */
+ 17 18 24 45 /* enet0-queue-group1, crypto */
+ >;
+ };
+
+ };
+
+ pci0: pcie@ffe09000 {
+ ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0x0 0x0 0x1 &mpic 0x4 0x1
+ 0000 0x0 0x0 0x2 &mpic 0x5 0x1
+ 0000 0x0 0x0 0x3 &mpic 0x6 0x1
+ 0000 0x0 0x0 0x4 &mpic 0x7 0x1
+ >;
+ pcie@0 {
+ reg = <0x0 0x0 0x0 0x0 0x0>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ device_type = "pci";
+ ranges = <0x2000000 0x0 0xa0000000
+ 0x2000000 0x0 0xa0000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x100000>;
+ };
+ };
+
+ pci1: pcie@ffe0a000 {
+ ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0x0 0x0 0x1 &mpic 0x0 0x1
+ 0000 0x0 0x0 0x2 &mpic 0x1 0x1
+ 0000 0x0 0x0 0x3 &mpic 0x2 0x1
+ 0000 0x0 0x0 0x4 &mpic 0x3 0x1
+ >;
+ pcie@0 {
+ reg = <0x0 0x0 0x0 0x0 0x0>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ device_type = "pci";
+ ranges = <0x2000000 0x0 0x80000000
+ 0x2000000 0x0 0x80000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x100000>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts
new file mode 100644
index 00000000000..6ec02204a44
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts
@@ -0,0 +1,148 @@
+/*
+ * P1020 RDB Core1 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts allows core1 to have l2, eth0, crypto.
+ *
+ * Please note to add "-b 1" for core1's dts compiling.
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+/include/ "p1020si.dtsi"
+
+/ {
+ model = "fsl,P1020RDB";
+ compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP";
+
+ aliases {
+ ethernet0 = &enet0;
+ serial0 = &serial1;
+ };
+
+ cpus {
+ PowerPC,P1020@0 {
+ status = "disabled";
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ };
+
+ localbus@ffe05000 {
+ status = "disabled";
+ };
+
+ soc@ffe00000 {
+ ecm-law@0 {
+ status = "disabled";
+ };
+
+ ecm@1000 {
+ status = "disabled";
+ };
+
+ memory-controller@2000 {
+ status = "disabled";
+ };
+
+ i2c@3000 {
+ status = "disabled";
+ };
+
+ i2c@3100 {
+ status = "disabled";
+ };
+
+ serial0: serial@4500 {
+ status = "disabled";
+ };
+
+ spi@7000 {
+ status = "disabled";
+ };
+
+ gpio: gpio-controller@f000 {
+ status = "disabled";
+ };
+
+ dma@21300 {
+ status = "disabled";
+ };
+
+ mdio@24000 {
+ status = "disabled";
+ };
+
+ mdio@25000 {
+ status = "disabled";
+ };
+
+ enet0: ethernet@b0000 {
+ fixed-link = <1 1 1000 0 0>;
+ phy-connection-type = "rgmii-id";
+
+ };
+
+ enet1: ethernet@b1000 {
+ status = "disabled";
+ };
+
+ enet2: ethernet@b2000 {
+ status = "disabled";
+ };
+
+ usb@22000 {
+ status = "disabled";
+ };
+
+ sdhci@2e000 {
+ status = "disabled";
+ };
+
+ mpic: pic@40000 {
+ protected-sources = <
+ 16 /* ecm, mem, L2, pci0, pci1 */
+ 43 42 59 /* i2c, serial0, spi */
+ 47 63 62 /* gpio, tdm */
+ 20 21 22 23 /* dma */
+ 03 02 /* mdio */
+ 35 36 40 /* enet1-queue-group0 */
+ 51 52 67 /* enet1-queue-group1 */
+ 31 32 33 /* enet2-queue-group0 */
+ 25 26 27 /* enet2-queue-group1 */
+ 28 72 58 /* usb, sdhci, crypto */
+ 0xb0 0xb1 0xb2 /* message */
+ 0xb3 0xb4 0xb5
+ 0xb6 0xb7
+ 0xe0 0xe1 0xe2 /* msi */
+ 0xe3 0xe4 0xe5
+ 0xe6 0xe7 /* sdhci, crypto , pci */
+ >;
+ };
+
+ msi@41600 {
+ status = "disabled";
+ };
+
+ global-utilities@e0000 { //global utilities block
+ status = "disabled";
+ };
+
+ };
+
+ pci0: pcie@ffe09000 {
+ status = "disabled";
+ };
+
+ pci1: pcie@ffe0a000 {
+ status = "disabled";
+ };
+};
diff --git a/arch/powerpc/boot/dts/p1020si.dtsi b/arch/powerpc/boot/dts/p1020si.dtsi
new file mode 100644
index 00000000000..5c5acb66c3f
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020si.dtsi
@@ -0,0 +1,377 @@
+/*
+ * P1020si Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+ compatible = "fsl,P1020";
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ PowerPC,P1020@0 {
+ device_type = "cpu";
+ reg = <0x0>;
+ next-level-cache = <&L2>;
+ };
+
+ PowerPC,P1020@1 {
+ device_type = "cpu";
+ reg = <0x1>;
+ next-level-cache = <&L2>;
+ };
+ };
+
+ localbus@ffe05000 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus";
+ reg = <0 0xffe05000 0 0x1000>;
+ interrupts = <19 2>;
+ interrupt-parent = <&mpic>;
+ };
+
+ soc@ffe00000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "soc";
+ compatible = "fsl,p1020-immr", "simple-bus";
+ ranges = <0x0 0x0 0xffe00000 0x100000>;
+ bus-frequency = <0>; // Filled out by uboot.
+
+ ecm-law@0 {
+ compatible = "fsl,ecm-law";
+ reg = <0x0 0x1000>;
+ fsl,num-laws = <12>;
+ };
+
+ ecm@1000 {
+ compatible = "fsl,p1020-ecm", "fsl,ecm";
+ reg = <0x1000 0x1000>;
+ interrupts = <16 2>;
+ interrupt-parent = <&mpic>;
+ };
+
+ memory-controller@2000 {
+ compatible = "fsl,p1020-memory-controller";
+ reg = <0x2000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <16 2>;
+ };
+
+ i2c@3000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ cell-index = <0>;
+ compatible = "fsl-i2c";
+ reg = <0x3000 0x100>;
+ interrupts = <43 2>;
+ interrupt-parent = <&mpic>;
+ dfsrr;
+ };
+
+ i2c@3100 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ cell-index = <1>;
+ compatible = "fsl-i2c";
+ reg = <0x3100 0x100>;
+ interrupts = <43 2>;
+ interrupt-parent = <&mpic>;
+ dfsrr;
+ };
+
+ serial0: serial@4500 {
+ cell-index = <0>;
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x4500 0x100>;
+ clock-frequency = <0>;
+ interrupts = <42 2>;
+ interrupt-parent = <&mpic>;
+ };
+
+ serial1: serial@4600 {
+ cell-index = <1>;
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x4600 0x100>;
+ clock-frequency = <0>;
+ interrupts = <42 2>;
+ interrupt-parent = <&mpic>;
+ };
+
+ spi@7000 {
+ cell-index = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,espi";
+ reg = <0x7000 0x1000>;
+ interrupts = <59 0x2>;
+ interrupt-parent = <&mpic>;
+ mode = "cpu";
+ };
+
+ gpio: gpio-controller@f000 {
+ #gpio-cells = <2>;
+ compatible = "fsl,mpc8572-gpio";
+ reg = <0xf000 0x100>;
+ interrupts = <47 0x2>;
+ interrupt-parent = <&mpic>;
+ gpio-controller;
+ };
+
+ L2: l2-cache-controller@20000 {
+ compatible = "fsl,p1020-l2-cache-controller";
+ reg = <0x20000 0x1000>;
+ cache-line-size = <32>; // 32 bytes
+ cache-size = <0x40000>; // L2,256K
+ interrupt-parent = <&mpic>;
+ interrupts = <16 2>;
+ };
+
+ dma@21300 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,eloplus-dma";
+ reg = <0x21300 0x4>;
+ ranges = <0x0 0x21100 0x200>;
+ cell-index = <0>;
+ dma-channel@0 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x0 0x80>;
+ cell-index = <0>;
+ interrupt-parent = <&mpic>;
+ interrupts = <20 2>;
+ };
+ dma-channel@80 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x80 0x80>;
+ cell-index = <1>;
+ interrupt-parent = <&mpic>;
+ interrupts = <21 2>;
+ };
+ dma-channel@100 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x100 0x80>;
+ cell-index = <2>;
+ interrupt-parent = <&mpic>;
+ interrupts = <22 2>;
+ };
+ dma-channel@180 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x180 0x80>;
+ cell-index = <3>;
+ interrupt-parent = <&mpic>;
+ interrupts = <23 2>;
+ };
+ };
+
+ mdio@24000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,etsec2-mdio";
+ reg = <0x24000 0x1000 0xb0030 0x4>;
+
+ };
+
+ mdio@25000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,etsec2-tbi";
+ reg = <0x25000 0x1000 0xb1030 0x4>;
+
+ };
+
+ enet0: ethernet@b0000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "network";
+ model = "eTSEC";
+ compatible = "fsl,etsec2";
+ fsl,num_rx_queues = <0x8>;
+ fsl,num_tx_queues = <0x8>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ interrupt-parent = <&mpic>;
+
+ queue-group@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0xb0000 0x1000>;
+ interrupts = <29 2 30 2 34 2>;
+ };
+
+ queue-group@1 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0xb4000 0x1000>;
+ interrupts = <17 2 18 2 24 2>;
+ };
+ };
+
+ enet1: ethernet@b1000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "network";
+ model = "eTSEC";
+ compatible = "fsl,etsec2";
+ fsl,num_rx_queues = <0x8>;
+ fsl,num_tx_queues = <0x8>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ interrupt-parent = <&mpic>;
+
+ queue-group@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0xb1000 0x1000>;
+ interrupts = <35 2 36 2 40 2>;
+ };
+
+ queue-group@1 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0xb5000 0x1000>;
+ interrupts = <51 2 52 2 67 2>;
+ };
+ };
+
+ enet2: ethernet@b2000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "network";
+ model = "eTSEC";
+ compatible = "fsl,etsec2";
+ fsl,num_rx_queues = <0x8>;
+ fsl,num_tx_queues = <0x8>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ interrupt-parent = <&mpic>;
+
+ queue-group@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0xb2000 0x1000>;
+ interrupts = <31 2 32 2 33 2>;
+ };
+
+ queue-group@1 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0xb6000 0x1000>;
+ interrupts = <25 2 26 2 27 2>;
+ };
+ };
+
+ usb@22000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl-usb2-dr";
+ reg = <0x22000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <28 0x2>;
+ };
+
+ /* USB2 is shared with localbus, so it must be disabled
+ by default. We can't put 'status = "disabled";' here
+ since U-Boot doesn't clear the status property when
+ it enables USB2. OTOH, U-Boot does create a new node
+ when there isn't any. So, just comment it out.
+ usb@23000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl-usb2-dr";
+ reg = <0x23000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <46 0x2>;
+ phy_type = "ulpi";
+ };
+ */
+
+ sdhci@2e000 {
+ compatible = "fsl,p1020-esdhc", "fsl,esdhc";
+ reg = <0x2e000 0x1000>;
+ interrupts = <72 0x2>;
+ interrupt-parent = <&mpic>;
+ /* Filled in by U-Boot */
+ clock-frequency = <0>;
+ };
+
+ crypto@30000 {
+ compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
+ "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+ reg = <0x30000 0x10000>;
+ interrupts = <45 2 58 2>;
+ interrupt-parent = <&mpic>;
+ fsl,num-channels = <4>;
+ fsl,channel-fifo-len = <24>;
+ fsl,exec-units-mask = <0xbfe>;
+ fsl,descriptor-types-mask = <0x3ab0ebf>;
+ };
+
+ mpic: pic@40000 {
+ interrupt-controller;
+ #address-cells = <0>;
+ #interrupt-cells = <2>;
+ reg = <0x40000 0x40000>;
+ compatible = "chrp,open-pic";
+ device_type = "open-pic";
+ };
+
+ msi@41600 {
+ compatible = "fsl,p1020-msi", "fsl,mpic-msi";
+ reg = <0x41600 0x80>;
+ msi-available-ranges = <0 0x100>;
+ interrupts = <
+ 0xe0 0
+ 0xe1 0
+ 0xe2 0
+ 0xe3 0
+ 0xe4 0
+ 0xe5 0
+ 0xe6 0
+ 0xe7 0>;
+ interrupt-parent = <&mpic>;
+ };
+
+ global-utilities@e0000 { //global utilities block
+ compatible = "fsl,p1020-guts","fsl,p2020-guts";
+ reg = <0xe0000 0x1000>;
+ fsl,has-rstcr;
+ };
+ };
+
+ pci0: pcie@ffe09000 {
+ compatible = "fsl,mpc8548-pcie";
+ device_type = "pci";
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0 0xffe09000 0 0x1000>;
+ bus-range = <0 255>;
+ clock-frequency = <33333333>;
+ interrupt-parent = <&mpic>;
+ interrupts = <16 2>;
+ };
+
+ pci1: pcie@ffe0a000 {
+ compatible = "fsl,mpc8548-pcie";
+ device_type = "pci";
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0 0xffe0a000 0 0x1000>;
+ bus-range = <0 255>;
+ clock-frequency = <33333333>;
+ interrupt-parent = <&mpic>;
+ interrupts = <16 2>;
+ };
+};
diff --git a/arch/powerpc/boot/dts/p1022ds.dts b/arch/powerpc/boot/dts/p1022ds.dts
index 59ef405c1c9..4f685a779f4 100644
--- a/arch/powerpc/boot/dts/p1022ds.dts
+++ b/arch/powerpc/boot/dts/p1022ds.dts
@@ -52,7 +52,7 @@
#size-cells = <1>;
compatible = "fsl,p1022-elbc", "fsl,elbc", "simple-bus";
reg = <0 0xffe05000 0 0x1000>;
- interrupts = <19 2>;
+ interrupts = <19 2 0 0>;
ranges = <0x0 0x0 0xf 0xe8000000 0x08000000
0x1 0x0 0xf 0xe0000000 0x08000000
@@ -157,7 +157,7 @@
* IRQ8 is generated if the "EVENT" switch is pressed
* and PX_CTL[EVESEL] is set to 00.
*/
- interrupts = <8 8>;
+ interrupts = <8 8 0 0>;
};
};
@@ -178,13 +178,13 @@
ecm@1000 {
compatible = "fsl,p1022-ecm", "fsl,ecm";
reg = <0x1000 0x1000>;
- interrupts = <16 2>;
+ interrupts = <16 2 0 0>;
};
memory-controller@2000 {
compatible = "fsl,p1022-memory-controller";
reg = <0x2000 0x1000>;
- interrupts = <16 2>;
+ interrupts = <16 2 0 0>;
};
i2c@3000 {
@@ -193,7 +193,7 @@
cell-index = <0>;
compatible = "fsl-i2c";
reg = <0x3000 0x100>;
- interrupts = <43 2>;
+ interrupts = <43 2 0 0>;
dfsrr;
};
@@ -203,7 +203,7 @@
cell-index = <1>;
compatible = "fsl-i2c";
reg = <0x3100 0x100>;
- interrupts = <43 2>;
+ interrupts = <43 2 0 0>;
dfsrr;
wm8776:codec@1a {
@@ -220,7 +220,7 @@
compatible = "ns16550";
reg = <0x4500 0x100>;
clock-frequency = <0>;
- interrupts = <42 2>;
+ interrupts = <42 2 0 0>;
};
serial1: serial@4600 {
@@ -229,7 +229,7 @@
compatible = "ns16550";
reg = <0x4600 0x100>;
clock-frequency = <0>;
- interrupts = <42 2>;
+ interrupts = <42 2 0 0>;
};
spi@7000 {
@@ -238,7 +238,7 @@
#size-cells = <0>;
compatible = "fsl,espi";
reg = <0x7000 0x1000>;
- interrupts = <59 0x2>;
+ interrupts = <59 0x2 0 0>;
espi,num-ss-bits = <4>;
mode = "cpu";
@@ -275,7 +275,7 @@
compatible = "fsl,mpc8610-ssi";
cell-index = <0>;
reg = <0x15000 0x100>;
- interrupts = <75 2>;
+ interrupts = <75 2 0 0>;
fsl,mode = "i2s-slave";
codec-handle = <&wm8776>;
fsl,playback-dma = <&dma00>;
@@ -294,25 +294,25 @@
compatible = "fsl,ssi-dma-channel";
reg = <0x0 0x80>;
cell-index = <0>;
- interrupts = <76 2>;
+ interrupts = <76 2 0 0>;
};
dma01: dma-channel@80 {
compatible = "fsl,ssi-dma-channel";
reg = <0x80 0x80>;
cell-index = <1>;
- interrupts = <77 2>;
+ interrupts = <77 2 0 0>;
};
dma-channel@100 {
compatible = "fsl,eloplus-dma-channel";
reg = <0x100 0x80>;
cell-index = <2>;
- interrupts = <78 2>;
+ interrupts = <78 2 0 0>;
};
dma-channel@180 {
compatible = "fsl,eloplus-dma-channel";
reg = <0x180 0x80>;
cell-index = <3>;
- interrupts = <79 2>;
+ interrupts = <79 2 0 0>;
};
};
@@ -320,7 +320,7 @@
#gpio-cells = <2>;
compatible = "fsl,mpc8572-gpio";
reg = <0xf000 0x100>;
- interrupts = <47 0x2>;
+ interrupts = <47 0x2 0 0>;
gpio-controller;
};
@@ -329,7 +329,7 @@
reg = <0x20000 0x1000>;
cache-line-size = <32>; // 32 bytes
cache-size = <0x40000>; // L2, 256K
- interrupts = <16 2>;
+ interrupts = <16 2 0 0>;
};
dma@21300 {
@@ -343,25 +343,25 @@
compatible = "fsl,eloplus-dma-channel";
reg = <0x0 0x80>;
cell-index = <0>;
- interrupts = <20 2>;
+ interrupts = <20 2 0 0>;
};
dma-channel@80 {
compatible = "fsl,eloplus-dma-channel";
reg = <0x80 0x80>;
cell-index = <1>;
- interrupts = <21 2>;
+ interrupts = <21 2 0 0>;
};
dma-channel@100 {
compatible = "fsl,eloplus-dma-channel";
reg = <0x100 0x80>;
cell-index = <2>;
- interrupts = <22 2>;
+ interrupts = <22 2 0 0>;
};
dma-channel@180 {
compatible = "fsl,eloplus-dma-channel";
reg = <0x180 0x80>;
cell-index = <3>;
- interrupts = <23 2>;
+ interrupts = <23 2 0 0>;
};
};
@@ -370,7 +370,7 @@
#size-cells = <0>;
compatible = "fsl-usb2-dr";
reg = <0x22000 0x1000>;
- interrupts = <28 0x2>;
+ interrupts = <28 0x2 0 0>;
phy_type = "ulpi";
};
@@ -381,11 +381,11 @@
reg = <0x24000 0x1000 0xb0030 0x4>;
phy0: ethernet-phy@0 {
- interrupts = <3 1>;
+ interrupts = <3 1 0 0>;
reg = <0x1>;
};
phy1: ethernet-phy@1 {
- interrupts = <9 1>;
+ interrupts = <9 1 0 0>;
reg = <0x2>;
};
};
@@ -416,13 +416,13 @@
#address-cells = <1>;
#size-cells = <1>;
reg = <0xB0000 0x1000>;
- interrupts = <29 2 30 2 34 2>;
+ interrupts = <29 2 0 0 30 2 0 0 34 2 0 0>;
};
queue-group@1{
#address-cells = <1>;
#size-cells = <1>;
reg = <0xB4000 0x1000>;
- interrupts = <17 2 18 2 24 2>;
+ interrupts = <17 2 0 0 18 2 0 0 24 2 0 0>;
};
};
@@ -443,20 +443,20 @@
#address-cells = <1>;
#size-cells = <1>;
reg = <0xB1000 0x1000>;
- interrupts = <35 2 36 2 40 2>;
+ interrupts = <35 2 0 0 36 2 0 0 40 2 0 0>;
};
queue-group@1{
#address-cells = <1>;
#size-cells = <1>;
reg = <0xB5000 0x1000>;
- interrupts = <51 2 52 2 67 2>;
+ interrupts = <51 2 0 0 52 2 0 0 67 2 0 0>;
};
};
sdhci@2e000 {
compatible = "fsl,p1022-esdhc", "fsl,esdhc";
reg = <0x2e000 0x1000>;
- interrupts = <72 0x2>;
+ interrupts = <72 0x2 0 0>;
fsl,sdhci-auto-cmd12;
/* Filled in by U-Boot */
clock-frequency = <0>;
@@ -467,7 +467,7 @@
"fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1",
"fsl,sec2.0";
reg = <0x30000 0x10000>;
- interrupts = <45 2 58 2>;
+ interrupts = <45 2 0 0 58 2 0 0>;
fsl,num-channels = <4>;
fsl,channel-fifo-len = <24>;
fsl,exec-units-mask = <0x97c>;
@@ -478,14 +478,14 @@
compatible = "fsl,p1022-sata", "fsl,pq-sata-v2";
reg = <0x18000 0x1000>;
cell-index = <1>;
- interrupts = <74 0x2>;
+ interrupts = <74 0x2 0 0>;
};
sata@19000 {
compatible = "fsl,p1022-sata", "fsl,pq-sata-v2";
reg = <0x19000 0x1000>;
cell-index = <2>;
- interrupts = <41 0x2>;
+ interrupts = <41 0x2 0 0>;
};
power@e0070{
@@ -496,21 +496,33 @@
display@10000 {
compatible = "fsl,diu", "fsl,p1022-diu";
reg = <0x10000 1000>;
- interrupts = <64 2>;
+ interrupts = <64 2 0 0>;
};
timer@41100 {
compatible = "fsl,mpic-global-timer";
- reg = <0x41100 0x204>;
- interrupts = <0xf7 0x2>;
+ reg = <0x41100 0x100 0x41300 4>;
+ interrupts = <0 0 3 0
+ 1 0 3 0
+ 2 0 3 0
+ 3 0 3 0>;
+ };
+
+ timer@42100 {
+ compatible = "fsl,mpic-global-timer";
+ reg = <0x42100 0x100 0x42300 4>;
+ interrupts = <4 0 3 0
+ 5 0 3 0
+ 6 0 3 0
+ 7 0 3 0>;
};
mpic: pic@40000 {
interrupt-controller;
#address-cells = <0>;
- #interrupt-cells = <2>;
+ #interrupt-cells = <4>;
reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
+ compatible = "fsl,mpic";
device_type = "open-pic";
};
@@ -519,14 +531,14 @@
reg = <0x41600 0x80>;
msi-available-ranges = <0 0x100>;
interrupts = <
- 0xe0 0
- 0xe1 0
- 0xe2 0
- 0xe3 0
- 0xe4 0
- 0xe5 0
- 0xe6 0
- 0xe7 0>;
+ 0xe0 0 0 0
+ 0xe1 0 0 0
+ 0xe2 0 0 0
+ 0xe3 0 0 0
+ 0xe4 0 0 0
+ 0xe5 0 0 0
+ 0xe6 0 0 0
+ 0xe7 0 0 0>;
};
global-utilities@e0000 { //global utilities block
@@ -547,7 +559,7 @@
ranges = <0x2000000 0x0 0xa0000000 0xc 0x20000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
clock-frequency = <33333333>;
- interrupts = <16 2>;
+ interrupts = <16 2 0 0>;
interrupt-map-mask = <0xf800 0 0 7>;
interrupt-map = <
/* IDSEL 0x0 */
@@ -582,7 +594,7 @@
ranges = <0x2000000 0x0 0xc0000000 0xc 0x40000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0xf 0xffc20000 0x0 0x10000>;
clock-frequency = <33333333>;
- interrupts = <16 2>;
+ interrupts = <16 2 0 0>;
interrupt-map-mask = <0xf800 0 0 7>;
interrupt-map = <
/* IDSEL 0x0 */
@@ -618,7 +630,7 @@
ranges = <0x2000000 0x0 0x80000000 0xc 0x00000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
clock-frequency = <33333333>;
- interrupts = <16 2>;
+ interrupts = <16 2 0 0>;
interrupt-map-mask = <0xf800 0 0 7>;
interrupt-map = <
/* IDSEL 0x0 */
diff --git a/arch/powerpc/boot/dts/p2020ds.dts b/arch/powerpc/boot/dts/p2020ds.dts
index 11019142813..2bcf3683d22 100644
--- a/arch/powerpc/boot/dts/p2020ds.dts
+++ b/arch/powerpc/boot/dts/p2020ds.dts
@@ -1,7 +1,7 @@
/*
* P2020 DS Device Tree Source
*
- * Copyright 2009 Freescale Semiconductor Inc.
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -9,12 +9,11 @@
* option) any later version.
*/
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
/ {
- model = "fsl,P2020";
+ model = "fsl,P2020DS";
compatible = "fsl,P2020DS";
- #address-cells = <2>;
- #size-cells = <2>;
aliases {
ethernet0 = &enet0;
@@ -27,35 +26,13 @@
pci2 = &pci2;
};
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,P2020@0 {
- device_type = "cpu";
- reg = <0x0>;
- next-level-cache = <&L2>;
- };
-
- PowerPC,P2020@1 {
- device_type = "cpu";
- reg = <0x1>;
- next-level-cache = <&L2>;
- };
- };
memory {
device_type = "memory";
};
localbus@ffe05000 {
- #address-cells = <2>;
- #size-cells = <1>;
compatible = "fsl,elbc", "simple-bus";
- reg = <0 0xffe05000 0 0x1000>;
- interrupts = <19 2>;
- interrupt-parent = <&mpic>;
-
ranges = <0x0 0x0 0x0 0xe8000000 0x08000000
0x1 0x0 0x0 0xe0000000 0x08000000
0x2 0x0 0x0 0xffa00000 0x00040000
@@ -158,352 +135,77 @@
};
soc@ffe00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "fsl,p2020-immr", "simple-bus";
- ranges = <0x0 0 0xffe00000 0x100000>;
- bus-frequency = <0>; // Filled out by uboot.
-
- ecm-law@0 {
- compatible = "fsl,ecm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <12>;
- };
-
- ecm@1000 {
- compatible = "fsl,p2020-ecm", "fsl,ecm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- memory-controller@2000 {
- compatible = "fsl,p2020-memory-controller";
- reg = <0x2000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <18 2>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- spi@7000 {
- compatible = "fsl,espi";
- reg = <0x7000 0x1000>;
- interrupts = <59 0x2>;
- interrupt-parent = <&mpic>;
+ usb@22000 {
+ phy_type = "ulpi";
};
- dma@c300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,eloplus-dma";
- reg = <0xc300 0x4>;
- ranges = <0x0 0xc100 0x200>;
- cell-index = <1>;
- dma-channel@0 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
+ mdio@24520 {
+ phy0: ethernet-phy@0 {
interrupt-parent = <&mpic>;
- interrupts = <76 2>;
+ interrupts = <3 1>;
+ reg = <0x0>;
};
- dma-channel@80 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
+ phy1: ethernet-phy@1 {
interrupt-parent = <&mpic>;
- interrupts = <77 2>;
+ interrupts = <3 1>;
+ reg = <0x1>;
};
- dma-channel@100 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
+ phy2: ethernet-phy@2 {
interrupt-parent = <&mpic>;
- interrupts = <78 2>;
+ interrupts = <3 1>;
+ reg = <0x2>;
};
- dma-channel@180 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <79 2>;
+ tbi0: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
};
- };
- gpio: gpio-controller@f000 {
- #gpio-cells = <2>;
- compatible = "fsl,mpc8572-gpio";
- reg = <0xf000 0x100>;
- interrupts = <47 0x2>;
- interrupt-parent = <&mpic>;
- gpio-controller;
};
- L2: l2-cache-controller@20000 {
- compatible = "fsl,p2020-l2-cache-controller";
- reg = <0x20000 0x1000>;
- cache-line-size = <32>; // 32 bytes
- cache-size = <0x80000>; // L2, 512k
- interrupt-parent = <&mpic>;
- interrupts = <16 2>;
+ mdio@25520 {
+ tbi1: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
};
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
+ mdio@26520 {
+ tbi2: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
};
- dma-channel@100 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
- usb@22000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl-usb2-dr";
- reg = <0x22000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <28 0x2>;
- phy_type = "ulpi";
};
enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
tbi-handle = <&tbi0>;
phy-handle = <&phy0>;
phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&mpic>;
- interrupts = <3 1>;
- reg = <0x0>;
- };
- phy1: ethernet-phy@1 {
- interrupt-parent = <&mpic>;
- interrupts = <3 1>;
- reg = <0x1>;
- };
- phy2: ethernet-phy@2 {
- interrupt-parent = <&mpic>;
- interrupts = <3 1>;
- reg = <0x2>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
};
enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 2 36 2 40 2>;
- interrupt-parent = <&mpic>;
tbi-handle = <&tbi1>;
phy-handle = <&phy1>;
phy-connection-type = "rgmii-id";
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
};
enet2: ethernet@26000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <2>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x26000 0x1000>;
- ranges = <0x0 0x26000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <31 2 32 2 33 2>;
- interrupt-parent = <&mpic>;
tbi-handle = <&tbi2>;
phy-handle = <&phy2>;
phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi2: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- sdhci@2e000 {
- compatible = "fsl,p2020-esdhc", "fsl,esdhc";
- reg = <0x2e000 0x1000>;
- interrupts = <72 0x2>;
- interrupt-parent = <&mpic>;
- /* Filled in by U-Boot */
- clock-frequency = <0>;
- };
-
- crypto@30000 {
- compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
- "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <45 2 58 2>;
- interrupt-parent = <&mpic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0xbfe>;
- fsl,descriptor-types-mask = <0x3ab0ebf>;
};
- mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
msi@41600 {
compatible = "fsl,mpic-msi";
- reg = <0x41600 0x80>;
- msi-available-ranges = <0 0x100>;
- interrupts = <
- 0xe0 0
- 0xe1 0
- 0xe2 0
- 0xe3 0
- 0xe4 0
- 0xe5 0
- 0xe6 0
- 0xe7 0>;
- interrupt-parent = <&mpic>;
- };
-
- global-utilities@e0000 { //global utilities block
- compatible = "fsl,p2020-guts";
- reg = <0xe0000 0x1000>;
- fsl,has-rstcr;
};
};
pci0: pcie@ffe08000 {
- compatible = "fsl,mpc8548-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0 0xffe08000 0 0x1000>;
- bus-range = <0 255>;
ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <24 2>;
interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
interrupt-map = <
/* IDSEL 0x0 */
@@ -528,18 +230,8 @@
};
pci1: pcie@ffe09000 {
- compatible = "fsl,mpc8548-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0 0xffe09000 0 0x1000>;
- bus-range = <0 255>;
ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <25 2>;
interrupt-map-mask = <0xff00 0x0 0x0 0x7>;
interrupt-map = <
@@ -667,18 +359,8 @@
};
pci2: pcie@ffe0a000 {
- compatible = "fsl,mpc8548-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0 0xffe0a000 0 0x1000>;
- bus-range = <0 255>;
ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <26 2>;
interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
interrupt-map = <
/* IDSEL 0x0 */
diff --git a/arch/powerpc/boot/dts/p2020rdb.dts b/arch/powerpc/boot/dts/p2020rdb.dts
index e2d48fd4416..3782a58f13b 100644
--- a/arch/powerpc/boot/dts/p2020rdb.dts
+++ b/arch/powerpc/boot/dts/p2020rdb.dts
@@ -9,12 +9,11 @@
* option) any later version.
*/
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
/ {
- model = "fsl,P2020";
+ model = "fsl,P2020RDB";
compatible = "fsl,P2020RDB";
- #address-cells = <2>;
- #size-cells = <2>;
aliases {
ethernet0 = &enet0;
@@ -26,34 +25,11 @@
pci1 = &pci1;
};
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,P2020@0 {
- device_type = "cpu";
- reg = <0x0>;
- next-level-cache = <&L2>;
- };
-
- PowerPC,P2020@1 {
- device_type = "cpu";
- reg = <0x1>;
- next-level-cache = <&L2>;
- };
- };
-
memory {
device_type = "memory";
};
localbus@ffe05000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus";
- reg = <0 0xffe05000 0 0x1000>;
- interrupts = <19 2>;
- interrupt-parent = <&mpic>;
/* NOR and NAND Flashes */
ranges = <0x0 0x0 0x0 0xef000000 0x01000000
@@ -165,90 +141,16 @@
};
soc@ffe00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "fsl,p2020-immr", "simple-bus";
- ranges = <0x0 0x0 0xffe00000 0x100000>;
- bus-frequency = <0>; // Filled out by uboot.
-
- ecm-law@0 {
- compatible = "fsl,ecm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <12>;
- };
-
- ecm@1000 {
- compatible = "fsl,p2020-ecm", "fsl,ecm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- memory-controller@2000 {
- compatible = "fsl,p2020-memory-controller";
- reg = <0x2000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <18 2>;
- };
-
i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
rtc@68 {
compatible = "dallas,ds1339";
reg = <0x68>;
};
};
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
+ spi@7000 {
- spi@7000 {
- cell-index = <0>;
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,espi";
- reg = <0x7000 0x1000>;
- interrupts = <59 0x2>;
- interrupt-parent = <&mpic>;
- mode = "cpu";
-
- fsl_m25p80@0 {
+ fsl_m25p80@0 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "fsl,espi-flash";
@@ -294,254 +196,68 @@
};
};
- dma@c300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,eloplus-dma";
- reg = <0xc300 0x4>;
- ranges = <0x0 0xc100 0x200>;
- cell-index = <1>;
- dma-channel@0 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <76 2>;
- };
- dma-channel@80 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <77 2>;
- };
- dma-channel@100 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <78 2>;
- };
- dma-channel@180 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <79 2>;
- };
- };
-
- gpio: gpio-controller@f000 {
- #gpio-cells = <2>;
- compatible = "fsl,mpc8572-gpio";
- reg = <0xf000 0x100>;
- interrupts = <47 0x2>;
- interrupt-parent = <&mpic>;
- gpio-controller;
- };
-
- L2: l2-cache-controller@20000 {
- compatible = "fsl,p2020-l2-cache-controller";
- reg = <0x20000 0x1000>;
- cache-line-size = <32>; // 32 bytes
- cache-size = <0x80000>; // L2,512K
- interrupt-parent = <&mpic>;
- interrupts = <16 2>;
+ usb@22000 {
+ phy_type = "ulpi";
};
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
+ mdio@24520 {
+ phy0: ethernet-phy@0 {
interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
+ interrupts = <3 1>;
+ reg = <0x0>;
+ };
+ phy1: ethernet-phy@1 {
interrupt-parent = <&mpic>;
- interrupts = <23 2>;
+ interrupts = <3 1>;
+ reg = <0x1>;
+ };
+ };
+
+ mdio@25520 {
+ tbi0: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
};
};
- usb@22000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl-usb2-dr";
- reg = <0x22000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <28 0x2>;
- phy_type = "ulpi";
+ mdio@26520 {
+ status = "disabled";
};
enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
fixed-link = <1 1 1000 0 0>;
phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&mpic>;
- interrupts = <3 1>;
- reg = <0x0>;
- };
- phy1: ethernet-phy@1 {
- interrupt-parent = <&mpic>;
- interrupts = <3 1>;
- reg = <0x1>;
- };
- };
};
enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 2 36 2 40 2>;
- interrupt-parent = <&mpic>;
tbi-handle = <&tbi0>;
phy-handle = <&phy0>;
phy-connection-type = "sgmii";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
};
enet2: ethernet@26000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <2>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x26000 0x1000>;
- ranges = <0x0 0x26000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <31 2 32 2 33 2>;
- interrupt-parent = <&mpic>;
phy-handle = <&phy1>;
phy-connection-type = "rgmii-id";
};
- sdhci@2e000 {
- compatible = "fsl,p2020-esdhc", "fsl,esdhc";
- reg = <0x2e000 0x1000>;
- interrupts = <72 0x2>;
- interrupt-parent = <&mpic>;
- /* Filled in by U-Boot */
- clock-frequency = <0>;
- };
-
- crypto@30000 {
- compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
- "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <45 2 58 2>;
- interrupt-parent = <&mpic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0xbfe>;
- fsl,descriptor-types-mask = <0x3ab0ebf>;
- };
-
- mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
-
- msi@41600 {
- compatible = "fsl,p2020-msi", "fsl,mpic-msi";
- reg = <0x41600 0x80>;
- msi-available-ranges = <0 0x100>;
- interrupts = <
- 0xe0 0
- 0xe1 0
- 0xe2 0
- 0xe3 0
- 0xe4 0
- 0xe5 0
- 0xe6 0
- 0xe7 0>;
- interrupt-parent = <&mpic>;
- };
+ };
- global-utilities@e0000 { //global utilities block
- compatible = "fsl,p2020-guts";
- reg = <0xe0000 0x1000>;
- fsl,has-rstcr;
- };
+ pci0: pcie@ffe08000 {
+ status = "disabled";
};
- pci0: pcie@ffe09000 {
- compatible = "fsl,mpc8548-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0 0xffe09000 0 0x1000>;
- bus-range = <0 255>;
+ pci1: pcie@ffe09000 {
ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <25 2>;
- pcie@0 {
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0x0 0x0 0x1 &mpic 0x4 0x1
+ 0000 0x0 0x0 0x2 &mpic 0x5 0x1
+ 0000 0x0 0x0 0x3 &mpic 0x6 0x1
+ 0000 0x0 0x0 0x4 &mpic 0x7 0x1
+ >;
+ pcie@0 {
reg = <0x0 0x0 0x0 0x0 0x0>;
#size-cells = <2>;
#address-cells = <3>;
@@ -556,19 +272,17 @@
};
};
- pci1: pcie@ffe0a000 {
- compatible = "fsl,mpc8548-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0 0xffe0a000 0 0x1000>;
- bus-range = <0 255>;
+ pci2: pcie@ffe0a000 {
ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <26 2>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0x0 0x0 0x1 &mpic 0x0 0x1
+ 0000 0x0 0x0 0x2 &mpic 0x1 0x1
+ 0000 0x0 0x0 0x3 &mpic 0x2 0x1
+ 0000 0x0 0x0 0x4 &mpic 0x3 0x1
+ >;
pcie@0 {
reg = <0x0 0x0 0x0 0x0 0x0>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
index b69c3a5dc85..fc8ddddfccb 100644
--- a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
@@ -14,12 +14,11 @@
* option) any later version.
*/
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
/ {
- model = "fsl,P2020";
+ model = "fsl,P2020RDB";
compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP";
- #address-cells = <2>;
- #size-cells = <2>;
aliases {
ethernet1 = &enet1;
@@ -29,91 +28,33 @@
};
cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,P2020@0 {
- device_type = "cpu";
- reg = <0x0>;
- next-level-cache = <&L2>;
+ PowerPC,P2020@1 {
+ status = "disabled";
};
+
};
memory {
device_type = "memory";
};
- soc@ffe00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "fsl,p2020-immr", "simple-bus";
- ranges = <0x0 0x0 0xffe00000 0x100000>;
- bus-frequency = <0>; // Filled out by uboot.
-
- ecm-law@0 {
- compatible = "fsl,ecm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <12>;
- };
-
- ecm@1000 {
- compatible = "fsl,p2020-ecm", "fsl,ecm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- memory-controller@2000 {
- compatible = "fsl,p2020-memory-controller";
- reg = <0x2000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <18 2>;
- };
+ localbus@ffe05000 {
+ status = "disabled";
+ };
+ soc@ffe00000 {
i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
rtc@68 {
compatible = "dallas,ds1339";
reg = <0x68>;
};
};
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
+ serial1: serial@4600 {
+ status = "disabled";
};
spi@7000 {
- cell-index = <0>;
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,espi";
- reg = <0x7000 0x1000>;
- interrupts = <59 0x2>;
- interrupt-parent = <&mpic>;
- mode = "cpu";
fsl_m25p80@0 {
#address-cells = <1>;
@@ -161,76 +102,15 @@
};
};
- gpio: gpio-controller@f000 {
- #gpio-cells = <2>;
- compatible = "fsl,mpc8572-gpio";
- reg = <0xf000 0x100>;
- interrupts = <47 0x2>;
- interrupt-parent = <&mpic>;
- gpio-controller;
- };
-
- L2: l2-cache-controller@20000 {
- compatible = "fsl,p2020-l2-cache-controller";
- reg = <0x20000 0x1000>;
- cache-line-size = <32>; // 32 bytes
- cache-size = <0x80000>; // L2,512K
- interrupt-parent = <&mpic>;
- interrupts = <16 2>;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
+ dma@c300 {
+ status = "disabled";
};
usb@22000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl-usb2-dr";
- reg = <0x22000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <28 0x2>;
phy_type = "ulpi";
};
mdio@24520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x24520 0x20>;
phy0: ethernet-phy@0 {
interrupt-parent = <&mpic>;
@@ -245,29 +125,21 @@
};
mdio@25520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x26520 0x20>;
-
tbi0: tbi-phy@11 {
reg = <0x11>;
device_type = "tbi-phy";
};
};
+ mdio@26520 {
+ status = "disabled";
+ };
+
+ enet0: ethernet@24000 {
+ status = "disabled";
+ };
+
enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 2 36 2 40 2>;
- interrupt-parent = <&mpic>;
tbi-handle = <&tbi0>;
phy-handle = <&phy0>;
phy-connection-type = "sgmii";
@@ -275,49 +147,12 @@
};
enet2: ethernet@26000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <2>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x26000 0x1000>;
- ranges = <0x0 0x26000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <31 2 32 2 33 2>;
- interrupt-parent = <&mpic>;
phy-handle = <&phy1>;
phy-connection-type = "rgmii-id";
};
- sdhci@2e000 {
- compatible = "fsl,p2020-esdhc", "fsl,esdhc";
- reg = <0x2e000 0x1000>;
- interrupts = <72 0x2>;
- interrupt-parent = <&mpic>;
- /* Filled in by U-Boot */
- clock-frequency = <0>;
- };
-
- crypto@30000 {
- compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
- "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <45 2 58 2>;
- interrupt-parent = <&mpic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0xbfe>;
- fsl,descriptor-types-mask = <0x3ab0ebf>;
- };
mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
protected-sources = <
42 76 77 78 79 /* serial1 , dma2 */
29 30 34 26 /* enet0, pci1 */
@@ -326,26 +161,28 @@
>;
};
- global-utilities@e0000 {
- compatible = "fsl,p2020-guts";
- reg = <0xe0000 0x1000>;
- fsl,has-rstcr;
+ msi@41600 {
+ status = "disabled";
};
+
+
};
- pci0: pcie@ffe09000 {
- compatible = "fsl,mpc8548-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0 0xffe09000 0 0x1000>;
- bus-range = <0 255>;
+ pci0: pcie@ffe08000 {
+ status = "disabled";
+ };
+
+ pci1: pcie@ffe09000 {
ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <25 2>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0x0 0x0 0x1 &mpic 0x4 0x1
+ 0000 0x0 0x0 0x2 &mpic 0x5 0x1
+ 0000 0x0 0x0 0x3 &mpic 0x6 0x1
+ 0000 0x0 0x0 0x4 &mpic 0x7 0x1
+ >;
pcie@0 {
reg = <0x0 0x0 0x0 0x0 0x0>;
#size-cells = <2>;
@@ -360,4 +197,8 @@
0x0 0x100000>;
};
};
+
+ pci2: pcie@ffe0a000 {
+ status = "disabled";
+ };
};
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
index 7a31d46c01b..261c34ba45e 100644
--- a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
@@ -15,27 +15,21 @@
* option) any later version.
*/
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
/ {
- model = "fsl,P2020";
+ model = "fsl,P2020RDB";
compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP";
- #address-cells = <2>;
- #size-cells = <2>;
aliases {
ethernet0 = &enet0;
- serial0 = &serial0;
+ serial0 = &serial1;
pci1 = &pci1;
};
cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,P2020@1 {
- device_type = "cpu";
- reg = <0x1>;
- next-level-cache = <&L2>;
+ PowerPC,P2020@0 {
+ status = "disabled";
};
};
@@ -43,20 +37,37 @@
device_type = "memory";
};
+ localbus@ffe05000 {
+ status = "disabled";
+ };
+
soc@ffe00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "fsl,p2020-immr", "simple-bus";
- ranges = <0x0 0x0 0xffe00000 0x100000>;
- bus-frequency = <0>; // Filled out by uboot.
-
- serial0: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
+ ecm-law@0 {
+ status = "disabled";
+ };
+
+ ecm@1000 {
+ status = "disabled";
+ };
+
+ memory-controller@2000 {
+ status = "disabled";
+ };
+
+ i2c@3000 {
+ status = "disabled";
+ };
+
+ i2c@3100 {
+ status = "disabled";
+ };
+
+ serial0: serial@4500 {
+ status = "disabled";
+ };
+
+ spi@7000 {
+ status = "disabled";
};
dma@c300 {
@@ -96,6 +107,10 @@
};
};
+ gpio: gpio-controller@f000 {
+ status = "disabled";
+ };
+
L2: l2-cache-controller@20000 {
compatible = "fsl,p2020-l2-cache-controller";
reg = <0x20000 0x1000>;
@@ -104,31 +119,49 @@
interrupt-parent = <&mpic>;
};
+ dma@21300 {
+ status = "disabled";
+ };
+
+ usb@22000 {
+ status = "disabled";
+ };
+
+ mdio@24520 {
+ status = "disabled";
+ };
+
+ mdio@25520 {
+ status = "disabled";
+ };
+
+ mdio@26520 {
+ status = "disabled";
+ };
enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
fixed-link = <1 1 1000 0 0>;
phy-connection-type = "rgmii-id";
};
+ enet1: ethernet@25000 {
+ status = "disabled";
+ };
+
+ enet2: ethernet@26000 {
+ status = "disabled";
+ };
+
+ sdhci@2e000 {
+ status = "disabled";
+ };
+
+ crypto@30000 {
+ status = "disabled";
+ };
+
mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
protected-sources = <
17 18 43 42 59 47 /*ecm, mem, i2c, serial0, spi,gpio */
16 20 21 22 23 28 /* L2, dma1, USB */
@@ -152,21 +185,32 @@
0xe7 0>;
interrupt-parent = <&mpic>;
};
+
+ global-utilities@e0000 { //global utilities block
+ status = "disabled";
+ };
+
};
- pci1: pcie@ffe0a000 {
- compatible = "fsl,mpc8548-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0 0xffe0a000 0 0x1000>;
- bus-range = <0 255>;
+ pci0: pcie@ffe08000 {
+ status = "disabled";
+ };
+
+ pci1: pcie@ffe09000 {
+ status = "disabled";
+ };
+
+ pci2: pcie@ffe0a000 {
ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <26 2>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0x0 0x0 0x1 &mpic 0x0 0x1
+ 0000 0x0 0x0 0x2 &mpic 0x1 0x1
+ 0000 0x0 0x0 0x3 &mpic 0x2 0x1
+ 0000 0x0 0x0 0x4 &mpic 0x3 0x1
+ >;
pcie@0 {
reg = <0x0 0x0 0x0 0x0 0x0>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p2020si.dtsi b/arch/powerpc/boot/dts/p2020si.dtsi
new file mode 100644
index 00000000000..6def17f265d
--- /dev/null
+++ b/arch/powerpc/boot/dts/p2020si.dtsi
@@ -0,0 +1,382 @@
+/*
+ * P2020 Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+ compatible = "fsl,P2020";
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ PowerPC,P2020@0 {
+ device_type = "cpu";
+ reg = <0x0>;
+ next-level-cache = <&L2>;
+ };
+
+ PowerPC,P2020@1 {
+ device_type = "cpu";
+ reg = <0x1>;
+ next-level-cache = <&L2>;
+ };
+ };
+
+ localbus@ffe05000 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus";
+ reg = <0 0xffe05000 0 0x1000>;
+ interrupts = <19 2>;
+ interrupt-parent = <&mpic>;
+ };
+
+ soc@ffe00000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "soc";
+ compatible = "fsl,p2020-immr", "simple-bus";
+ ranges = <0x0 0x0 0xffe00000 0x100000>;
+ bus-frequency = <0>; // Filled out by uboot.
+
+ ecm-law@0 {
+ compatible = "fsl,ecm-law";
+ reg = <0x0 0x1000>;
+ fsl,num-laws = <12>;
+ };
+
+ ecm@1000 {
+ compatible = "fsl,p2020-ecm", "fsl,ecm";
+ reg = <0x1000 0x1000>;
+ interrupts = <17 2>;
+ interrupt-parent = <&mpic>;
+ };
+
+ memory-controller@2000 {
+ compatible = "fsl,p2020-memory-controller";
+ reg = <0x2000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <18 2>;
+ };
+
+ i2c@3000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ cell-index = <0>;
+ compatible = "fsl-i2c";
+ reg = <0x3000 0x100>;
+ interrupts = <43 2>;
+ interrupt-parent = <&mpic>;
+ dfsrr;
+ };
+
+ i2c@3100 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ cell-index = <1>;
+ compatible = "fsl-i2c";
+ reg = <0x3100 0x100>;
+ interrupts = <43 2>;
+ interrupt-parent = <&mpic>;
+ dfsrr;
+ };
+
+ serial0: serial@4500 {
+ cell-index = <0>;
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x4500 0x100>;
+ clock-frequency = <0>;
+ interrupts = <42 2>;
+ interrupt-parent = <&mpic>;
+ };
+
+ serial1: serial@4600 {
+ cell-index = <1>;
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x4600 0x100>;
+ clock-frequency = <0>;
+ interrupts = <42 2>;
+ interrupt-parent = <&mpic>;
+ };
+
+ spi@7000 {
+ cell-index = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,espi";
+ reg = <0x7000 0x1000>;
+ interrupts = <59 0x2>;
+ interrupt-parent = <&mpic>;
+ mode = "cpu";
+ };
+
+ dma@c300 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,eloplus-dma";
+ reg = <0xc300 0x4>;
+ ranges = <0x0 0xc100 0x200>;
+ cell-index = <1>;
+ dma-channel@0 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x0 0x80>;
+ cell-index = <0>;
+ interrupt-parent = <&mpic>;
+ interrupts = <76 2>;
+ };
+ dma-channel@80 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x80 0x80>;
+ cell-index = <1>;
+ interrupt-parent = <&mpic>;
+ interrupts = <77 2>;
+ };
+ dma-channel@100 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x100 0x80>;
+ cell-index = <2>;
+ interrupt-parent = <&mpic>;
+ interrupts = <78 2>;
+ };
+ dma-channel@180 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x180 0x80>;
+ cell-index = <3>;
+ interrupt-parent = <&mpic>;
+ interrupts = <79 2>;
+ };
+ };
+
+ gpio: gpio-controller@f000 {
+ #gpio-cells = <2>;
+ compatible = "fsl,mpc8572-gpio";
+ reg = <0xf000 0x100>;
+ interrupts = <47 0x2>;
+ interrupt-parent = <&mpic>;
+ gpio-controller;
+ };
+
+ L2: l2-cache-controller@20000 {
+ compatible = "fsl,p2020-l2-cache-controller";
+ reg = <0x20000 0x1000>;
+ cache-line-size = <32>; // 32 bytes
+ cache-size = <0x80000>; // L2,512K
+ interrupt-parent = <&mpic>;
+ interrupts = <16 2>;
+ };
+
+ dma@21300 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,eloplus-dma";
+ reg = <0x21300 0x4>;
+ ranges = <0x0 0x21100 0x200>;
+ cell-index = <0>;
+ dma-channel@0 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x0 0x80>;
+ cell-index = <0>;
+ interrupt-parent = <&mpic>;
+ interrupts = <20 2>;
+ };
+ dma-channel@80 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x80 0x80>;
+ cell-index = <1>;
+ interrupt-parent = <&mpic>;
+ interrupts = <21 2>;
+ };
+ dma-channel@100 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x100 0x80>;
+ cell-index = <2>;
+ interrupt-parent = <&mpic>;
+ interrupts = <22 2>;
+ };
+ dma-channel@180 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x180 0x80>;
+ cell-index = <3>;
+ interrupt-parent = <&mpic>;
+ interrupts = <23 2>;
+ };
+ };
+
+ usb@22000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl-usb2-dr";
+ reg = <0x22000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <28 0x2>;
+ };
+
+ mdio@24520 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,gianfar-mdio";
+ reg = <0x24520 0x20>;
+ };
+
+ mdio@25520 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,gianfar-tbi";
+ reg = <0x26520 0x20>;
+ };
+
+ mdio@26520 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,gianfar-tbi";
+ reg = <0x520 0x20>;
+ };
+
+ enet0: ethernet@24000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ cell-index = <0>;
+ device_type = "network";
+ model = "eTSEC";
+ compatible = "gianfar";
+ reg = <0x24000 0x1000>;
+ ranges = <0x0 0x24000 0x1000>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ interrupts = <29 2 30 2 34 2>;
+ interrupt-parent = <&mpic>;
+ };
+
+ enet1: ethernet@25000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ cell-index = <1>;
+ device_type = "network";
+ model = "eTSEC";
+ compatible = "gianfar";
+ reg = <0x25000 0x1000>;
+ ranges = <0x0 0x25000 0x1000>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ interrupts = <35 2 36 2 40 2>;
+ interrupt-parent = <&mpic>;
+
+ };
+
+ enet2: ethernet@26000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ cell-index = <2>;
+ device_type = "network";
+ model = "eTSEC";
+ compatible = "gianfar";
+ reg = <0x26000 0x1000>;
+ ranges = <0x0 0x26000 0x1000>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ interrupts = <31 2 32 2 33 2>;
+ interrupt-parent = <&mpic>;
+
+ };
+
+ sdhci@2e000 {
+ compatible = "fsl,p2020-esdhc", "fsl,esdhc";
+ reg = <0x2e000 0x1000>;
+ interrupts = <72 0x2>;
+ interrupt-parent = <&mpic>;
+ /* Filled in by U-Boot */
+ clock-frequency = <0>;
+ };
+
+ crypto@30000 {
+ compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
+ "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+ reg = <0x30000 0x10000>;
+ interrupts = <45 2 58 2>;
+ interrupt-parent = <&mpic>;
+ fsl,num-channels = <4>;
+ fsl,channel-fifo-len = <24>;
+ fsl,exec-units-mask = <0xbfe>;
+ fsl,descriptor-types-mask = <0x3ab0ebf>;
+ };
+
+ mpic: pic@40000 {
+ interrupt-controller;
+ #address-cells = <0>;
+ #interrupt-cells = <2>;
+ reg = <0x40000 0x40000>;
+ compatible = "chrp,open-pic";
+ device_type = "open-pic";
+ };
+
+ msi@41600 {
+ compatible = "fsl,p2020-msi", "fsl,mpic-msi";
+ reg = <0x41600 0x80>;
+ msi-available-ranges = <0 0x100>;
+ interrupts = <
+ 0xe0 0
+ 0xe1 0
+ 0xe2 0
+ 0xe3 0
+ 0xe4 0
+ 0xe5 0
+ 0xe6 0
+ 0xe7 0>;
+ interrupt-parent = <&mpic>;
+ };
+
+ global-utilities@e0000 { //global utilities block
+ compatible = "fsl,p2020-guts";
+ reg = <0xe0000 0x1000>;
+ fsl,has-rstcr;
+ };
+ };
+
+ pci0: pcie@ffe08000 {
+ compatible = "fsl,mpc8548-pcie";
+ device_type = "pci";
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0 0xffe08000 0 0x1000>;
+ bus-range = <0 255>;
+ clock-frequency = <33333333>;
+ interrupt-parent = <&mpic>;
+ interrupts = <24 2>;
+ };
+
+ pci1: pcie@ffe09000 {
+ compatible = "fsl,mpc8548-pcie";
+ device_type = "pci";
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0 0xffe09000 0 0x1000>;
+ bus-range = <0 255>;
+ clock-frequency = <33333333>;
+ interrupt-parent = <&mpic>;
+ interrupts = <25 2>;
+ };
+
+ pci2: pcie@ffe0a000 {
+ compatible = "fsl,mpc8548-pcie";
+ device_type = "pci";
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0 0xffe0a000 0 0x1000>;
+ bus-range = <0 255>;
+ clock-frequency = <33333333>;
+ interrupt-parent = <&mpic>;
+ interrupts = <26 2>;
+ };
+};
diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts
index 5b7fc29dd6c..927f94d16e9 100644
--- a/arch/powerpc/boot/dts/p4080ds.dts
+++ b/arch/powerpc/boot/dts/p4080ds.dts
@@ -1,7 +1,7 @@
/*
* P4080DS Device Tree Source
*
- * Copyright 2009 Freescale Semiconductor Inc.
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -33,6 +33,17 @@
dma1 = &dma1;
sdhc = &sdhc;
+ crypto = &crypto;
+ sec_jr0 = &sec_jr0;
+ sec_jr1 = &sec_jr1;
+ sec_jr2 = &sec_jr2;
+ sec_jr3 = &sec_jr3;
+ rtic_a = &rtic_a;
+ rtic_b = &rtic_b;
+ rtic_c = &rtic_c;
+ rtic_d = &rtic_d;
+ sec_mon = &sec_mon;
+
rio0 = &rapidio0;
};
@@ -410,6 +421,79 @@
dr_mode = "host";
phy_type = "ulpi";
};
+
+ crypto: crypto@300000 {
+ compatible = "fsl,sec-v4.0";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x300000 0x10000>;
+ ranges = <0 0x300000 0x10000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <92 2>;
+
+ sec_jr0: jr@1000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x1000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <88 2>;
+ };
+
+ sec_jr1: jr@2000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x2000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <89 2>;
+ };
+
+ sec_jr2: jr@3000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x3000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <90 2>;
+ };
+
+ sec_jr3: jr@4000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x4000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <91 2>;
+ };
+
+ rtic@6000 {
+ compatible = "fsl,sec-v4.0-rtic";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x6000 0x100>;
+ ranges = <0x0 0x6100 0xe00>;
+
+ rtic_a: rtic-a@0 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x00 0x20 0x100 0x80>;
+ };
+
+ rtic_b: rtic-b@20 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x20 0x20 0x200 0x80>;
+ };
+
+ rtic_c: rtic-c@40 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x40 0x20 0x300 0x80>;
+ };
+
+ rtic_d: rtic-d@60 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x60 0x20 0x500 0x80>;
+ };
+ };
+ };
+
+ sec_mon: sec_mon@314000 {
+ compatible = "fsl,sec-v4.0-mon";
+ reg = <0x314000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <93 2>;
+ };
};
rapidio0: rapidio@ffe0c0000 {
diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c
new file mode 100644
index 00000000000..06c1961bd12
--- /dev/null
+++ b/arch/powerpc/boot/epapr.c
@@ -0,0 +1,66 @@
+/*
+ * Bootwrapper for ePAPR compliant firmwares
+ *
+ * Copyright 2010 David Gibson <david@gibson.dropbear.id.au>, IBM Corporation.
+ *
+ * Based on earlier bootwrappers by:
+ * (c) Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp,\
+ * and
+ * Scott Wood <scottwood@freescale.com>
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "io.h"
+#include <libfdt.h>
+
+BSS_STACK(4096);
+
+#define EPAPR_SMAGIC 0x65504150
+#define EPAPR_EMAGIC 0x45504150
+
+static unsigned epapr_magic;
+static unsigned long ima_size;
+static unsigned long fdt_addr;
+
+static void platform_fixups(void)
+{
+ if ((epapr_magic != EPAPR_EMAGIC)
+ && (epapr_magic != EPAPR_SMAGIC))
+ fatal("r6 contained 0x%08x instead of ePAPR magic number\n",
+ epapr_magic);
+
+ if (ima_size < (unsigned long)_end)
+ printf("WARNING: Image loaded outside IMA!"
+ " (_end=%p, ima_size=0x%lx)\n", _end, ima_size);
+ if (ima_size < fdt_addr)
+ printf("WARNING: Device tree address is outside IMA!"
+ "(fdt_addr=0x%lx, ima_size=0x%lx)\n", fdt_addr,
+ ima_size);
+ if (ima_size < fdt_addr + fdt_totalsize((void *)fdt_addr))
+ printf("WARNING: Device tree extends outside IMA!"
+ " (fdt_addr=0x%lx, size=0x%x, ima_size=0x%lx\n",
+ fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size);
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7)
+{
+ epapr_magic = r6;
+ ima_size = r7;
+ fdt_addr = r3;
+
+ /* FIXME: we should process reserve entries */
+
+ simple_alloc_init(_end, ima_size - (unsigned long)_end, 32, 64);
+
+ fdt_init((void *)fdt_addr);
+
+ serial_console_init();
+ platform_ops.fixups = platform_fixups;
+}
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index cb97e7511d7..c74531af72c 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -39,6 +39,7 @@ dts=
cacheit=
binary=
gzip=.gz
+pie=
# cross-compilation prefix
CROSS=
@@ -157,9 +158,10 @@ pmac|chrp)
platformo=$object/of.o
;;
coff)
- platformo=$object/of.o
+ platformo="$object/crt0.o $object/of.o"
lds=$object/zImage.coff.lds
link_address='0x500000'
+ pie=
;;
miboot|uboot)
# miboot and U-boot want just the bare bits, not an ELF binary
@@ -208,6 +210,7 @@ ps3)
ksection=.kernel:vmlinux.bin
isection=.kernel:initrd
link_address=''
+ pie=
;;
ep88xc|ep405|ep8248e)
platformo="$object/fixed-head.o $object/$platform.o"
@@ -244,6 +247,10 @@ gamecube|wii)
treeboot-iss4xx-mpic)
platformo="$object/treeboot-iss4xx.o"
;;
+epapr)
+ link_address='0x20000000'
+ pie=-pie
+ ;;
esac
vmz="$tmpdir/`basename \"$kernel\"`.$ext"
@@ -251,7 +258,7 @@ if [ -z "$cacheit" -o ! -f "$vmz$gzip" -o "$vmz$gzip" -ot "$kernel" ]; then
${CROSS}objcopy $objflags "$kernel" "$vmz.$$"
if [ -n "$gzip" ]; then
- gzip -f -9 "$vmz.$$"
+ gzip -n -f -9 "$vmz.$$"
fi
if [ -n "$cacheit" ]; then
@@ -310,9 +317,9 @@ fi
if [ "$platform" != "miboot" ]; then
if [ -n "$link_address" ] ; then
- text_start="-Ttext $link_address --defsym _start=$link_address"
+ text_start="-Ttext $link_address"
fi
- ${CROSS}ld -m elf32ppc -T $lds $text_start -o "$ofile" \
+ ${CROSS}ld -m elf32ppc -T $lds $text_start $pie -o "$ofile" \
$platformo $tmp $object/wrapper.a
rm $tmp
fi
@@ -336,7 +343,7 @@ coff)
$objbin/hack-coff "$ofile"
;;
cuboot*)
- gzip -f -9 "$ofile"
+ gzip -n -f -9 "$ofile"
${MKIMAGE} -A ppc -O linux -T kernel -C gzip -a "$base" -e "$entry" \
$uboot_version -d "$ofile".gz "$ofile"
;;
@@ -383,6 +390,6 @@ ps3)
odir="$(dirname "$ofile.bin")"
rm -f "$odir/otheros.bld"
- gzip --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
+ gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
;;
esac
diff --git a/arch/powerpc/boot/zImage.coff.lds.S b/arch/powerpc/boot/zImage.coff.lds.S
index 856dc78b14e..de4c9e3c934 100644
--- a/arch/powerpc/boot/zImage.coff.lds.S
+++ b/arch/powerpc/boot/zImage.coff.lds.S
@@ -3,13 +3,13 @@ ENTRY(_zimage_start_opd)
EXTERN(_zimage_start_opd)
SECTIONS
{
- _start = .;
.text :
{
+ _start = .;
*(.text)
*(.fixup)
+ _etext = .;
}
- _etext = .;
. = ALIGN(4096);
.data :
{
@@ -17,9 +17,7 @@ SECTIONS
*(.data*)
*(__builtin_*)
*(.sdata*)
- __got2_start = .;
*(.got2)
- __got2_end = .;
_dtb_start = .;
*(.kernel:dtb)
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 0962d62bdb5..2bd8731f136 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -3,49 +3,64 @@ ENTRY(_zimage_start)
EXTERN(_zimage_start)
SECTIONS
{
- _start = .;
.text :
{
+ _start = .;
*(.text)
*(.fixup)
+ _etext = .;
}
- _etext = .;
. = ALIGN(4096);
.data :
{
*(.rodata*)
*(.data*)
*(.sdata*)
- __got2_start = .;
*(.got2)
- __got2_end = .;
}
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .dynamic :
+ {
+ __dynamic_start = .;
+ *(.dynamic)
+ }
+ .hash : { *(.hash) }
+ .interp : { *(.interp) }
+ .rela.dyn : { *(.rela*) }
. = ALIGN(8);
- _dtb_start = .;
- .kernel:dtb : { *(.kernel:dtb) }
- _dtb_end = .;
-
- . = ALIGN(4096);
- _vmlinux_start = .;
- .kernel:vmlinux.strip : { *(.kernel:vmlinux.strip) }
- _vmlinux_end = .;
+ .kernel:dtb :
+ {
+ _dtb_start = .;
+ *(.kernel:dtb)
+ _dtb_end = .;
+ }
. = ALIGN(4096);
- _initrd_start = .;
- .kernel:initrd : { *(.kernel:initrd) }
- _initrd_end = .;
+ .kernel:vmlinux.strip :
+ {
+ _vmlinux_start = .;
+ *(.kernel:vmlinux.strip)
+ _vmlinux_end = .;
+ }
. = ALIGN(4096);
- _edata = .;
+ .kernel:initrd :
+ {
+ _initrd_start = .;
+ *(.kernel:initrd)
+ _initrd_end = .;
+ }
. = ALIGN(4096);
- __bss_start = .;
.bss :
{
- *(.sbss)
- *(.bss)
+ _edata = .;
+ __bss_start = .;
+ *(.sbss)
+ *(.bss)
+ *(COMMON)
+ _end = . ;
}
- . = ALIGN(4096);
- _end = . ;
}
diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
index c683bce4c26..126ef1b08a0 100644
--- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
@@ -104,7 +104,6 @@ CONFIG_ROOT_NFS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
index a721cd3d793..abcf00ad939 100644
--- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
@@ -101,7 +101,6 @@ CONFIG_ROOT_NFS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
index 55e0725500d..11662c217ac 100644
--- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
@@ -58,7 +58,6 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
index d724095530a..ebe9b30b072 100644
--- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
@@ -59,7 +59,6 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
index 4b44beaa21a..eb25229b387 100644
--- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
+++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
@@ -63,7 +63,6 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
index b614508d6fd..f51c7ebc181 100644
--- a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
+++ b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
@@ -168,7 +168,6 @@ CONFIG_MAC_PARTITION=y
CONFIG_CRC_T10DIF=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
index f9e6a3ea5a6..2a84fd7f631 100644
--- a/arch/powerpc/configs/c2k_defconfig
+++ b/arch/powerpc/configs/c2k_defconfig
@@ -132,8 +132,8 @@ CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_CLS_IND=y
CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
CONFIG_BT_RFCOMM=m
CONFIG_BT_RFCOMM_TTY=y
CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/e55xx_smp_defconfig b/arch/powerpc/configs/e55xx_smp_defconfig
index 9fa1613e5e2..d32283555b5 100644
--- a/arch/powerpc/configs/e55xx_smp_defconfig
+++ b/arch/powerpc/configs/e55xx_smp_defconfig
@@ -6,10 +6,10 @@ CONFIG_NR_CPUS=2
CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_SPARSE_IRQ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED_V2=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
@@ -25,8 +25,32 @@ CONFIG_P5020_DS=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BINFMT_MISC=m
-CONFIG_SPARSE_IRQ=y
# CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_ARPD=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6=y
+CONFIG_IP_SCTP=m
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
@@ -34,6 +58,9 @@ CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=131072
CONFIG_MISC_DEVICES=y
CONFIG_EEPROM_LEGACY=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+CONFIG_NET_ETHERNET=y
CONFIG_INPUT_FF_MEMLESS=m
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
@@ -64,22 +91,14 @@ CONFIG_NLS=y
CONFIG_NLS_UTF8=m
CONFIG_CRC_T10DIF=y
CONFIG_CRC_ITU_T=m
-CONFIG_LIBCRC32C=m
CONFIG_FRAME_WARN=1024
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_VIRQ_DEBUG=y
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_SHA1=m
-CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
index c06a86c3309..96b89df7752 100644
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig
@@ -204,7 +204,6 @@ CONFIG_CRC_T10DIF=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index 942ced90557..de65841aa04 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -206,7 +206,6 @@ CONFIG_CRC_T10DIF=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig
index 038a308cbfc..a1cc8179e9f 100644
--- a/arch/powerpc/configs/mpc86xx_defconfig
+++ b/arch/powerpc/configs/mpc86xx_defconfig
@@ -171,7 +171,6 @@ CONFIG_MAC_PARTITION=y
CONFIG_CRC_T10DIF=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index ac4fc41035f..f8b394a76ac 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -112,8 +112,8 @@ CONFIG_IRDA_CACHE_LAST_LSAP=y
CONFIG_IRDA_FAST_RR=y
CONFIG_IRTTY_SIR=m
CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
CONFIG_BT_RFCOMM=m
CONFIG_BT_RFCOMM_TTY=y
CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 0a10fb009ef..214208924a9 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -351,8 +351,8 @@ CONFIG_VLSI_FIR=m
CONFIG_VIA_FIR=m
CONFIG_MCS_FIR=m
CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
CONFIG_BT_RFCOMM=m
CONFIG_BT_RFCOMM_TTY=y
CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index caba919f65d..6472322bf13 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -52,8 +52,8 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_DIAG is not set
CONFIG_IPV6=y
CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
CONFIG_BT_RFCOMM=m
CONFIG_BT_RFCOMM_TTY=y
CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 249ddd0a27c..7de13865508 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -146,12 +146,18 @@ CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_FC_ATTRS=y
CONFIG_SCSI_SAS_ATTRS=m
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
CONFIG_SCSI_IBMVSCSI=y
CONFIG_SCSI_IBMVFC=m
CONFIG_SCSI_SYM53C8XX_2=y
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
CONFIG_SCSI_IPR=y
CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
CONFIG_SCSI_LPFC=m
CONFIG_ATA=y
# CONFIG_ATA_SFF is not set
@@ -197,6 +203,8 @@ CONFIG_S2IO=m
CONFIG_MYRI10GE=m
CONFIG_NETXEN_NIC=m
CONFIG_MLX4_EN=m
+CONFIG_QLGE=m
+CONFIG_BE2NET=m
CONFIG_PPP=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 1833d1a07e7..c0d842cfd01 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -157,6 +157,7 @@ extern const char *powerpc_base_platform;
#define CPU_FTR_476_DD2 ASM_CONST(0x0000000000010000)
#define CPU_FTR_NEED_COHERENT ASM_CONST(0x0000000000020000)
#define CPU_FTR_NO_BTIC ASM_CONST(0x0000000000040000)
+#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x0000000000080000)
#define CPU_FTR_NODSISRALIGN ASM_CONST(0x0000000000100000)
#define CPU_FTR_PPC_LE ASM_CONST(0x0000000000200000)
#define CPU_FTR_REAL_LE ASM_CONST(0x0000000000400000)
@@ -178,22 +179,18 @@ extern const char *powerpc_base_platform;
#define LONG_ASM_CONST(x) 0
#endif
-#define CPU_FTR_SLB LONG_ASM_CONST(0x0000000100000000)
-#define CPU_FTR_16M_PAGE LONG_ASM_CONST(0x0000000200000000)
-#define CPU_FTR_TLBIEL LONG_ASM_CONST(0x0000000400000000)
+
+#define CPU_FTR_HVMODE_206 LONG_ASM_CONST(0x0000000800000000)
+#define CPU_FTR_CFAR LONG_ASM_CONST(0x0000001000000000)
#define CPU_FTR_IABR LONG_ASM_CONST(0x0000002000000000)
#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000004000000000)
#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000008000000000)
#define CPU_FTR_SMT LONG_ASM_CONST(0x0000010000000000)
-#define CPU_FTR_LOCKLESS_TLBIE LONG_ASM_CONST(0x0000040000000000)
-#define CPU_FTR_CI_LARGE_PAGE LONG_ASM_CONST(0x0000100000000000)
#define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000200000000000)
#define CPU_FTR_PURR LONG_ASM_CONST(0x0000400000000000)
#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000800000000000)
#define CPU_FTR_SPURR LONG_ASM_CONST(0x0001000000000000)
#define CPU_FTR_DSCR LONG_ASM_CONST(0x0002000000000000)
-#define CPU_FTR_1T_SEGMENT LONG_ASM_CONST(0x0004000000000000)
-#define CPU_FTR_NO_SLBIE_B LONG_ASM_CONST(0x0008000000000000)
#define CPU_FTR_VSX LONG_ASM_CONST(0x0010000000000000)
#define CPU_FTR_SAO LONG_ASM_CONST(0x0020000000000000)
#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000)
@@ -202,12 +199,14 @@ extern const char *powerpc_base_platform;
#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000)
#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0400000000000000)
#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0800000000000000)
+#define CPU_FTR_ICSWX LONG_ASM_CONST(0x1000000000000000)
#ifndef __ASSEMBLY__
-#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_SLB | \
- CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \
- CPU_FTR_NODSISRALIGN | CPU_FTR_16M_PAGE)
+#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
+
+#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_SLB | MMU_FTR_TLBIEL | \
+ MMU_FTR_16M_PAGE)
/* We only set the altivec features if the kernel was compiled with altivec
* support
@@ -387,7 +386,8 @@ extern const char *powerpc_base_platform;
CPU_FTR_DBELL)
#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
- CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
+ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+ CPU_FTR_DEBUG_LVL_EXC)
#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
/* 64-bit CPUs */
@@ -407,44 +407,45 @@ extern const char *powerpc_base_platform;
#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
- CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
- CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS | \
- CPU_FTR_POPCNTB)
+ CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
- CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+ CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
- CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR)
#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_HVMODE_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
- CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+ CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
- CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+ CPU_FTR_ICSWX | CPU_FTR_CFAR)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
- CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | \
- CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
+ CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
CPU_FTR_UNALIGNED_LD_STD)
#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
- CPU_FTR_PPCAS_ARCH_V2 | \
- CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \
- CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_NO_SLBIE_B)
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_PURR | CPU_FTR_REAL_LE)
#define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
+#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \
+ CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
+
#ifdef __powerpc64__
#ifdef CONFIG_PPC_BOOK3E
-#define CPU_FTRS_POSSIBLE (CPU_FTRS_E5500)
+#define CPU_FTRS_POSSIBLE (CPU_FTRS_E5500 | CPU_FTRS_A2)
#else
#define CPU_FTRS_POSSIBLE \
(CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \
CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 | \
CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \
- CPU_FTR_1T_SEGMENT | CPU_FTR_VSX)
+ CPU_FTR_VSX)
#endif
#else
enum {
@@ -487,7 +488,7 @@ enum {
#ifdef __powerpc64__
#ifdef CONFIG_PPC_BOOK3E
-#define CPU_FTRS_ALWAYS (CPU_FTRS_E5500)
+#define CPU_FTRS_ALWAYS (CPU_FTRS_E5500 & CPU_FTRS_A2)
#else
#define CPU_FTRS_ALWAYS \
(CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 & \
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index f71bb4c118b..ce516e5eb0d 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -37,16 +37,16 @@ extern cpumask_t threads_core_mask;
* This can typically be used for things like IPI for tlb invalidations
* since those need to be done only once per core/TLB
*/
-static inline cpumask_t cpu_thread_mask_to_cores(cpumask_t threads)
+static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
{
cpumask_t tmp, res;
int i;
- res = CPU_MASK_NONE;
+ cpumask_clear(&res);
for (i = 0; i < NR_CPUS; i += threads_per_core) {
- cpus_shift_left(tmp, threads_core_mask, i);
- if (cpus_intersects(threads, tmp))
- cpu_set(i, res);
+ cpumask_shift_left(&tmp, &threads_core_mask, i);
+ if (cpumask_intersects(threads, &tmp))
+ cpumask_set_cpu(i, &res);
}
return res;
}
@@ -58,7 +58,7 @@ static inline int cpu_nr_cores(void)
static inline cpumask_t cpu_online_cores_map(void)
{
- return cpu_thread_mask_to_cores(cpu_online_map);
+ return cpu_thread_mask_to_cores(cpu_online_mask);
}
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 0893ab9343a..9c70d0ca96d 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -27,9 +27,8 @@ enum ppc_dbell {
PPC_G_DBELL_MC = 4, /* guest mcheck doorbell */
};
-extern void doorbell_message_pass(int target, int msg);
+extern void doorbell_cause_ipi(int cpu, unsigned long data);
extern void doorbell_exception(struct pt_regs *regs);
-extern void doorbell_check_self(void);
extern void doorbell_setup_this_cpu(void);
static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag)
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index f0fb4fc1f6e..45921672b97 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -52,6 +52,10 @@ extern struct ppc_emulated {
#ifdef CONFIG_VSX
struct ppc_emulated_entry vsx;
#endif
+#ifdef CONFIG_PPC64
+ struct ppc_emulated_entry mfdscr;
+ struct ppc_emulated_entry mtdscr;
+#endif
} ppc_emulated;
extern u32 ppc_warn_emulated;
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 7778d6f0c87..f5dfe3411f6 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -46,6 +46,7 @@
#define EX_CCR 60
#define EX_R3 64
#define EX_LR 72
+#define EX_CFAR 80
/*
* We're short on space and time in the exception prolog, so we can't
@@ -56,30 +57,40 @@
#define LOAD_HANDLER(reg, label) \
addi reg,reg,(label)-_stext; /* virt addr of handler ... */
-#define EXCEPTION_PROLOG_1(area) \
- mfspr r13,SPRN_SPRG_PACA; /* get paca address into r13 */ \
+/* Exception register prefixes */
+#define EXC_HV H
+#define EXC_STD
+
+#define EXCEPTION_PROLOG_1(area) \
+ GET_PACA(r13); \
std r9,area+EX_R9(r13); /* save r9 - r12 */ \
std r10,area+EX_R10(r13); \
std r11,area+EX_R11(r13); \
std r12,area+EX_R12(r13); \
- mfspr r9,SPRN_SPRG_SCRATCH0; \
+ BEGIN_FTR_SECTION_NESTED(66); \
+ mfspr r10,SPRN_CFAR; \
+ std r10,area+EX_CFAR(r13); \
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
+ GET_SCRATCH0(r9); \
std r9,area+EX_R13(r13); \
mfcr r9
-#define EXCEPTION_PROLOG_PSERIES_1(label) \
+#define __EXCEPTION_PROLOG_PSERIES_1(label, h) \
ld r12,PACAKBASE(r13); /* get high part of &label */ \
ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \
- mfspr r11,SPRN_SRR0; /* save SRR0 */ \
+ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
LOAD_HANDLER(r12,label) \
- mtspr SPRN_SRR0,r12; \
- mfspr r12,SPRN_SRR1; /* and SRR1 */ \
- mtspr SPRN_SRR1,r10; \
- rfid; \
+ mtspr SPRN_##h##SRR0,r12; \
+ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
+ mtspr SPRN_##h##SRR1,r10; \
+ h##rfid; \
b . /* prevent speculative execution */
+#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
+ __EXCEPTION_PROLOG_PSERIES_1(label, h)
-#define EXCEPTION_PROLOG_PSERIES(area, label) \
+#define EXCEPTION_PROLOG_PSERIES(area, label, h) \
EXCEPTION_PROLOG_1(area); \
- EXCEPTION_PROLOG_PSERIES_1(label);
+ EXCEPTION_PROLOG_PSERIES_1(label, h);
/*
* The common exception prolog is used for all except a few exceptions
@@ -98,10 +109,11 @@
beq- 1f; \
ld r1,PACAKSAVE(r13); /* kernel stack to use */ \
1: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \
- bge- cr1,2f; /* abort if it is */ \
- b 3f; \
-2: li r1,(n); /* will be reloaded later */ \
+ blt+ cr1,3f; /* abort if it is */ \
+ li r1,(n); /* will be reloaded later */ \
sth r1,PACA_TRAP_SAVE(r13); \
+ std r3,area+EX_R3(r13); \
+ addi r3,r13,area; /* r3 -> where regs are saved*/ \
b bad_stack; \
3: std r9,_CCR(r1); /* save CR in stackframe */ \
std r11,_NIP(r1); /* save SRR0 in stackframe */ \
@@ -123,6 +135,10 @@
std r9,GPR11(r1); \
std r10,GPR12(r1); \
std r11,GPR13(r1); \
+ BEGIN_FTR_SECTION_NESTED(66); \
+ ld r10,area+EX_CFAR(r13); \
+ std r10,ORIG_GPR3(r1); \
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
mflr r9; /* save LR in stackframe */ \
std r9,_LINK(r1); \
@@ -143,57 +159,62 @@
/*
* Exception vectors.
*/
-#define STD_EXCEPTION_PSERIES(n, label) \
- . = n; \
+#define STD_EXCEPTION_PSERIES(loc, vec, label) \
+ . = loc; \
.globl label##_pSeries; \
label##_pSeries: \
HMT_MEDIUM; \
- DO_KVM n; \
- mtspr SPRN_SPRG_SCRATCH0,r13; /* save r13 */ \
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
+ DO_KVM vec; \
+ SET_SCRATCH0(r13); /* save r13 */ \
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_STD)
-#define HSTD_EXCEPTION_PSERIES(n, label) \
- . = n; \
- .globl label##_pSeries; \
-label##_pSeries: \
+#define STD_EXCEPTION_HV(loc, vec, label) \
+ . = loc; \
+ .globl label##_hv; \
+label##_hv: \
HMT_MEDIUM; \
- mtspr SPRN_SPRG_SCRATCH0,r20; /* save r20 */ \
- mfspr r20,SPRN_HSRR0; /* copy HSRR0 to SRR0 */ \
- mtspr SPRN_SRR0,r20; \
- mfspr r20,SPRN_HSRR1; /* copy HSRR0 to SRR0 */ \
- mtspr SPRN_SRR1,r20; \
- mfspr r20,SPRN_SPRG_SCRATCH0; /* restore r20 */ \
- mtspr SPRN_SPRG_SCRATCH0,r13; /* save r13 */ \
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
+ DO_KVM vec; \
+ SET_SCRATCH0(r13); /* save r13 */ \
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_HV)
-
-#define MASKABLE_EXCEPTION_PSERIES(n, label) \
- . = n; \
- .globl label##_pSeries; \
-label##_pSeries: \
+#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h) \
HMT_MEDIUM; \
- DO_KVM n; \
- mtspr SPRN_SPRG_SCRATCH0,r13; /* save r13 */ \
- mfspr r13,SPRN_SPRG_PACA; /* get paca address into r13 */ \
+ DO_KVM vec; \
+ SET_SCRATCH0(r13); /* save r13 */ \
+ GET_PACA(r13); \
std r9,PACA_EXGEN+EX_R9(r13); /* save r9, r10 */ \
std r10,PACA_EXGEN+EX_R10(r13); \
lbz r10,PACASOFTIRQEN(r13); \
mfcr r9; \
cmpwi r10,0; \
- beq masked_interrupt; \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
+ beq masked_##h##interrupt; \
+ GET_SCRATCH0(r10); \
std r10,PACA_EXGEN+EX_R13(r13); \
std r11,PACA_EXGEN+EX_R11(r13); \
std r12,PACA_EXGEN+EX_R12(r13); \
ld r12,PACAKBASE(r13); /* get high part of &label */ \
ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \
- mfspr r11,SPRN_SRR0; /* save SRR0 */ \
+ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
LOAD_HANDLER(r12,label##_common) \
- mtspr SPRN_SRR0,r12; \
- mfspr r12,SPRN_SRR1; /* and SRR1 */ \
- mtspr SPRN_SRR1,r10; \
- rfid; \
+ mtspr SPRN_##h##SRR0,r12; \
+ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
+ mtspr SPRN_##h##SRR1,r10; \
+ h##rfid; \
b . /* prevent speculative execution */
+#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h) \
+ __MASKABLE_EXCEPTION_PSERIES(vec, label, h)
+
+#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \
+ . = loc; \
+ .globl label##_pSeries; \
+label##_pSeries: \
+ _MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_STD)
+
+#define MASKABLE_EXCEPTION_HV(loc, vec, label) \
+ . = loc; \
+ .globl label##_hv; \
+label##_hv: \
+ _MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_HV)
#ifdef CONFIG_PPC_ISERIES
#define DISABLE_INTS \
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 921a8470e18..9a67a38bf7b 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -49,7 +49,7 @@ label##5: \
FTR_ENTRY_OFFSET label##2b-label##5b; \
FTR_ENTRY_OFFSET label##3b-label##5b; \
FTR_ENTRY_OFFSET label##4b-label##5b; \
- .ifgt (label##4b-label##3b)-(label##2b-label##1b); \
+ .ifgt (label##4b- label##3b)-(label##2b- label##1b); \
.error "Feature section else case larger than body"; \
.endif; \
.popsection;
@@ -146,6 +146,19 @@ label##5: \
#ifndef __ASSEMBLY__
+#define ASM_FTR_IF(section_if, section_else, msk, val) \
+ stringify_in_c(BEGIN_FTR_SECTION) \
+ section_if "; " \
+ stringify_in_c(FTR_SECTION_ELSE) \
+ section_else "; " \
+ stringify_in_c(ALT_FTR_SECTION_END((msk), (val)))
+
+#define ASM_FTR_IFSET(section_if, section_else, msk) \
+ ASM_FTR_IF(section_if, section_else, (msk), (msk))
+
+#define ASM_FTR_IFCLR(section_if, section_else, msk) \
+ ASM_FTR_IF(section_if, section_else, (msk), 0)
+
#define ASM_MMU_FTR_IF(section_if, section_else, msk, val) \
stringify_in_c(BEGIN_MMU_FTR_SECTION) \
section_if "; " \
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 4ef662e4a31..3a6c586c4e4 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -47,6 +47,7 @@
#define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000)
#define FW_FEATURE_CMO ASM_CONST(0x0000000002000000)
#define FW_FEATURE_VPHN ASM_CONST(0x0000000004000000)
+#define FW_FEATURE_XCMO ASM_CONST(0x0000000008000000)
#ifndef __ASSEMBLY__
@@ -60,7 +61,7 @@ enum {
FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR |
FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
- FW_FEATURE_CMO | FW_FEATURE_VPHN,
+ FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 8edec710cc6..852b8c1c09d 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -102,6 +102,7 @@
#define H_ANDCOND (1UL<<(63-33))
#define H_ICACHE_INVALIDATE (1UL<<(63-40)) /* icbi, etc. (ignored for IO pages) */
#define H_ICACHE_SYNCHRONIZE (1UL<<(63-41)) /* dcbst, icbi, etc (ignored for IO pages */
+#define H_COALESCE_CAND (1UL<<(63-42)) /* page is a good candidate for coalescing */
#define H_ZERO_PAGE (1UL<<(63-48)) /* zero the page before mapping (ignored for IO pages) */
#define H_COPY_PAGE (1UL<<(63-49))
#define H_N (1UL<<(63-61))
@@ -234,6 +235,7 @@
#define H_GET_MPP 0x2D4
#define H_HOME_NODE_ASSOCIATIVITY 0x2EC
#define H_BEST_ENERGY 0x2F4
+#define H_GET_MPP_X 0x314
#define MAX_HCALL_OPCODE H_BEST_ENERGY
#ifndef __ASSEMBLY__
@@ -312,6 +314,16 @@ struct hvcall_mpp_data {
int h_get_mpp(struct hvcall_mpp_data *);
+struct hvcall_mpp_x_data {
+ unsigned long coalesced_bytes;
+ unsigned long pool_coalesced_bytes;
+ unsigned long pool_purr_cycles;
+ unsigned long pool_spurr_cycles;
+ unsigned long reserved[3];
+};
+
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data);
+
#ifdef CONFIG_PPC_PSERIES
extern int CMO_PrPSP;
extern int CMO_SecPSP;
diff --git a/arch/powerpc/platforms/cell/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h
index 6efc7782ebf..fbae4928692 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.h
+++ b/arch/powerpc/include/asm/io-workarounds.h
@@ -31,7 +31,6 @@ struct iowa_bus {
void *private;
};
-void __devinit io_workaround_init(void);
void __devinit iowa_register_bus(struct pci_controller *, struct ppc_pci_io *,
int (*)(struct iowa_bus *, void *), void *);
struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR);
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 001f2f11c19..45698d55cd6 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -2,6 +2,8 @@
#define _ASM_POWERPC_IO_H
#ifdef __KERNEL__
+#define ARCH_HAS_IOREMAP_WC
+
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -481,10 +483,16 @@ __do_out_asm(_rec_outl, "stwbrx")
_memcpy_fromio(dst,PCI_FIX_ADDR(src),n)
#endif /* !CONFIG_EEH */
-#ifdef CONFIG_PPC_INDIRECT_IO
-#define DEF_PCI_HOOK(x) x
+#ifdef CONFIG_PPC_INDIRECT_PIO
+#define DEF_PCI_HOOK_pio(x) x
+#else
+#define DEF_PCI_HOOK_pio(x) NULL
+#endif
+
+#ifdef CONFIG_PPC_INDIRECT_MMIO
+#define DEF_PCI_HOOK_mem(x) x
#else
-#define DEF_PCI_HOOK(x) NULL
+#define DEF_PCI_HOOK_mem(x) NULL
#endif
/* Structure containing all the hooks */
@@ -504,7 +512,7 @@ extern struct ppc_pci_io {
#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \
static inline ret name at \
{ \
- if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL) \
+ if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL) \
return ppc_pci_io.name al; \
return __do_##name al; \
}
@@ -512,7 +520,7 @@ static inline ret name at \
#define DEF_PCI_AC_NORET(name, at, al, space, aa) \
static inline void name at \
{ \
- if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL) \
+ if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL) \
ppc_pci_io.name al; \
else \
__do_##name al; \
@@ -616,12 +624,13 @@ static inline void iosync(void)
* * ioremap is the standard one and provides non-cacheable guarded mappings
* and can be hooked by the platform via ppc_md
*
- * * ioremap_flags allows to specify the page flags as an argument and can
- * also be hooked by the platform via ppc_md. ioremap_prot is the exact
- * same thing as ioremap_flags.
+ * * ioremap_prot allows to specify the page flags as an argument and can
+ * also be hooked by the platform via ppc_md.
*
* * ioremap_nocache is identical to ioremap
*
+ * * ioremap_wc enables write combining
+ *
* * iounmap undoes such a mapping and can be hooked
*
* * __ioremap_at (and the pending __iounmap_at) are low level functions to
@@ -629,7 +638,7 @@ static inline void iosync(void)
* currently be hooked. Must be page aligned.
*
* * __ioremap is the low level implementation used by ioremap and
- * ioremap_flags and cannot be hooked (but can be used by a hook on one
+ * ioremap_prot and cannot be hooked (but can be used by a hook on one
* of the previous ones)
*
* * __ioremap_caller is the same as above but takes an explicit caller
@@ -640,10 +649,10 @@ static inline void iosync(void)
*
*/
extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
-extern void __iomem *ioremap_flags(phys_addr_t address, unsigned long size,
- unsigned long flags);
+extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
+ unsigned long flags);
+extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
#define ioremap_nocache(addr, size) ioremap((addr), (size))
-#define ioremap_prot(addr, size, prot) ioremap_flags((addr), (size), (prot))
extern void iounmap(volatile void __iomem *addr);
diff --git a/arch/powerpc/include/asm/io_event_irq.h b/arch/powerpc/include/asm/io_event_irq.h
new file mode 100644
index 00000000000..b1a9a1be3c2
--- /dev/null
+++ b/arch/powerpc/include/asm/io_event_irq.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2010, 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_IO_EVENT_IRQ_H
+#define _ASM_POWERPC_IO_EVENT_IRQ_H
+
+#include <linux/types.h>
+#include <linux/notifier.h>
+
+#define PSERIES_IOEI_RPC_MAX_LEN 216
+
+#define PSERIES_IOEI_TYPE_ERR_DETECTED 0x01
+#define PSERIES_IOEI_TYPE_ERR_RECOVERED 0x02
+#define PSERIES_IOEI_TYPE_EVENT 0x03
+#define PSERIES_IOEI_TYPE_RPC_PASS_THRU 0x04
+
+#define PSERIES_IOEI_SUBTYPE_NOT_APP 0x00
+#define PSERIES_IOEI_SUBTYPE_REBALANCE_REQ 0x01
+#define PSERIES_IOEI_SUBTYPE_NODE_ONLINE 0x03
+#define PSERIES_IOEI_SUBTYPE_NODE_OFFLINE 0x04
+#define PSERIES_IOEI_SUBTYPE_DUMP_SIZE_CHANGE 0x05
+#define PSERIES_IOEI_SUBTYPE_TORRENT_IRV_UPDATE 0x06
+#define PSERIES_IOEI_SUBTYPE_TORRENT_HFI_CFGED 0x07
+
+#define PSERIES_IOEI_SCOPE_NOT_APP 0x00
+#define PSERIES_IOEI_SCOPE_RIO_HUB 0x36
+#define PSERIES_IOEI_SCOPE_RIO_BRIDGE 0x37
+#define PSERIES_IOEI_SCOPE_PHB 0x38
+#define PSERIES_IOEI_SCOPE_EADS_GLOBAL 0x39
+#define PSERIES_IOEI_SCOPE_EADS_SLOT 0x3A
+#define PSERIES_IOEI_SCOPE_TORRENT_HUB 0x3B
+#define PSERIES_IOEI_SCOPE_SERVICE_PROC 0x51
+
+/* Platform Event Log Format, Version 6, data portition of IO event section */
+struct pseries_io_event {
+ uint8_t event_type; /* 0x00 IO-Event Type */
+ uint8_t rpc_data_len; /* 0x01 RPC data length */
+ uint8_t scope; /* 0x02 Error/Event Scope */
+ uint8_t event_subtype; /* 0x03 I/O-Event Sub-Type */
+ uint32_t drc_index; /* 0x04 DRC Index */
+ uint8_t rpc_data[PSERIES_IOEI_RPC_MAX_LEN];
+ /* 0x08 RPC Data (0-216 bytes, */
+ /* padded to 4 bytes alignment) */
+};
+
+extern struct atomic_notifier_head pseries_ioei_notifier_list;
+
+#endif /* _ASM_POWERPC_IO_EVENT_IRQ_H */
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 67ab5fb7d15..1bff591f7f7 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -88,9 +88,6 @@ struct irq_host_ops {
/* Dispose of such a mapping */
void (*unmap)(struct irq_host *h, unsigned int virq);
- /* Update of such a mapping */
- void (*remap)(struct irq_host *h, unsigned int virq, irq_hw_number_t hw);
-
/* Translate device-tree interrupt specifier from raw format coming
* from the firmware to a irq_hw_number_t (interrupt line number) and
* type (sense) that can be passed to set_irq_type(). In the absence
@@ -128,19 +125,10 @@ struct irq_host {
struct device_node *of_node;
};
-/* The main irq map itself is an array of NR_IRQ entries containing the
- * associate host and irq number. An entry with a host of NULL is free.
- * An entry can be allocated if it's free, the allocator always then sets
- * hwirq first to the host's invalid irq number and then fills ops.
- */
-struct irq_map_entry {
- irq_hw_number_t hwirq;
- struct irq_host *host;
-};
-
-extern struct irq_map_entry irq_map[NR_IRQS];
-
+struct irq_data;
+extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d);
extern irq_hw_number_t virq_to_hw(unsigned int virq);
+extern bool virq_is_host(unsigned int virq, struct irq_host *host);
/**
* irq_alloc_host - Allocate a new irq_host data structure
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index f54408d995b..8a33698c61b 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -76,7 +76,7 @@ extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
extern cpumask_t cpus_in_sr;
static inline int kexec_sr_activated(int cpu)
{
- return cpu_isset(cpu,cpus_in_sr);
+ return cpumask_test_cpu(cpu, &cpus_in_sr);
}
struct kimage;
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 18ea6963ad7..d2ca5ed3877 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -45,6 +45,114 @@ struct kvm_regs {
__u64 gpr[32];
};
+#define KVM_SREGS_E_IMPL_NONE 0
+#define KVM_SREGS_E_IMPL_FSL 1
+
+#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */
+
+/*
+ * Feature bits indicate which sections of the sregs struct are valid,
+ * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
+ * corresponding to unset feature bits will not be modified. This allows
+ * restoring a checkpoint made without that feature, while keeping the
+ * default values of the new registers.
+ *
+ * KVM_SREGS_E_BASE contains:
+ * CSRR0/1 (refers to SRR2/3 on 40x)
+ * ESR
+ * DEAR
+ * MCSR
+ * TSR
+ * TCR
+ * DEC
+ * TB
+ * VRSAVE (USPRG0)
+ */
+#define KVM_SREGS_E_BASE (1 << 0)
+
+/*
+ * KVM_SREGS_E_ARCH206 contains:
+ *
+ * PIR
+ * MCSRR0/1
+ * DECAR
+ * IVPR
+ */
+#define KVM_SREGS_E_ARCH206 (1 << 1)
+
+/*
+ * Contains EPCR, plus the upper half of 64-bit registers
+ * that are 32-bit on 32-bit implementations.
+ */
+#define KVM_SREGS_E_64 (1 << 2)
+
+#define KVM_SREGS_E_SPRG8 (1 << 3)
+#define KVM_SREGS_E_MCIVPR (1 << 4)
+
+/*
+ * IVORs are used -- contains IVOR0-15, plus additional IVORs
+ * in combination with an appropriate feature bit.
+ */
+#define KVM_SREGS_E_IVOR (1 << 5)
+
+/*
+ * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG.
+ * Also TLBnPS if MMUCFG[MAVN] = 1.
+ */
+#define KVM_SREGS_E_ARCH206_MMU (1 << 6)
+
+/* DBSR, DBCR, IAC, DAC, DVC */
+#define KVM_SREGS_E_DEBUG (1 << 7)
+
+/* Enhanced debug -- DSRR0/1, SPRG9 */
+#define KVM_SREGS_E_ED (1 << 8)
+
+/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_SPE (1 << 9)
+
+/* External Proxy (EXP) -- EPR */
+#define KVM_SREGS_EXP (1 << 10)
+
+/* External PID (E.PD) -- EPSC/EPLC */
+#define KVM_SREGS_E_PD (1 << 11)
+
+/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_PC (1 << 12)
+
+/* Page table (E.PT) -- EPTCFG */
+#define KVM_SREGS_E_PT (1 << 13)
+
+/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_PM (1 << 14)
+
+/*
+ * Special updates:
+ *
+ * Some registers may change even while a vcpu is not running.
+ * To avoid losing these changes, by default these registers are
+ * not updated by KVM_SET_SREGS. To force an update, set the bit
+ * in u.e.update_special corresponding to the register to be updated.
+ *
+ * The update_special field is zero on return from KVM_GET_SREGS.
+ *
+ * When restoring a checkpoint, the caller can set update_special
+ * to 0xffffffff to ensure that everything is restored, even new features
+ * that the caller doesn't know about.
+ */
+#define KVM_SREGS_E_UPDATE_MCSR (1 << 0)
+#define KVM_SREGS_E_UPDATE_TSR (1 << 1)
+#define KVM_SREGS_E_UPDATE_DEC (1 << 2)
+#define KVM_SREGS_E_UPDATE_DBSR (1 << 3)
+
+/*
+ * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
+ * previous KVM_GET_REGS.
+ *
+ * Unless otherwise indicated, setting any register with KVM_SET_SREGS
+ * directly sets its value. It does not trigger any special semantics such
+ * as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct
+ * just received from KVM_GET_SREGS is always a no-op.
+ */
struct kvm_sregs {
__u32 pvr;
union {
@@ -62,6 +170,82 @@ struct kvm_sregs {
__u64 dbat[8];
} ppc32;
} s;
+ struct {
+ union {
+ struct { /* KVM_SREGS_E_IMPL_FSL */
+ __u32 features; /* KVM_SREGS_E_FSL_ */
+ __u32 svr;
+ __u64 mcar;
+ __u32 hid0;
+
+ /* KVM_SREGS_E_FSL_PIDn */
+ __u32 pid1, pid2;
+ } fsl;
+ __u8 pad[256];
+ } impl;
+
+ __u32 features; /* KVM_SREGS_E_ */
+ __u32 impl_id; /* KVM_SREGS_E_IMPL_ */
+ __u32 update_special; /* KVM_SREGS_E_UPDATE_ */
+ __u32 pir; /* read-only */
+ __u64 sprg8;
+ __u64 sprg9; /* E.ED */
+ __u64 csrr0;
+ __u64 dsrr0; /* E.ED */
+ __u64 mcsrr0;
+ __u32 csrr1;
+ __u32 dsrr1; /* E.ED */
+ __u32 mcsrr1;
+ __u32 esr;
+ __u64 dear;
+ __u64 ivpr;
+ __u64 mcivpr;
+ __u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */
+
+ __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */
+ __u32 tcr;
+ __u32 decar;
+ __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */
+
+ /*
+ * Userspace can read TB directly, but the
+ * value reported here is consistent with "dec".
+ *
+ * Read-only.
+ */
+ __u64 tb;
+
+ __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */
+ __u32 dbcr[3];
+ __u32 iac[4];
+ __u32 dac[2];
+ __u32 dvc[2];
+ __u8 num_iac; /* read-only */
+ __u8 num_dac; /* read-only */
+ __u8 num_dvc; /* read-only */
+ __u8 pad;
+
+ __u32 epr; /* EXP */
+ __u32 vrsave; /* a.k.a. USPRG0 */
+ __u32 epcr; /* KVM_SREGS_E_64 */
+
+ __u32 mas0;
+ __u32 mas1;
+ __u64 mas2;
+ __u64 mas7_3;
+ __u32 mas4;
+ __u32 mas6;
+
+ __u32 ivor_low[16]; /* IVOR0-15 */
+ __u32 ivor_high[18]; /* IVOR32+, plus room to expand */
+
+ __u32 mmucfg; /* read-only */
+ __u32 eptcfg; /* E.PT, read-only */
+ __u32 tlbcfg[4];/* read-only */
+ __u32 tlbps[4]; /* read-only */
+
+ __u32 eplc, epsc; /* E.PD */
+ } e;
__u8 pad[1020];
} u;
};
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index d22d39942a9..a0e57618ff3 100644
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
}
-void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 5b750467439..0951b17f4eb 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -59,6 +59,7 @@
#define BOOK3S_INTERRUPT_INST_SEGMENT 0x480
#define BOOK3S_INTERRUPT_EXTERNAL 0x500
#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501
+#define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502
#define BOOK3S_INTERRUPT_ALIGNMENT 0x600
#define BOOK3S_INTERRUPT_PROGRAM 0x700
#define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 36fdb3aff30..d5a8a386163 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -34,6 +34,7 @@
(\intno == BOOK3S_INTERRUPT_DATA_SEGMENT) || \
(\intno == BOOK3S_INTERRUPT_INST_SEGMENT) || \
(\intno == BOOK3S_INTERRUPT_EXTERNAL) || \
+ (\intno == BOOK3S_INTERRUPT_EXTERNAL_HV) || \
(\intno == BOOK3S_INTERRUPT_ALIGNMENT) || \
(\intno == BOOK3S_INTERRUPT_PROGRAM) || \
(\intno == BOOK3S_INTERRUPT_FP_UNAVAIL) || \
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index 7fea26fffb2..7a2a565f88c 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -43,6 +43,7 @@ struct kvmppc_vcpu_e500 {
u32 host_pid[E500_PID_NUM];
u32 pid[E500_PID_NUM];
+ u32 svr;
u32 mas0;
u32 mas1;
@@ -58,6 +59,7 @@ struct kvmppc_vcpu_e500 {
u32 hid1;
u32 tlb0cfg;
u32 tlb1cfg;
+ u64 mcar;
struct kvm_vcpu vcpu;
};
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index bba3b9b72a3..186f150b9b8 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -223,6 +223,7 @@ struct kvm_vcpu_arch {
ulong hflags;
ulong guest_owned_ext;
#endif
+ u32 vrsave; /* also USPRG0 */
u32 mmucr;
ulong sprg4;
ulong sprg5;
@@ -232,6 +233,9 @@ struct kvm_vcpu_arch {
ulong csrr1;
ulong dsrr0;
ulong dsrr1;
+ ulong mcsrr0;
+ ulong mcsrr1;
+ ulong mcsr;
ulong esr;
u32 dec;
u32 decar;
@@ -255,6 +259,7 @@ struct kvm_vcpu_arch {
u32 dbsr;
#ifdef CONFIG_KVM_EXIT_TIMING
+ struct mutex exit_timing_lock;
struct kvmppc_exit_timing timing_exit;
struct kvmppc_exit_timing timing_last_enter;
u32 last_exit_type;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ecb3bc74c34..9345238edec 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
+extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
/* Core-specific hooks */
@@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
return r;
}
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
+
#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index a077adc0b35..e0298d26ce5 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -210,6 +210,8 @@ struct dtl_entry {
#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */
#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
+extern struct kmem_cache *dtl_cache;
+
/*
* When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
* reading from the dispatch trace log. If other code wants to consume
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index e4f01915fbb..47cacddb14c 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -29,21 +29,6 @@ struct file;
struct pci_controller;
struct kimage;
-#ifdef CONFIG_SMP
-struct smp_ops_t {
- void (*message_pass)(int target, int msg);
- int (*probe)(void);
- void (*kick_cpu)(int nr);
- void (*setup_cpu)(int nr);
- void (*bringup_done)(void);
- void (*take_timebase)(void);
- void (*give_timebase)(void);
- int (*cpu_disable)(void);
- void (*cpu_die)(unsigned int nr);
- int (*cpu_bootable)(unsigned int nr);
-};
-#endif
-
struct machdep_calls {
char *name;
#ifdef CONFIG_PPC64
@@ -267,6 +252,7 @@ struct machdep_calls {
extern void e500_idle(void);
extern void power4_idle(void);
+extern void power7_idle(void);
extern void ppc6xx_idle(void);
extern void book3e_idle(void);
@@ -311,12 +297,6 @@ extern sys_ctrler_t sys_ctrler;
#endif /* CONFIG_PPC_PMAC */
-#ifdef CONFIG_SMP
-/* Poor default implementations */
-extern void __devinit smp_generic_give_timebase(void);
-extern void __devinit smp_generic_take_timebase(void);
-#endif /* CONFIG_SMP */
-
/* Functions to produce codes on the leds.
* The SRC code should be unique for the message category and should
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index 17194fcd404..3ea0f9a259d 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -43,6 +43,7 @@
#define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000)
#define MAS0_ESEL(x) (((x) << 16) & 0x0FFF0000)
#define MAS0_NV(x) ((x) & 0x00000FFF)
+#define MAS0_ESEL_MASK 0x0FFF0000
#define MAS0_HES 0x00004000
#define MAS0_WQ_ALLWAYS 0x00000000
#define MAS0_WQ_COND 0x00001000
@@ -137,6 +138,21 @@
#define MMUCSR0_TLB2PS 0x00078000 /* TLB2 Page Size */
#define MMUCSR0_TLB3PS 0x00780000 /* TLB3 Page Size */
+/* MMUCFG bits */
+#define MMUCFG_MAVN_NASK 0x00000003
+#define MMUCFG_MAVN_V1_0 0x00000000
+#define MMUCFG_MAVN_V2_0 0x00000001
+#define MMUCFG_NTLB_MASK 0x0000000c
+#define MMUCFG_NTLB_SHIFT 2
+#define MMUCFG_PIDSIZE_MASK 0x000007c0
+#define MMUCFG_PIDSIZE_SHIFT 6
+#define MMUCFG_TWC 0x00008000
+#define MMUCFG_LRAT 0x00010000
+#define MMUCFG_RASIZE_MASK 0x00fe0000
+#define MMUCFG_RASIZE_SHIFT 17
+#define MMUCFG_LPIDSIZE_MASK 0x0f000000
+#define MMUCFG_LPIDSIZE_SHIFT 24
+
/* TLBnCFG encoding */
#define TLBnCFG_N_ENTRY 0x00000fff /* number of entries */
#define TLBnCFG_HES 0x00002000 /* HW select supported */
@@ -229,6 +245,10 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
extern int mmu_linear_psize;
extern int mmu_vmemmap_psize;
+#ifdef CONFIG_PPC64
+extern unsigned long linear_map_top;
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index ae7b3efec8e..d865bd909c7 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -408,6 +408,7 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
#endif /* CONFIG_PPC_SUBPAGE_PROT */
typedef unsigned long mm_context_id_t;
+struct spinlock;
typedef struct {
mm_context_id_t id;
@@ -423,6 +424,11 @@ typedef struct {
#ifdef CONFIG_PPC_SUBPAGE_PROT
struct subpage_prot_table spt;
#endif /* CONFIG_PPC_SUBPAGE_PROT */
+#ifdef CONFIG_PPC_ICSWX
+ struct spinlock *cop_lockp; /* guard acop and cop_pid */
+ unsigned long acop; /* mask of enabled coprocessor types */
+ unsigned int cop_pid; /* pid value used with coprocessors */
+#endif /* CONFIG_PPC_ICSWX */
} mm_context_t;
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index bb40a06d3b7..4138b21ae80 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -56,11 +56,6 @@
*/
#define MMU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x00200000)
-/* This indicates that the processor uses the ISA 2.06 server tlbie
- * mnemonics
- */
-#define MMU_FTR_TLBIE_206 ASM_CONST(0x00400000)
-
/* Enable use of TLB reservation. Processor should support tlbsrx.
* instruction and MAS0[WQ].
*/
@@ -70,6 +65,53 @@
*/
#define MMU_FTR_USE_PAIRED_MAS ASM_CONST(0x01000000)
+/* MMU is SLB-based
+ */
+#define MMU_FTR_SLB ASM_CONST(0x02000000)
+
+/* Support 16M large pages
+ */
+#define MMU_FTR_16M_PAGE ASM_CONST(0x04000000)
+
+/* Supports TLBIEL variant
+ */
+#define MMU_FTR_TLBIEL ASM_CONST(0x08000000)
+
+/* Supports tlbies w/o locking
+ */
+#define MMU_FTR_LOCKLESS_TLBIE ASM_CONST(0x10000000)
+
+/* Large pages can be marked CI
+ */
+#define MMU_FTR_CI_LARGE_PAGE ASM_CONST(0x20000000)
+
+/* 1T segments available
+ */
+#define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000)
+
+/* Doesn't support the B bit (1T segment) in SLBIE
+ */
+#define MMU_FTR_NO_SLBIE_B ASM_CONST(0x80000000)
+
+/* MMU feature bit sets for various CPUs */
+#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \
+ MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
+#define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2
+#define MMU_FTRS_PPC970 MMU_FTRS_POWER4
+#define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
+ MMU_FTR_CI_LARGE_PAGE
+#define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
+ MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B
+#define MMU_FTRS_A2 MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | \
+ MMU_FTR_USE_TLBIVAX_BCAST | \
+ MMU_FTR_LOCK_BCAST_INVAL | \
+ MMU_FTR_USE_TLBRSRV | \
+ MMU_FTR_USE_PAIRED_MAS | \
+ MMU_FTR_TLBIEL | \
+ MMU_FTR_16M_PAGE
#ifndef __ASSEMBLY__
#include <asm/cputable.h>
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 81fb41289d6..a73668a5f30 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -32,6 +32,10 @@ extern void __destroy_context(unsigned long context_id);
extern void mmu_context_init(void);
#endif
+extern void switch_cop(struct mm_struct *next);
+extern int use_cop(unsigned long acop, struct mm_struct *mm);
+extern void drop_cop(unsigned long acop, struct mm_struct *mm);
+
/*
* switch_mm is the entry point called from the architecture independent
* code in kernel/sched.c
@@ -55,6 +59,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
if (prev == next)
return;
+#ifdef CONFIG_PPC_ICSWX
+ /* Switch coprocessor context only if prev or next uses a coprocessor */
+ if (prev->context.acop || next->context.acop)
+ switch_cop(next);
+#endif /* CONFIG_PPC_ICSWX */
+
/* We must stop all altivec streams before changing the HW
* context
*/
@@ -67,7 +77,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* sub architectures.
*/
#ifdef CONFIG_PPC_STD_MMU_64
- if (cpu_has_feature(CPU_FTR_SLB))
+ if (mmu_has_feature(MMU_FTR_SLB))
switch_slb(tsk, next);
else
switch_stab(tsk, next);
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 49baddcdd14..df18989e78d 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -262,6 +262,7 @@ struct mpic
#ifdef CONFIG_SMP
struct irq_chip hc_ipi;
#endif
+ struct irq_chip hc_tm;
const char *name;
/* Flags */
unsigned int flags;
@@ -280,7 +281,7 @@ struct mpic
/* vector numbers used for internal sources (ipi/timers) */
unsigned int ipi_vecs[4];
- unsigned int timer_vecs[4];
+ unsigned int timer_vecs[8];
/* Spurious vector to program into unused sources */
unsigned int spurious_vec;
@@ -368,6 +369,8 @@ struct mpic
* NOTE: This flag trumps MPIC_WANTS_RESET.
*/
#define MPIC_NO_RESET 0x00004000
+/* Freescale MPIC (compatible includes "fsl,mpic") */
+#define MPIC_FSL 0x00008000
/* MPIC HW modification ID */
#define MPIC_REGSET_MASK 0xf0000000
diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h
index d4b4bfa26fb..89d2f99c1bf 100644
--- a/arch/powerpc/include/asm/pSeries_reconfig.h
+++ b/arch/powerpc/include/asm/pSeries_reconfig.h
@@ -18,13 +18,18 @@
extern int pSeries_reconfig_notifier_register(struct notifier_block *);
extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
extern struct blocking_notifier_head pSeries_reconfig_chain;
+/* Not the best place to put this, will be fixed when we move some
+ * of the rtas suspend-me stuff to pseries */
+extern void pSeries_coalesce_init(void);
#else /* !CONFIG_PPC_PSERIES */
static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
{
return 0;
}
static inline void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { }
+static inline void pSeries_coalesce_init(void) { }
#endif /* CONFIG_PPC_PSERIES */
+
#endif /* __KERNEL__ */
#endif /* _PPC64_PSERIES_RECONFIG_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index ec57540cd7a..74126765106 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -92,9 +92,9 @@ struct paca_struct {
* Now, starting in cacheline 2, the exception save areas
*/
/* used for most interrupts/exceptions */
- u64 exgen[10] __attribute__((aligned(0x80)));
- u64 exmc[10]; /* used for machine checks */
- u64 exslb[10]; /* used for SLB/segment table misses
+ u64 exgen[11] __attribute__((aligned(0x80)));
+ u64 exmc[11]; /* used for machine checks */
+ u64 exslb[11]; /* used for SLB/segment table misses
* on the linear mapping */
/* SLB related definitions */
u16 vmalloc_sllp;
@@ -106,7 +106,8 @@ struct paca_struct {
pgd_t *pgd; /* Current PGD */
pgd_t *kernel_pgd; /* Kernel PGD */
u64 exgen[8] __attribute__((aligned(0x80)));
- u64 extlb[EX_TLB_SIZE*3] __attribute__((aligned(0x80)));
+ /* We can have up to 3 levels of reentrancy in the TLB miss handler */
+ u64 extlb[3][EX_TLB_SIZE / sizeof(u64)] __attribute__((aligned(0x80)));
u64 exmc[8]; /* used for machine checks */
u64 excrit[8]; /* used for crit interrupts */
u64 exdbg[8]; /* used for debug interrupts */
@@ -125,7 +126,7 @@ struct paca_struct {
struct task_struct *__current; /* Pointer to current */
u64 kstack; /* Saved Kernel stack addr */
u64 stab_rr; /* stab/slb round-robin counter */
- u64 saved_r1; /* r1 save for RTAS calls */
+ u64 saved_r1; /* r1 save for RTAS calls or PM */
u64 saved_msr; /* MSR saved here by enter_rtas */
u16 trap_save; /* Used when bad stack is encountered */
u8 soft_enabled; /* irq soft-enable flag */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 812b2cd80ae..9356262fd3c 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -59,24 +59,7 @@ static __inline__ void clear_page(void *addr)
: "ctr", "memory");
}
-extern void copy_4K_page(void *to, void *from);
-
-#ifdef CONFIG_PPC_64K_PAGES
-static inline void copy_page(void *to, void *from)
-{
- unsigned int i;
- for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) {
- copy_4K_page(to, from);
- to += 4096;
- from += 4096;
- }
-}
-#else /* CONFIG_PPC_64K_PAGES */
-static inline void copy_page(void *to, void *from)
-{
- copy_4K_page(to, from);
-}
-#endif /* CONFIG_PPC_64K_PAGES */
+extern void copy_page(void *to, void *from);
/* Log 2 of page table size */
extern u64 ppc64_pft_size;
@@ -130,7 +113,7 @@ extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long len, unsigned int psize);
-#define slice_mm_new_context(mm) ((mm)->context.id == 0)
+#define slice_mm_new_context(mm) ((mm)->context.id == MMU_NO_CONTEXT)
#endif /* __ASSEMBLY__ */
#else
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 2b09cd522d3..81576ee0cfb 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -257,21 +257,20 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- unsigned long old;
- if ((pte_val(*ptep) & _PAGE_RW) == 0)
- return;
- old = pte_update(mm, addr, ptep, _PAGE_RW, 0);
+ if ((pte_val(*ptep) & _PAGE_RW) == 0)
+ return;
+
+ pte_update(mm, addr, ptep, _PAGE_RW, 0);
}
static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- unsigned long old;
-
if ((pte_val(*ptep) & _PAGE_RW) == 0)
return;
- old = pte_update(mm, addr, ptep, _PAGE_RW, 1);
+
+ pte_update(mm, addr, ptep, _PAGE_RW, 1);
}
/*
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1255569387b..e472659d906 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -41,6 +41,10 @@
#define PPC_INST_RFCI 0x4c000066
#define PPC_INST_RFDI 0x4c00004e
#define PPC_INST_RFMCI 0x4c00004c
+#define PPC_INST_MFSPR_DSCR 0x7c1102a6
+#define PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff
+#define PPC_INST_MTSPR_DSCR 0x7c1103a6
+#define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff
#define PPC_INST_STRING 0x7c00042a
#define PPC_INST_STRING_MASK 0xfc0007fe
@@ -56,6 +60,17 @@
#define PPC_INST_TLBSRX_DOT 0x7c0006a5
#define PPC_INST_XXLOR 0xf0000510
+#define PPC_INST_NAP 0x4c000364
+#define PPC_INST_SLEEP 0x4c0003a4
+
+/* A2 specific instructions */
+#define PPC_INST_ERATWE 0x7c0001a6
+#define PPC_INST_ERATRE 0x7c000166
+#define PPC_INST_ERATILX 0x7c000066
+#define PPC_INST_ERATIVAX 0x7c000666
+#define PPC_INST_ERATSX 0x7c000126
+#define PPC_INST_ERATSX_DOT 0x7c000127
+
/* macros to insert fields into opcodes */
#define __PPC_RA(a) (((a) & 0x1f) << 16)
#define __PPC_RB(b) (((b) & 0x1f) << 11)
@@ -67,6 +82,8 @@
#define __PPC_XT(s) __PPC_XS(s)
#define __PPC_T_TLB(t) (((t) & 0x3) << 21)
#define __PPC_WC(w) (((w) & 0x3) << 21)
+#define __PPC_WS(w) (((w) & 0x1f) << 11)
+
/*
* Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
* larx with EH set as an illegal instruction.
@@ -113,6 +130,21 @@
#define PPC_TLBIVAX(a,b) stringify_in_c(.long PPC_INST_TLBIVAX | \
__PPC_RA(a) | __PPC_RB(b))
+#define PPC_ERATWE(s, a, w) stringify_in_c(.long PPC_INST_ERATWE | \
+ __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_ERATRE(s, a, w) stringify_in_c(.long PPC_INST_ERATRE | \
+ __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_ERATILX(t, a, b) stringify_in_c(.long PPC_INST_ERATILX | \
+ __PPC_T_TLB(t) | __PPC_RA(a) | \
+ __PPC_RB(b))
+#define PPC_ERATIVAX(s, a, b) stringify_in_c(.long PPC_INST_ERATIVAX | \
+ __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_ERATSX(t, a, w) stringify_in_c(.long PPC_INST_ERATSX | \
+ __PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_INST_ERATSX_DOT | \
+ __PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b))
+
+
/*
* Define what the VSX XX1 form instructions will look like, then add
* the 128 bit load store instructions based on that.
@@ -126,4 +158,7 @@
#define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \
VSX_XX3((t), (a), (b)))
+#define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
+#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
+
#endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 98210067c1c..1b422381fc1 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -170,6 +170,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#define HMT_MEDIUM or 2,2,2
#define HMT_MEDIUM_HIGH or 5,5,5 # medium high priority
#define HMT_HIGH or 3,3,3
+#define HMT_EXTRA_HIGH or 7,7,7 # power7 only
#ifdef __KERNEL__
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index de1967a1ff5..d50c2b6d9bc 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -238,6 +238,10 @@ struct thread_struct {
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
void* kvm_shadow_vcpu; /* KVM internal data */
#endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
+#ifdef CONFIG_PPC64
+ unsigned long dscr;
+ int dscr_inherit;
+#endif
};
#define ARCH_MIN_TASKALIGN 16
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7e4abebe76c..c5cae0dd176 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -99,17 +99,23 @@
#define MSR_LE __MASK(MSR_LE_LG) /* Little Endian */
#if defined(CONFIG_PPC_BOOK3S_64)
+#define MSR_64BIT MSR_SF
+
/* Server variant */
#define MSR_ MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV
-#define MSR_KERNEL MSR_ | MSR_SF
+#define MSR_KERNEL MSR_ | MSR_64BIT
#define MSR_USER32 MSR_ | MSR_PR | MSR_EE
-#define MSR_USER64 MSR_USER32 | MSR_SF
+#define MSR_USER64 MSR_USER32 | MSR_64BIT
#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx)
/* Default MSR for kernel mode. */
#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR)
#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
#endif
+#ifndef MSR_64BIT
+#define MSR_64BIT 0
+#endif
+
/* Floating Point Status and Control Register (FPSCR) Fields */
#define FPSCR_FX 0x80000000 /* FPU exception summary */
#define FPSCR_FEX 0x40000000 /* FPU enabled exception summary */
@@ -182,6 +188,8 @@
#define SPRN_CTR 0x009 /* Count Register */
#define SPRN_DSCR 0x11
+#define SPRN_CFAR 0x1c /* Come From Address Register */
+#define SPRN_ACOP 0x1F /* Available Coprocessor Register */
#define SPRN_CTRLF 0x088
#define SPRN_CTRLT 0x098
#define CTRL_CT 0xc0000000 /* current thread */
@@ -210,8 +218,43 @@
#define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */
#define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */
#define SPRN_SPURR 0x134 /* Scaled PURR */
+#define SPRN_HSPRG0 0x130 /* Hypervisor Scratch 0 */
+#define SPRN_HSPRG1 0x131 /* Hypervisor Scratch 1 */
+#define SPRN_HDSISR 0x132
+#define SPRN_HDAR 0x133
+#define SPRN_HDEC 0x136 /* Hypervisor Decrementer */
#define SPRN_HIOR 0x137 /* 970 Hypervisor interrupt offset */
+#define SPRN_RMOR 0x138 /* Real mode offset register */
+#define SPRN_HRMOR 0x139 /* Real mode offset register */
+#define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */
+#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */
#define SPRN_LPCR 0x13E /* LPAR Control Register */
+#define LPCR_VPM0 (1ul << (63-0))
+#define LPCR_VPM1 (1ul << (63-1))
+#define LPCR_ISL (1ul << (63-2))
+#define LPCR_DPFD_SH (63-11)
+#define LPCR_VRMA_L (1ul << (63-12))
+#define LPCR_VRMA_LP0 (1ul << (63-15))
+#define LPCR_VRMA_LP1 (1ul << (63-16))
+#define LPCR_RMLS 0x1C000000 /* impl dependent rmo limit sel */
+#define LPCR_ILE 0x02000000 /* !HV irqs set MSR:LE */
+#define LPCR_PECE 0x00007000 /* powersave exit cause enable */
+#define LPCR_PECE0 0x00004000 /* ext. exceptions can cause exit */
+#define LPCR_PECE1 0x00002000 /* decrementer can cause exit */
+#define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */
+#define LPCR_MER 0x00000800 /* Mediated External Exception */
+#define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */
+#define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */
+#define LPCR_RMI 0x00000002 /* real mode is cache inhibit */
+#define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */
+#define SPRN_LPID 0x13F /* Logical Partition Identifier */
+#define SPRN_HMER 0x150 /* Hardware m? error recovery */
+#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */
+#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
+#define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */
+#define SPRN_TLBVPNR 0x155 /* P7 TLB control register */
+#define SPRN_TLBRPNR 0x156 /* P7 TLB control register */
+#define SPRN_TLBLPIDR 0x157 /* P7 TLB control register */
#define SPRN_DBAT0L 0x219 /* Data BAT 0 Lower Register */
#define SPRN_DBAT0U 0x218 /* Data BAT 0 Upper Register */
#define SPRN_DBAT1L 0x21B /* Data BAT 1 Lower Register */
@@ -434,16 +477,23 @@
#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */
#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */
#define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */
-#define SRR1_WAKERESET 0x00380000 /* System reset */
#define SRR1_WAKESYSERR 0x00300000 /* System error */
#define SRR1_WAKEEE 0x00200000 /* External interrupt */
#define SRR1_WAKEMT 0x00280000 /* mtctrl */
+#define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */
#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */
#define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */
+#define SRR1_WAKERESET 0x00100000 /* System reset */
+#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */
+#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained,
+ * may not be recoverable */
+#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */
+#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */
#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
#define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */
#define SRR1_PROGTRAP 0x00020000 /* Trap */
#define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */
+
#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */
@@ -673,12 +723,15 @@
* SPRG usage:
*
* All 64-bit:
- * - SPRG1 stores PACA pointer
+ * - SPRG1 stores PACA pointer except 64-bit server in
+ * HV mode in which case it is HSPRG0
*
* 64-bit server:
* - SPRG0 unused (reserved for HV on Power4)
* - SPRG2 scratch for exception vectors
* - SPRG3 unused (user visible)
+ * - HSPRG0 stores PACA in HV mode
+ * - HSPRG1 scratch for "HV" exceptions
*
* 64-bit embedded
* - SPRG0 generic exception scratch
@@ -741,6 +794,41 @@
#ifdef CONFIG_PPC_BOOK3S_64
#define SPRN_SPRG_SCRATCH0 SPRN_SPRG2
+#define SPRN_SPRG_HPACA SPRN_HSPRG0
+#define SPRN_SPRG_HSCRATCH0 SPRN_HSPRG1
+
+#define GET_PACA(rX) \
+ BEGIN_FTR_SECTION_NESTED(66); \
+ mfspr rX,SPRN_SPRG_PACA; \
+ FTR_SECTION_ELSE_NESTED(66); \
+ mfspr rX,SPRN_SPRG_HPACA; \
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define SET_PACA(rX) \
+ BEGIN_FTR_SECTION_NESTED(66); \
+ mtspr SPRN_SPRG_PACA,rX; \
+ FTR_SECTION_ELSE_NESTED(66); \
+ mtspr SPRN_SPRG_HPACA,rX; \
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define GET_SCRATCH0(rX) \
+ BEGIN_FTR_SECTION_NESTED(66); \
+ mfspr rX,SPRN_SPRG_SCRATCH0; \
+ FTR_SECTION_ELSE_NESTED(66); \
+ mfspr rX,SPRN_SPRG_HSCRATCH0; \
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define SET_SCRATCH0(rX) \
+ BEGIN_FTR_SECTION_NESTED(66); \
+ mtspr SPRN_SPRG_SCRATCH0,rX; \
+ FTR_SECTION_ELSE_NESTED(66); \
+ mtspr SPRN_SPRG_HSCRATCH0,rX; \
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#else /* CONFIG_PPC_BOOK3S_64 */
+#define GET_SCRATCH0(rX) mfspr rX,SPRN_SPRG_SCRATCH0
+#define SET_SCRATCH0(rX) mtspr SPRN_SPRG_SCRATCH0,rX
+
#endif
#ifdef CONFIG_PPC_BOOK3E_64
@@ -750,6 +838,10 @@
#define SPRN_SPRG_TLB_EXFRAME SPRN_SPRG2
#define SPRN_SPRG_TLB_SCRATCH SPRN_SPRG6
#define SPRN_SPRG_GEN_SCRATCH SPRN_SPRG0
+
+#define SET_PACA(rX) mtspr SPRN_SPRG_PACA,rX
+#define GET_PACA(rX) mfspr rX,SPRN_SPRG_PACA
+
#endif
#ifdef CONFIG_PPC_BOOK3S_32
@@ -800,6 +892,8 @@
#define SPRN_SPRG_SCRATCH1 SPRN_SPRG1
#endif
+
+
/*
* An mtfsf instruction with the L bit set. On CPUs that support this a
* full 64bits of FPSCR is restored and on other CPUs the L bit is ignored.
@@ -894,6 +988,8 @@
#define PV_POWER5p 0x003B
#define PV_POWER7 0x003F
#define PV_970FX 0x003C
+#define PV_POWER6 0x003E
+#define PV_POWER7 0x003F
#define PV_630 0x0040
#define PV_630p 0x0041
#define PV_970MP 0x0044
diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h
new file mode 100644
index 00000000000..3d52a1132f3
--- /dev/null
+++ b/arch/powerpc/include/asm/reg_a2.h
@@ -0,0 +1,165 @@
+/*
+ * Register definitions specific to the A2 core
+ *
+ * Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ASM_POWERPC_REG_A2_H__
+#define __ASM_POWERPC_REG_A2_H__
+
+#define SPRN_TENSR 0x1b5
+#define SPRN_TENS 0x1b6 /* Thread ENable Set */
+#define SPRN_TENC 0x1b7 /* Thread ENable Clear */
+
+#define SPRN_A2_CCR0 0x3f0 /* Core Configuration Register 0 */
+#define SPRN_A2_CCR1 0x3f1 /* Core Configuration Register 1 */
+#define SPRN_A2_CCR2 0x3f2 /* Core Configuration Register 2 */
+#define SPRN_MMUCR0 0x3fc /* MMU Control Register 0 */
+#define SPRN_MMUCR1 0x3fd /* MMU Control Register 1 */
+#define SPRN_MMUCR2 0x3fe /* MMU Control Register 2 */
+#define SPRN_MMUCR3 0x3ff /* MMU Control Register 3 */
+
+#define SPRN_IAR 0x372
+
+#define SPRN_IUCR0 0x3f3
+#define IUCR0_ICBI_ACK 0x1000
+
+#define SPRN_XUCR0 0x3f6 /* Execution Unit Config Register 0 */
+
+#define A2_IERAT_SIZE 16
+#define A2_DERAT_SIZE 32
+
+/* A2 MMUCR0 bits */
+#define MMUCR0_ECL 0x80000000 /* Extended Class for TLB fills */
+#define MMUCR0_TID_NZ 0x40000000 /* TID is non-zero */
+#define MMUCR0_TS 0x10000000 /* Translation space for TLB fills */
+#define MMUCR0_TGS 0x20000000 /* Guest space for TLB fills */
+#define MMUCR0_TLBSEL 0x0c000000 /* TLB or ERAT target for TLB fills */
+#define MMUCR0_TLBSEL_U 0x00000000 /* TLBSEL = UTLB */
+#define MMUCR0_TLBSEL_I 0x08000000 /* TLBSEL = I-ERAT */
+#define MMUCR0_TLBSEL_D 0x0c000000 /* TLBSEL = D-ERAT */
+#define MMUCR0_LOCKSRSH 0x02000000 /* Use TLB lock on tlbsx. */
+#define MMUCR0_TID_MASK 0x000000ff /* TID field */
+
+/* A2 MMUCR1 bits */
+#define MMUCR1_IRRE 0x80000000 /* I-ERAT round robin enable */
+#define MMUCR1_DRRE 0x40000000 /* D-ERAT round robin enable */
+#define MMUCR1_REE 0x20000000 /* Reference Exception Enable*/
+#define MMUCR1_CEE 0x10000000 /* Change exception enable */
+#define MMUCR1_CSINV_ALL 0x00000000 /* Inval ERAT on all CS evts */
+#define MMUCR1_CSINV_NISYNC 0x04000000 /* Inval ERAT on all ex isync*/
+#define MMUCR1_CSINV_NEVER 0x0c000000 /* Don't inval ERAT on CS */
+#define MMUCR1_ICTID 0x00080000 /* IERAT class field as TID */
+#define MMUCR1_ITTID 0x00040000 /* IERAT thdid field as TID */
+#define MMUCR1_DCTID 0x00020000 /* DERAT class field as TID */
+#define MMUCR1_DTTID 0x00010000 /* DERAT thdid field as TID */
+#define MMUCR1_DCCD 0x00008000 /* DERAT class ignore */
+#define MMUCR1_TLBWE_BINV 0x00004000 /* back invalidate on tlbwe */
+
+/* A2 MMUCR2 bits */
+#define MMUCR2_PSSEL_SHIFT 4
+
+/* A2 MMUCR3 bits */
+#define MMUCR3_THID 0x0000000f /* Thread ID */
+
+/* *** ERAT TLB bits definitions */
+#define TLB0_EPN_MASK ASM_CONST(0xfffffffffffff000)
+#define TLB0_CLASS_MASK ASM_CONST(0x0000000000000c00)
+#define TLB0_CLASS_00 ASM_CONST(0x0000000000000000)
+#define TLB0_CLASS_01 ASM_CONST(0x0000000000000400)
+#define TLB0_CLASS_10 ASM_CONST(0x0000000000000800)
+#define TLB0_CLASS_11 ASM_CONST(0x0000000000000c00)
+#define TLB0_V ASM_CONST(0x0000000000000200)
+#define TLB0_X ASM_CONST(0x0000000000000100)
+#define TLB0_SIZE_MASK ASM_CONST(0x00000000000000f0)
+#define TLB0_SIZE_4K ASM_CONST(0x0000000000000010)
+#define TLB0_SIZE_64K ASM_CONST(0x0000000000000030)
+#define TLB0_SIZE_1M ASM_CONST(0x0000000000000050)
+#define TLB0_SIZE_16M ASM_CONST(0x0000000000000070)
+#define TLB0_SIZE_1G ASM_CONST(0x00000000000000a0)
+#define TLB0_THDID_MASK ASM_CONST(0x000000000000000f)
+#define TLB0_THDID_0 ASM_CONST(0x0000000000000001)
+#define TLB0_THDID_1 ASM_CONST(0x0000000000000002)
+#define TLB0_THDID_2 ASM_CONST(0x0000000000000004)
+#define TLB0_THDID_3 ASM_CONST(0x0000000000000008)
+#define TLB0_THDID_ALL ASM_CONST(0x000000000000000f)
+
+#define TLB1_RESVATTR ASM_CONST(0x00f0000000000000)
+#define TLB1_U0 ASM_CONST(0x0008000000000000)
+#define TLB1_U1 ASM_CONST(0x0004000000000000)
+#define TLB1_U2 ASM_CONST(0x0002000000000000)
+#define TLB1_U3 ASM_CONST(0x0001000000000000)
+#define TLB1_R ASM_CONST(0x0000800000000000)
+#define TLB1_C ASM_CONST(0x0000400000000000)
+#define TLB1_RPN_MASK ASM_CONST(0x000003fffffff000)
+#define TLB1_W ASM_CONST(0x0000000000000800)
+#define TLB1_I ASM_CONST(0x0000000000000400)
+#define TLB1_M ASM_CONST(0x0000000000000200)
+#define TLB1_G ASM_CONST(0x0000000000000100)
+#define TLB1_E ASM_CONST(0x0000000000000080)
+#define TLB1_VF ASM_CONST(0x0000000000000040)
+#define TLB1_UX ASM_CONST(0x0000000000000020)
+#define TLB1_SX ASM_CONST(0x0000000000000010)
+#define TLB1_UW ASM_CONST(0x0000000000000008)
+#define TLB1_SW ASM_CONST(0x0000000000000004)
+#define TLB1_UR ASM_CONST(0x0000000000000002)
+#define TLB1_SR ASM_CONST(0x0000000000000001)
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+#define WSP_UART_PHYS 0xffc000c000
+/* This needs to be careful chosen to hit a !0 congruence class
+ * in the TLB since we bolt it in way 3, which is already occupied
+ * by our linear mapping primary bolted entry in CC 0.
+ */
+#define WSP_UART_VIRT 0xf000000000001000
+#endif
+
+/* A2 erativax attributes definitions */
+#define ERATIVAX_RS_IS_ALL 0x000
+#define ERATIVAX_RS_IS_TID 0x040
+#define ERATIVAX_RS_IS_CLASS 0x080
+#define ERATIVAX_RS_IS_FULLMATCH 0x0c0
+#define ERATIVAX_CLASS_00 0x000
+#define ERATIVAX_CLASS_01 0x010
+#define ERATIVAX_CLASS_10 0x020
+#define ERATIVAX_CLASS_11 0x030
+#define ERATIVAX_PSIZE_4K (TLB_PSIZE_4K >> 1)
+#define ERATIVAX_PSIZE_64K (TLB_PSIZE_64K >> 1)
+#define ERATIVAX_PSIZE_1M (TLB_PSIZE_1M >> 1)
+#define ERATIVAX_PSIZE_16M (TLB_PSIZE_16M >> 1)
+#define ERATIVAX_PSIZE_1G (TLB_PSIZE_1G >> 1)
+
+/* A2 eratilx attributes definitions */
+#define ERATILX_T_ALL 0
+#define ERATILX_T_TID 1
+#define ERATILX_T_TGS 2
+#define ERATILX_T_FULLMATCH 3
+#define ERATILX_T_CLASS0 4
+#define ERATILX_T_CLASS1 5
+#define ERATILX_T_CLASS2 6
+#define ERATILX_T_CLASS3 7
+
+/* XUCR0 bits */
+#define XUCR0_TRACE_UM_T0 0x40000000 /* Thread 0 */
+#define XUCR0_TRACE_UM_T1 0x20000000 /* Thread 1 */
+#define XUCR0_TRACE_UM_T2 0x10000000 /* Thread 2 */
+#define XUCR0_TRACE_UM_T3 0x08000000 /* Thread 3 */
+
+/* A2 CCR0 register */
+#define A2_CCR0_PME_DISABLED 0x00000000
+#define A2_CCR0_PME_SLEEP 0x40000000
+#define A2_CCR0_PME_RVW 0x80000000
+#define A2_CCR0_PME_DISABLED2 0xc0000000
+
+/* A2 CCR2 register */
+#define A2_CCR2_ERAT_ONLY_MODE 0x00000001
+#define A2_CCR2_ENABLE_ICSWX 0x00000002
+#define A2_CCR2_ENABLE_PC 0x20000000
+#define A2_CCR2_ENABLE_TRACE 0x40000000
+
+#endif /* __ASM_POWERPC_REG_A2_H__ */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index b316794aa2b..0f0ad9fa01c 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -27,10 +27,12 @@
#define MSR_CM (1<<31) /* Computation Mode (0=32-bit, 1=64-bit) */
#if defined(CONFIG_PPC_BOOK3E_64)
+#define MSR_64BIT MSR_CM
+
#define MSR_ MSR_ME | MSR_CE
-#define MSR_KERNEL MSR_ | MSR_CM
+#define MSR_KERNEL MSR_ | MSR_64BIT
#define MSR_USER32 MSR_ | MSR_PR | MSR_EE | MSR_DE
-#define MSR_USER64 MSR_USER32 | MSR_CM | MSR_DE
+#define MSR_USER64 MSR_USER32 | MSR_64BIT
#elif defined (CONFIG_40x)
#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)
#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
@@ -81,6 +83,10 @@
#define SPRN_IVOR13 0x19D /* Interrupt Vector Offset Register 13 */
#define SPRN_IVOR14 0x19E /* Interrupt Vector Offset Register 14 */
#define SPRN_IVOR15 0x19F /* Interrupt Vector Offset Register 15 */
+#define SPRN_IVOR38 0x1B0 /* Interrupt Vector Offset Register 38 */
+#define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */
+#define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */
+#define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */
#define SPRN_SPEFSCR 0x200 /* SPE & Embedded FP Status & Control */
#define SPRN_BBEAR 0x201 /* Branch Buffer Entry Address Register */
#define SPRN_BBTAR 0x202 /* Branch Buffer Target Address Register */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 9a1193e30f2..58625d1e780 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -158,7 +158,50 @@ struct rtas_error_log {
unsigned long target:4; /* Target of failed operation */
unsigned long type:8; /* General event or error*/
unsigned long extended_log_length:32; /* length in bytes */
- unsigned char buffer[1];
+ unsigned char buffer[1]; /* Start of extended log */
+ /* Variable length. */
+};
+
+#define RTAS_V6EXT_LOG_FORMAT_EVENT_LOG 14
+
+#define RTAS_V6EXT_COMPANY_ID_IBM (('I' << 24) | ('B' << 16) | ('M' << 8))
+
+/* RTAS general extended event log, Version 6. The extended log starts
+ * from "buffer" field of struct rtas_error_log defined above.
+ */
+struct rtas_ext_event_log_v6 {
+ /* Byte 0 */
+ uint32_t log_valid:1; /* 1:Log valid */
+ uint32_t unrecoverable_error:1; /* 1:Unrecoverable error */
+ uint32_t recoverable_error:1; /* 1:recoverable (correctable */
+ /* or successfully retried) */
+ uint32_t degraded_operation:1; /* 1:Unrecoverable err, bypassed*/
+ /* - degraded operation (e.g. */
+ /* CPU or mem taken off-line) */
+ uint32_t predictive_error:1;
+ uint32_t new_log:1; /* 1:"New" log (Always 1 for */
+ /* data returned from RTAS */
+ uint32_t big_endian:1; /* 1: Big endian */
+ uint32_t :1; /* reserved */
+ /* Byte 1 */
+ uint32_t :8; /* reserved */
+ /* Byte 2 */
+ uint32_t powerpc_format:1; /* Set to 1 (indicating log is */
+ /* in PowerPC format */
+ uint32_t :3; /* reserved */
+ uint32_t log_format:4; /* Log format indicator. Define */
+ /* format used for byte 12-2047 */
+ /* Byte 3 */
+ uint32_t :8; /* reserved */
+ /* Byte 4-11 */
+ uint8_t reserved[8]; /* reserved */
+ /* Byte 12-15 */
+ uint32_t company_id; /* Company ID of the company */
+ /* that defines the format for */
+ /* the vendor specific log type */
+ /* Byte 16-end of log */
+ uint8_t vendor_log[1]; /* Start of vendor specific log */
+ /* Variable length. */
};
/*
diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h
new file mode 100644
index 00000000000..0cabfd7bc2d
--- /dev/null
+++ b/arch/powerpc/include/asm/scom.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ * <benh@kernel.crashing.org>
+ * and David Gibson, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_POWERPC_SCOM_H
+#define _ASM_POWERPC_SCOM_H
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_PPC_SCOM
+
+/*
+ * The SCOM bus is a sideband bus used for accessing various internal
+ * registers of the processor or the chipset. The implementation details
+ * differ between processors and platforms, and the access method as
+ * well.
+ *
+ * This API allows to "map" ranges of SCOM register numbers associated
+ * with a given SCOM controller. The later must be represented by a
+ * device node, though some implementations might support NULL if there
+ * is no possible ambiguity
+ *
+ * Then, scom_read/scom_write can be used to accesses registers inside
+ * that range. The argument passed is a register number relative to
+ * the beginning of the range mapped.
+ */
+
+typedef void *scom_map_t;
+
+/* Value for an invalid SCOM map */
+#define SCOM_MAP_INVALID (NULL)
+
+/* The scom_controller data structure is what the platform passes
+ * to the core code in scom_init, it provides the actual implementation
+ * of all the SCOM functions
+ */
+struct scom_controller {
+ scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
+ void (*unmap)(scom_map_t map);
+
+ u64 (*read)(scom_map_t map, u32 reg);
+ void (*write)(scom_map_t map, u32 reg, u64 value);
+};
+
+extern const struct scom_controller *scom_controller;
+
+/**
+ * scom_init - Initialize the SCOM backend, called by the platform
+ * @controller: The platform SCOM controller
+ */
+static inline void scom_init(const struct scom_controller *controller)
+{
+ scom_controller = controller;
+}
+
+/**
+ * scom_map_ok - Test is a SCOM mapping is successful
+ * @map: The result of scom_map to test
+ */
+static inline int scom_map_ok(scom_map_t map)
+{
+ return map != SCOM_MAP_INVALID;
+}
+
+/**
+ * scom_map - Map a block of SCOM registers
+ * @ctrl_dev: Device node of the SCOM controller
+ * some implementations allow NULL here
+ * @reg: first SCOM register to map
+ * @count: Number of SCOM registers to map
+ */
+
+static inline scom_map_t scom_map(struct device_node *ctrl_dev,
+ u64 reg, u64 count)
+{
+ return scom_controller->map(ctrl_dev, reg, count);
+}
+
+/**
+ * scom_find_parent - Find the SCOM controller for a device
+ * @dev: OF node of the device
+ *
+ * This is not meant for general usage, but in combination with
+ * scom_map() allows to map registers not represented by the
+ * device own scom-reg property. Useful for applying HW workarounds
+ * on things not properly represented in the device-tree for example.
+ */
+struct device_node *scom_find_parent(struct device_node *dev);
+
+
+/**
+ * scom_map_device - Map a device's block of SCOM registers
+ * @dev: OF node of the device
+ * @index: Register bank index (index in "scom-reg" property)
+ *
+ * This function will use the device-tree binding for SCOM which
+ * is to follow "scom-parent" properties until it finds a node with
+ * a "scom-controller" property to find the controller. It will then
+ * use the "scom-reg" property which is made of reg/count pairs,
+ * each of them having a size defined by the controller's #scom-cells
+ * property
+ */
+extern scom_map_t scom_map_device(struct device_node *dev, int index);
+
+
+/**
+ * scom_unmap - Unmap a block of SCOM registers
+ * @map: Result of scom_map is to be unmapped
+ */
+static inline void scom_unmap(scom_map_t map)
+{
+ if (scom_map_ok(map))
+ scom_controller->unmap(map);
+}
+
+/**
+ * scom_read - Read a SCOM register
+ * @map: Result of scom_map
+ * @reg: Register index within that map
+ */
+static inline u64 scom_read(scom_map_t map, u32 reg)
+{
+ return scom_controller->read(map, reg);
+}
+
+/**
+ * scom_write - Write to a SCOM register
+ * @map: Result of scom_map
+ * @reg: Register index within that map
+ * @value: Value to write
+ */
+static inline void scom_write(scom_map_t map, u32 reg, u64 value)
+{
+ scom_controller->write(map, reg, value);
+}
+
+#endif /* CONFIG_PPC_SCOM */
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_SCOM_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index a902a0d3ae0..880b8c1e6e5 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -20,6 +20,7 @@
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
+#include <linux/irqreturn.h>
#ifndef __ASSEMBLY__
@@ -29,14 +30,32 @@
#include <asm/percpu.h>
extern int boot_cpuid;
+extern int boot_cpu_count;
extern void cpu_die(void);
#ifdef CONFIG_SMP
-extern void smp_send_debugger_break(int cpu);
-extern void smp_message_recv(int);
+struct smp_ops_t {
+ void (*message_pass)(int cpu, int msg);
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+ void (*cause_ipi)(int cpu, unsigned long data);
+#endif
+ int (*probe)(void);
+ int (*kick_cpu)(int nr);
+ void (*setup_cpu)(int nr);
+ void (*bringup_done)(void);
+ void (*take_timebase)(void);
+ void (*give_timebase)(void);
+ int (*cpu_disable)(void);
+ void (*cpu_die)(unsigned int nr);
+ int (*cpu_bootable)(unsigned int nr);
+};
+
+extern void smp_send_debugger_break(void);
extern void start_secondary_resume(void);
+extern void __devinit smp_generic_give_timebase(void);
+extern void __devinit smp_generic_take_timebase(void);
DECLARE_PER_CPU(unsigned int, cpu_pvr);
@@ -93,13 +112,16 @@ extern int cpu_to_core_id(int cpu);
#define PPC_MSG_CALL_FUNC_SINGLE 2
#define PPC_MSG_DEBUGGER_BREAK 3
-/*
- * irq controllers that have dedicated ipis per message and don't
- * need additional code in the action handler may use this
- */
+/* for irq controllers that have dedicated ipis per message (4) */
extern int smp_request_message_ipi(int virq, int message);
extern const char *smp_ipi_name[];
+/* for irq controllers with only a single ipi */
+extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
+extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_resend(void);
+extern irqreturn_t smp_ipi_demux(void);
+
void smp_init_iSeries(void);
void smp_init_pSeries(void);
void smp_init_cell(void);
@@ -149,7 +171,7 @@ extern int smt_enabled_at_boot;
extern int smp_mpic_probe(void);
extern void smp_mpic_setup_cpu(int cpu);
-extern void smp_generic_kick_cpu(int nr);
+extern int smp_generic_kick_cpu(int nr);
extern void smp_generic_give_timebase(void);
extern void smp_generic_take_timebase(void);
@@ -169,6 +191,8 @@ extern unsigned long __secondary_hold_spinloop;
extern unsigned long __secondary_hold_acknowledge;
extern char __secondary_hold;
+extern irqreturn_t debug_ipi_action(int irq, void *data);
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 60f64b132bd..8489d372077 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at)
COMPAT_SYS_SPU(open_by_handle_at)
COMPAT_SYS_SPU(clock_adjtime)
SYSCALL_SPU(syncfs)
+COMPAT_SYS_SPU(sendmmsg)
diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
index 5e474ddd227..2dc595dda03 100644
--- a/arch/powerpc/include/asm/system.h
+++ b/arch/powerpc/include/asm/system.h
@@ -219,8 +219,6 @@ extern int mem_init_done; /* set on boot once kmalloc can be called */
extern int init_bootmem_done; /* set once bootmem is available */
extern phys_addr_t memory_limit;
extern unsigned long klimit;
-
-extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
extern int powersave_nap; /* set if nap mode can be used in idle loop */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index d50a380b2b6..81143fcbd11 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -79,6 +79,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
#elif defined(CONFIG_PPC_STD_MMU_64)
+#define MMU_NO_CONTEXT 0
+
/*
* TLB flushing for 64-bit hash-MMU CPUs
*/
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index 11ae699135b..58580e94a2b 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -52,6 +52,7 @@ extern void __init udbg_init_44x_as1(void);
extern void __init udbg_init_40x_realmode(void);
extern void __init udbg_init_cpm(void);
extern void __init udbg_init_usbgecko(void);
+extern void __init udbg_init_wsp(void);
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_UDBG_H */
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3c215648ce6..6d23c8193ca 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -371,10 +371,11 @@
#define __NR_open_by_handle_at 346
#define __NR_clock_adjtime 347
#define __NR_syncfs 348
+#define __NR_sendmmsg 349
#ifdef __KERNEL__
-#define __NR_syscalls 349
+#define __NR_syscalls 350
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
diff --git a/arch/powerpc/include/asm/wsp.h b/arch/powerpc/include/asm/wsp.h
new file mode 100644
index 00000000000..c7dc83088a3
--- /dev/null
+++ b/arch/powerpc/include/asm/wsp.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright 2011 Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef __ASM_POWERPC_WSP_H
+#define __ASM_POWERPC_WSP_H
+
+extern int wsp_get_chip_id(struct device_node *dn);
+
+#endif /* __ASM_POWERPC_WSP_H */
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
new file mode 100644
index 00000000000..b183a406201
--- /dev/null
+++ b/arch/powerpc/include/asm/xics.h
@@ -0,0 +1,142 @@
+/*
+ * Common definitions accross all variants of ICP and ICS interrupt
+ * controllers.
+ */
+
+#ifndef _XICS_H
+#define _XICS_H
+
+#include <linux/interrupt.h>
+
+#define XICS_IPI 2
+#define XICS_IRQ_SPURIOUS 0
+
+/* Want a priority other than 0. Various HW issues require this. */
+#define DEFAULT_PRIORITY 5
+
+/*
+ * Mark IPIs as higher priority so we can take them inside interrupts that
+ * arent marked IRQF_DISABLED
+ */
+#define IPI_PRIORITY 4
+
+/* The least favored priority */
+#define LOWEST_PRIORITY 0xFF
+
+/* The number of priorities defined above */
+#define MAX_NUM_PRIORITIES 3
+
+/* Native ICP */
+extern int icp_native_init(void);
+
+/* PAPR ICP */
+extern int icp_hv_init(void);
+
+/* ICP ops */
+struct icp_ops {
+ unsigned int (*get_irq)(void);
+ void (*eoi)(struct irq_data *d);
+ void (*set_priority)(unsigned char prio);
+ void (*teardown_cpu)(void);
+ void (*flush_ipi)(void);
+#ifdef CONFIG_SMP
+ void (*cause_ipi)(int cpu, unsigned long data);
+ irq_handler_t ipi_action;
+#endif
+};
+
+extern const struct icp_ops *icp_ops;
+
+/* Native ICS */
+extern int ics_native_init(void);
+
+/* RTAS ICS */
+extern int ics_rtas_init(void);
+
+/* ICS instance, hooked up to chip_data of an irq */
+struct ics {
+ struct list_head link;
+ int (*map)(struct ics *ics, unsigned int virq);
+ void (*mask_unknown)(struct ics *ics, unsigned long vec);
+ long (*get_server)(struct ics *ics, unsigned long vec);
+ int (*host_match)(struct ics *ics, struct device_node *node);
+ char data[];
+};
+
+/* Commons */
+extern unsigned int xics_default_server;
+extern unsigned int xics_default_distrib_server;
+extern unsigned int xics_interrupt_server_size;
+extern struct irq_host *xics_host;
+
+struct xics_cppr {
+ unsigned char stack[MAX_NUM_PRIORITIES];
+ int index;
+};
+
+DECLARE_PER_CPU(struct xics_cppr, xics_cppr);
+
+static inline void xics_push_cppr(unsigned int vec)
+{
+ struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+ if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
+ return;
+
+ if (vec == XICS_IPI)
+ os_cppr->stack[++os_cppr->index] = IPI_PRIORITY;
+ else
+ os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY;
+}
+
+static inline unsigned char xics_pop_cppr(void)
+{
+ struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+ if (WARN_ON(os_cppr->index < 1))
+ return LOWEST_PRIORITY;
+
+ return os_cppr->stack[--os_cppr->index];
+}
+
+static inline void xics_set_base_cppr(unsigned char cppr)
+{
+ struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+ /* we only really want to set the priority when there's
+ * just one cppr value on the stack
+ */
+ WARN_ON(os_cppr->index != 0);
+
+ os_cppr->stack[0] = cppr;
+}
+
+static inline unsigned char xics_cppr_top(void)
+{
+ struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+ return os_cppr->stack[os_cppr->index];
+}
+
+DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
+
+extern void xics_init(void);
+extern void xics_setup_cpu(void);
+extern void xics_update_irq_servers(void);
+extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join);
+extern void xics_mask_unknown_vec(unsigned int vec);
+extern irqreturn_t xics_ipi_dispatch(int cpu);
+extern int xics_smp_probe(void);
+extern void xics_register_ics(struct ics *ics);
+extern void xics_teardown_cpu(void);
+extern void xics_kexec_teardown_cpu(int secondary);
+extern void xics_migrate_irqs_away(void);
+#ifdef CONFIG_SMP
+extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+ unsigned int strict_check);
+#else
+#define xics_get_irq_server(virq, cpumask, strict_check) (xics_default_server)
+#endif
+
+
+#endif /* _XICS_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 3bb2a3e6a33..9aab3631257 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -38,11 +38,14 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
paca.o nvram_64.o firmware.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
+obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power7.o
obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
+obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o
obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
+obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o
obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o
obj-$(CONFIG_PPC_CLOCK) += clock.o
procfs-y := proc_powerpc.o
@@ -75,7 +78,6 @@ obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o dbell.o
obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o
extra-y := head_$(CONFIG_WORD_SIZE).o
-extra-$(CONFIG_PPC_BOOK3E_32) := head_new_booke.o
extra-$(CONFIG_40x) := head_40x.o
extra-$(CONFIG_44x) := head_44x.o
extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
@@ -103,6 +105,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
+obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o
+
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 23e6a93145a..36e1c8a29be 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -74,6 +74,7 @@ int main(void)
DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
DEFINE(SIGSEGV, SIGSEGV);
DEFINE(NMI_MASK, NMI_MASK);
+ DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr));
#else
DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
#endif /* CONFIG_PPC64 */
@@ -395,6 +396,7 @@ int main(void)
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
+ DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S
new file mode 100644
index 00000000000..7f818feaa7a
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_a2.S
@@ -0,0 +1,114 @@
+/*
+ * A2 specific assembly support code
+ *
+ * Copyright 2009 Ben Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/processor.h>
+#include <asm/reg_a2.h>
+#include <asm/reg.h>
+#include <asm/thread_info.h>
+
+/*
+ * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity.
+ * This also prevents external LPID accesses but that isn't a problem when not a
+ * guest. Under PV, this setting will be ignored and MMUCR will return the right
+ * number of PID bits we can use.
+ */
+#define MMUCR1_EXTEND_PID \
+ (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \
+ MMUCR1_DTTID | MMUCR1_DCCD)
+
+/*
+ * Use extended PIDs if enabled.
+ * Don't clear the ERATs on context sync events and enable I & D LRU.
+ * Enable ERAT back invalidate when tlbwe overwrites an entry.
+ */
+#define INITIAL_MMUCR1 \
+ (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \
+ MMUCR1_DRRE | MMUCR1_TLBWE_BINV)
+
+_GLOBAL(__setup_cpu_a2)
+ /* Some of these are actually thread local and some are
+ * core local but doing it always won't hurt
+ */
+
+#ifdef CONFIG_PPC_WSP_COPRO
+ /* Make sure ACOP starts out as zero */
+ li r3,0
+ mtspr SPRN_ACOP,r3
+
+ /* Enable icswx instruction */
+ mfspr r3,SPRN_A2_CCR2
+ ori r3,r3,A2_CCR2_ENABLE_ICSWX
+ mtspr SPRN_A2_CCR2,r3
+
+ /* Unmask all CTs in HACOP */
+ li r3,-1
+ mtspr SPRN_HACOP,r3
+#endif /* CONFIG_PPC_WSP_COPRO */
+
+ /* Enable doorbell */
+ mfspr r3,SPRN_A2_CCR2
+ oris r3,r3,A2_CCR2_ENABLE_PC@h
+ mtspr SPRN_A2_CCR2,r3
+ isync
+
+ /* Setup CCR0 to disable power saving for now as it's busted
+ * in the current implementations. Setup CCR1 to wake on
+ * interrupts normally (we write the default value but who
+ * knows what FW may have clobbered...)
+ */
+ li r3,0
+ mtspr SPRN_A2_CCR0, r3
+ LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f)
+ mtspr SPRN_A2_CCR1, r3
+
+ /* Initialise MMUCR1 */
+ lis r3,INITIAL_MMUCR1@h
+ ori r3,r3,INITIAL_MMUCR1@l
+ mtspr SPRN_MMUCR1,r3
+
+ /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
+ LOAD_REG_IMMEDIATE(r3, 0x000a7531)
+ mtspr SPRN_MMUCR2,r3
+
+ /* Set MMUCR3 to write all thids bit to the TLB */
+ LOAD_REG_IMMEDIATE(r3, 0x0000000f)
+ mtspr SPRN_MMUCR3,r3
+
+ /* Don't do ERAT stuff if running guest mode */
+ mfmsr r3
+ andis. r0,r3,MSR_GS@h
+ bne 1f
+
+ /* Now set the I-ERAT watermark to 15 */
+ lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h
+ mtspr SPRN_MMUCR0, r4
+ li r4,A2_IERAT_SIZE-1
+ PPC_ERATWE(r4,r4,3)
+
+ /* Now set the D-ERAT watermark to 31 */
+ lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h
+ mtspr SPRN_MMUCR0, r4
+ li r4,A2_DERAT_SIZE-1
+ PPC_ERATWE(r4,r4,3)
+
+ /* And invalidate the beast just in case. That won't get rid of
+ * a bolted entry though it will be in LRU and so will go away eventually
+ * but let's not bother for now
+ */
+ PPC_ERATILX(0,0,0)
+1:
+ blr
+
+_GLOBAL(__restore_cpu_a2)
+ b __setup_cpu_a2
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 913611105c1..8053db02b85 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -88,6 +88,9 @@ _GLOBAL(__setup_cpu_e5500)
bl __e500_dcache_setup
#ifdef CONFIG_PPC_BOOK3E_64
bl .__setup_base_ivors
+ bl .setup_perfmon_ivor
+ bl .setup_doorbell_ivors
+ bl .setup_ehv_ivors
#else
bl __setup_e500mc_ivors
#endif
diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S
new file mode 100644
index 00000000000..4f9a93fcfe0
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power7.S
@@ -0,0 +1,91 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+/* Entry: r3 = crap, r4 = ptr to cputable entry
+ *
+ * Note that we can be called twice for pseudo-PVRs
+ */
+_GLOBAL(__setup_cpu_power7)
+ mflr r11
+ bl __init_hvmode_206
+ mtlr r11
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ bl __init_LPCR
+ bl __init_TLB
+ mtlr r11
+ blr
+
+_GLOBAL(__restore_cpu_power7)
+ mflr r11
+ mfmsr r3
+ rldicl. r0,r3,4,63
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ bl __init_LPCR
+ bl __init_TLB
+ mtlr r11
+ blr
+
+__init_hvmode_206:
+ /* Disable CPU_FTR_HVMODE_206 and exit if MSR:HV is not set */
+ mfmsr r3
+ rldicl. r0,r3,4,63
+ bnelr
+ ld r5,CPU_SPEC_FEATURES(r4)
+ LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE_206)
+ xor r5,r5,r6
+ std r5,CPU_SPEC_FEATURES(r4)
+ blr
+
+__init_LPCR:
+ /* Setup a sane LPCR:
+ *
+ * LPES = 0b01 (HSRR0/1 used for 0x500)
+ * PECE = 0b111
+ * DPFD = 4
+ *
+ * Other bits untouched for now
+ */
+ mfspr r3,SPRN_LPCR
+ ori r3,r3,(LPCR_LPES0|LPCR_LPES1)
+ xori r3,r3, LPCR_LPES0
+ ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
+ li r5,7
+ sldi r5,r5,LPCR_DPFD_SH
+ andc r3,r3,r5
+ li r5,4
+ sldi r5,r5,LPCR_DPFD_SH
+ or r3,r3,r5
+ mtspr SPRN_LPCR,r3
+ isync
+ blr
+
+__init_TLB:
+ /* Clear the TLB */
+ li r6,128
+ mtctr r6
+ li r7,0xc00 /* IS field = 0b11 */
+ ptesync
+2: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 2b
+ ptesync
+1: blr
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b9602ee06de..34d2722b945 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -62,10 +62,12 @@ extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_pa6t(void);
extern void __restore_cpu_ppc970(void);
extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power7(void);
+extern void __restore_cpu_a2(void);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -199,7 +201,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER4 (gp)",
.cpu_features = CPU_FTRS_POWER4,
.cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -214,7 +216,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER4+ (gq)",
.cpu_features = CPU_FTRS_POWER4,
.cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -230,7 +232,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_features = CPU_FTRS_PPC970,
.cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_PPC970,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -248,7 +250,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_features = CPU_FTRS_PPC970,
.cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_PPC970,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -284,7 +286,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_features = CPU_FTRS_PPC970,
.cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_PPC970,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -302,7 +304,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_features = CPU_FTRS_PPC970,
.cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_PPC970,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -318,7 +320,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER5 (gr)",
.cpu_features = CPU_FTRS_POWER5,
.cpu_user_features = COMMON_USER_POWER5,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_POWER5,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -338,7 +340,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER5+ (gs)",
.cpu_features = CPU_FTRS_POWER5,
.cpu_user_features = COMMON_USER_POWER5_PLUS,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_POWER5,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -354,7 +356,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER5+ (gs)",
.cpu_features = CPU_FTRS_POWER5,
.cpu_user_features = COMMON_USER_POWER5_PLUS,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_POWER5,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -371,7 +373,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER5+",
.cpu_features = CPU_FTRS_POWER5,
.cpu_user_features = COMMON_USER_POWER5_PLUS,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_POWER5,
.icache_bsize = 128,
.dcache_bsize = 128,
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
@@ -385,7 +387,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_features = CPU_FTRS_POWER6,
.cpu_user_features = COMMON_USER_POWER6 |
PPC_FEATURE_POWER6_EXT,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_POWER6,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -404,7 +406,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER6 (architected)",
.cpu_features = CPU_FTRS_POWER6,
.cpu_user_features = COMMON_USER_POWER6,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_POWER6,
.icache_bsize = 128,
.dcache_bsize = 128,
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
@@ -417,12 +419,13 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER7 (architected)",
.cpu_features = CPU_FTRS_POWER7,
.cpu_user_features = COMMON_USER_POWER7,
- .mmu_features = MMU_FTR_HPTE_TABLE |
- MMU_FTR_TLBIE_206,
+ .mmu_features = MMU_FTRS_POWER7,
.icache_bsize = 128,
.dcache_bsize = 128,
.oprofile_type = PPC_OPROFILE_POWER4,
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
.platform = "power7",
},
{ /* Power7 */
@@ -431,14 +434,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER7 (raw)",
.cpu_features = CPU_FTRS_POWER7,
.cpu_user_features = COMMON_USER_POWER7,
- .mmu_features = MMU_FTR_HPTE_TABLE |
- MMU_FTR_TLBIE_206,
+ .mmu_features = MMU_FTRS_POWER7,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power7",
.oprofile_type = PPC_OPROFILE_POWER4,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
.platform = "power7",
},
{ /* Power7+ */
@@ -447,14 +451,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER7+ (raw)",
.cpu_features = CPU_FTRS_POWER7,
.cpu_user_features = COMMON_USER_POWER7,
- .mmu_features = MMU_FTR_HPTE_TABLE |
- MMU_FTR_TLBIE_206,
+ .mmu_features = MMU_FTRS_POWER7,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power7",
.oprofile_type = PPC_OPROFILE_POWER4,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
.platform = "power7+",
},
{ /* Cell Broadband Engine */
@@ -465,7 +470,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_user_features = COMMON_USER_PPC64 |
PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP |
PPC_FEATURE_SMT,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_CELL,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 4,
@@ -480,7 +485,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "PA6T",
.cpu_features = CPU_FTRS_PA6T,
.cpu_user_features = COMMON_USER_PA6T,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_PA6T,
.icache_bsize = 64,
.dcache_bsize = 64,
.num_pmcs = 6,
@@ -497,7 +502,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER4 (compatible)",
.cpu_features = CPU_FTRS_COMPATIBLE,
.cpu_user_features = COMMON_USER_PPC64,
- .mmu_features = MMU_FTR_HPTE_TABLE,
+ .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -2005,7 +2010,22 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_PPC32 */
#endif /* CONFIG_E500 */
-#ifdef CONFIG_PPC_BOOK3E_64
+#ifdef CONFIG_PPC_A2
+ { /* Standard A2 (>= DD2) + FPU core */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00480000,
+ .cpu_name = "A2 (>= DD2)",
+ .cpu_features = CPU_FTRS_A2,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTRS_A2,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 0,
+ .cpu_setup = __setup_cpu_a2,
+ .cpu_restore = __restore_cpu_a2,
+ .machine_check = machine_check_generic,
+ .platform = "ppca2",
+ },
{ /* This is a default entry to get going, to be replaced by
* a real one at some stage
*/
@@ -2026,7 +2046,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "power6",
},
-#endif
+#endif /* CONFIG_PPC_A2 */
};
static struct cpu_spec the_cpu_spec;
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 5b5e1f002a8..4e6ee944495 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -64,9 +64,9 @@ void crash_ipi_callback(struct pt_regs *regs)
return;
hard_irq_disable();
- if (!cpu_isset(cpu, cpus_in_crash))
+ if (!cpumask_test_cpu(cpu, &cpus_in_crash))
crash_save_cpu(regs, cpu);
- cpu_set(cpu, cpus_in_crash);
+ cpumask_set_cpu(cpu, &cpus_in_crash);
/*
* Entered via soft-reset - could be the kdump
@@ -77,8 +77,8 @@ void crash_ipi_callback(struct pt_regs *regs)
* Tell the kexec CPU that entered via soft-reset and ready
* to go down.
*/
- if (cpu_isset(cpu, cpus_in_sr)) {
- cpu_clear(cpu, cpus_in_sr);
+ if (cpumask_test_cpu(cpu, &cpus_in_sr)) {
+ cpumask_clear_cpu(cpu, &cpus_in_sr);
atomic_inc(&enter_on_soft_reset);
}
@@ -87,7 +87,7 @@ void crash_ipi_callback(struct pt_regs *regs)
* This barrier is needed to make sure that all CPUs are stopped.
* If not, soft-reset will be invoked to bring other CPUs.
*/
- while (!cpu_isset(crashing_cpu, cpus_in_crash))
+ while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash))
cpu_relax();
if (ppc_md.kexec_cpu_down)
@@ -109,7 +109,7 @@ static void crash_soft_reset_check(int cpu)
{
unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
- cpu_clear(cpu, cpus_in_sr);
+ cpumask_clear_cpu(cpu, &cpus_in_sr);
while (atomic_read(&enter_on_soft_reset) != ncpus)
cpu_relax();
}
@@ -132,7 +132,7 @@ static void crash_kexec_prepare_cpus(int cpu)
*/
printk(KERN_EMERG "Sending IPI to other cpus...\n");
msecs = 10000;
- while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) {
+ while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) {
cpu_relax();
mdelay(1);
}
@@ -144,52 +144,24 @@ static void crash_kexec_prepare_cpus(int cpu)
* user to do soft reset such that we get all.
* Soft-reset will be used until better mechanism is implemented.
*/
- if (cpus_weight(cpus_in_crash) < ncpus) {
+ if (cpumask_weight(&cpus_in_crash) < ncpus) {
printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n",
- ncpus - cpus_weight(cpus_in_crash));
+ ncpus - cpumask_weight(&cpus_in_crash));
printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n");
- cpus_in_sr = CPU_MASK_NONE;
+ cpumask_clear(&cpus_in_sr);
atomic_set(&enter_on_soft_reset, 0);
- while (cpus_weight(cpus_in_crash) < ncpus)
+ while (cpumask_weight(&cpus_in_crash) < ncpus)
cpu_relax();
}
/*
* Make sure all CPUs are entered via soft-reset if the kdump is
* invoked using soft-reset.
*/
- if (cpu_isset(cpu, cpus_in_sr))
+ if (cpumask_test_cpu(cpu, &cpus_in_sr))
crash_soft_reset_check(cpu);
/* Leave the IPI callback set */
}
-/* wait for all the CPUs to hit real mode but timeout if they don't come in */
-#ifdef CONFIG_PPC_STD_MMU_64
-static void crash_kexec_wait_realmode(int cpu)
-{
- unsigned int msecs;
- int i;
-
- msecs = 10000;
- for (i=0; i < NR_CPUS && msecs > 0; i++) {
- if (i == cpu)
- continue;
-
- while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
- barrier();
- if (!cpu_possible(i)) {
- break;
- }
- if (!cpu_online(i)) {
- break;
- }
- msecs--;
- mdelay(1);
- }
- }
- mb();
-}
-#endif /* CONFIG_PPC_STD_MMU_64 */
-
/*
* This function will be called by secondary cpus or by kexec cpu
* if soft-reset is activated to stop some CPUs.
@@ -210,7 +182,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
* exited using 'x'(exit and recover) or
* kexec_should_crash() failed for all running tasks.
*/
- cpu_clear(cpu, cpus_in_sr);
+ cpumask_clear_cpu(cpu, &cpus_in_sr);
local_irq_restore(flags);
return;
}
@@ -224,7 +196,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
* then start kexec boot.
*/
crash_soft_reset_check(cpu);
- cpu_set(crashing_cpu, cpus_in_crash);
+ cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(1, 0);
machine_kexec(kexec_crash_image);
@@ -234,7 +206,6 @@ void crash_kexec_secondary(struct pt_regs *regs)
}
#else /* ! CONFIG_SMP */
-static inline void crash_kexec_wait_realmode(int cpu) {}
static void crash_kexec_prepare_cpus(int cpu)
{
@@ -253,10 +224,40 @@ static void crash_kexec_prepare_cpus(int cpu)
void crash_kexec_secondary(struct pt_regs *regs)
{
- cpus_in_sr = CPU_MASK_NONE;
+ cpumask_clear(&cpus_in_sr);
}
#endif /* CONFIG_SMP */
+/* wait for all the CPUs to hit real mode but timeout if they don't come in */
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64)
+static void crash_kexec_wait_realmode(int cpu)
+{
+ unsigned int msecs;
+ int i;
+
+ msecs = 10000;
+ for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
+ if (i == cpu)
+ continue;
+
+ while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+ barrier();
+ if (!cpu_possible(i)) {
+ break;
+ }
+ if (!cpu_online(i)) {
+ break;
+ }
+ msecs--;
+ mdelay(1);
+ }
+ }
+ mb();
+}
+#else
+static inline void crash_kexec_wait_realmode(int cpu) {}
+#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */
+
/*
* Register a function to be called on shutdown. Only use this if you
* can't reset your device in the second kernel.
@@ -345,7 +346,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
crashing_cpu = smp_processor_id();
crash_save_cpu(regs, crashing_cpu);
crash_kexec_prepare_cpus(crashing_cpu);
- cpu_set(crashing_cpu, cpus_in_crash);
+ cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
crash_kexec_wait_realmode(crashing_cpu);
machine_kexec_mask_interrupts();
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 3307a52d797..2cc451aaaca 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -13,84 +13,35 @@
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/threads.h>
-#include <linux/percpu.h>
+#include <linux/hardirq.h>
#include <asm/dbell.h>
#include <asm/irq_regs.h>
#ifdef CONFIG_SMP
-struct doorbell_cpu_info {
- unsigned long messages; /* current messages bits */
- unsigned int tag; /* tag value */
-};
-
-static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info);
-
void doorbell_setup_this_cpu(void)
{
- struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
+ unsigned long tag = mfspr(SPRN_PIR) & 0x3fff;
- info->messages = 0;
- info->tag = mfspr(SPRN_PIR) & 0x3fff;
+ smp_muxed_ipi_set_data(smp_processor_id(), tag);
}
-void doorbell_message_pass(int target, int msg)
+void doorbell_cause_ipi(int cpu, unsigned long data)
{
- struct doorbell_cpu_info *info;
- int i;
-
- if (target < NR_CPUS) {
- info = &per_cpu(doorbell_cpu_info, target);
- set_bit(msg, &info->messages);
- ppc_msgsnd(PPC_DBELL, 0, info->tag);
- }
- else if (target == MSG_ALL_BUT_SELF) {
- for_each_online_cpu(i) {
- if (i == smp_processor_id())
- continue;
- info = &per_cpu(doorbell_cpu_info, i);
- set_bit(msg, &info->messages);
- ppc_msgsnd(PPC_DBELL, 0, info->tag);
- }
- }
- else { /* target == MSG_ALL */
- for_each_online_cpu(i) {
- info = &per_cpu(doorbell_cpu_info, i);
- set_bit(msg, &info->messages);
- }
- ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0);
- }
+ ppc_msgsnd(PPC_DBELL, 0, data);
}
void doorbell_exception(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
- int msg;
- /* Warning: regs can be NULL when called from irq enable */
+ irq_enter();
- if (!info->messages || (num_online_cpus() < 2))
- goto out;
+ smp_ipi_demux();
- for (msg = 0; msg < 4; msg++)
- if (test_and_clear_bit(msg, &info->messages))
- smp_message_recv(msg);
-
-out:
+ irq_exit();
set_irq_regs(old_regs);
}
-
-void doorbell_check_self(void)
-{
- struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
-
- if (!info->messages)
- return;
-
- ppc_msgsnd(PPC_DBELL, 0, info->tag);
-}
-
#else /* CONFIG_SMP */
void doorbell_exception(struct pt_regs *regs)
{
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d82878c4daa..d834425186a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -421,6 +421,12 @@ BEGIN_FTR_SECTION
std r24,THREAD_VRSAVE(r3)
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ mfspr r25,SPRN_DSCR
+ std r25,THREAD_DSCR(r3)
+END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
+#endif
and. r0,r0,r22
beq+ 1f
andc r22,r22,r0
@@ -462,10 +468,10 @@ BEGIN_FTR_SECTION
FTR_SECTION_ELSE_NESTED(95)
clrrdi r6,r8,40 /* get its 1T ESID */
clrrdi r9,r1,40 /* get current sp 1T ESID */
- ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_1T_SEGMENT, 95)
+ ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95)
FTR_SECTION_ELSE
b 2f
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_SLB)
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB)
clrldi. r0,r6,2 /* is new ESID c00000000? */
cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */
cror eq,4*cr1+eq,eq
@@ -479,7 +485,7 @@ BEGIN_FTR_SECTION
li r9,MMU_SEGSIZE_1T /* insert B field */
oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
/* Update the last bolted SLB. No write barriers are needed
* here, provided we only update the current CPU's SLB shadow
@@ -491,7 +497,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */
std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */
- /* No need to check for CPU_FTR_NO_SLBIE_B here, since when
+ /* No need to check for MMU_FTR_NO_SLBIE_B here, since when
* we have 1TB segments, the only CPUs known to have the errata
* only support less than 1TB of system memory and we'll never
* actually hit this code path.
@@ -522,6 +528,15 @@ BEGIN_FTR_SECTION
mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ ld r0,THREAD_DSCR(r4)
+ cmpd r0,r25
+ beq 1f
+ mtspr SPRN_DSCR,r0
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
+#endif
/* r3-r13 are destroyed -- Cort */
REST_8GPRS(14, r1)
@@ -838,7 +853,7 @@ _GLOBAL(enter_rtas)
_STATIC(rtas_return_loc)
/* relocation is off at this point */
- mfspr r4,SPRN_SPRG_PACA /* Get PACA */
+ GET_PACA(r4)
clrldi r4,r4,2 /* convert to realmode address */
bcl 20,31,$+4
@@ -869,7 +884,7 @@ _STATIC(rtas_restore_regs)
REST_8GPRS(14, r1) /* Restore the non-volatiles */
REST_10GPRS(22, r1) /* ditto */
- mfspr r13,SPRN_SPRG_PACA
+ GET_PACA(r13)
ld r4,_CCR(r1)
mtcr r4
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 9651acc3504..d24d4400cc7 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -17,6 +17,7 @@
#include <asm/cputable.h>
#include <asm/setup.h>
#include <asm/thread_info.h>
+#include <asm/reg_a2.h>
#include <asm/exception-64e.h>
#include <asm/bug.h>
#include <asm/irqflags.h>
@@ -252,9 +253,6 @@ exception_marker:
.balign 0x1000
.globl interrupt_base_book3e
interrupt_base_book3e: /* fake trap */
- /* Note: If real debug exceptions are supported by the HW, the vector
- * below will have to be patched up to point to an appropriate handler
- */
EXCEPTION_STUB(0x000, machine_check) /* 0x0200 */
EXCEPTION_STUB(0x020, critical_input) /* 0x0580 */
EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */
@@ -271,8 +269,13 @@ interrupt_base_book3e: /* fake trap */
EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */
EXCEPTION_STUB(0x1c0, data_tlb_miss)
EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
+ EXCEPTION_STUB(0x260, perfmon)
EXCEPTION_STUB(0x280, doorbell)
EXCEPTION_STUB(0x2a0, doorbell_crit)
+ EXCEPTION_STUB(0x2c0, guest_doorbell)
+ EXCEPTION_STUB(0x2e0, guest_doorbell_crit)
+ EXCEPTION_STUB(0x300, hypercall)
+ EXCEPTION_STUB(0x320, ehpriv)
.globl interrupt_end_book3e
interrupt_end_book3e:
@@ -454,6 +457,70 @@ interrupt_end_book3e:
kernel_dbg_exc:
b . /* NYI */
+/* Debug exception as a debug interrupt*/
+ START_EXCEPTION(debug_debug);
+ DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS)
+
+ /*
+ * If there is a single step or branch-taken exception in an
+ * exception entry sequence, it was probably meant to apply to
+ * the code where the exception occurred (since exception entry
+ * doesn't turn off DE automatically). We simulate the effect
+ * of turning off DE on entry to an exception handler by turning
+ * off DE in the DSRR1 value and clearing the debug status.
+ */
+
+ mfspr r14,SPRN_DBSR /* check single-step/branch taken */
+ andis. r15,r14,DBSR_IC@h
+ beq+ 1f
+
+ LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
+ LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+ cmpld cr0,r10,r14
+ cmpld cr1,r10,r15
+ blt+ cr0,1f
+ bge+ cr1,1f
+
+ /* here it looks like we got an inappropriate debug exception. */
+ lis r14,DBSR_IC@h /* clear the IC event */
+ rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */
+ mtspr SPRN_DBSR,r14
+ mtspr SPRN_DSRR1,r11
+ lwz r10,PACA_EXDBG+EX_CR(r13) /* restore registers */
+ ld r1,PACA_EXDBG+EX_R1(r13)
+ ld r14,PACA_EXDBG+EX_R14(r13)
+ ld r15,PACA_EXDBG+EX_R15(r13)
+ mtcr r10
+ ld r10,PACA_EXDBG+EX_R10(r13) /* restore registers */
+ ld r11,PACA_EXDBG+EX_R11(r13)
+ mfspr r13,SPRN_SPRG_DBG_SCRATCH
+ rfdi
+
+ /* Normal debug exception */
+ /* XXX We only handle coming from userspace for now since we can't
+ * quite save properly an interrupted kernel state yet
+ */
+1: andi. r14,r11,MSR_PR; /* check for userspace again */
+ beq kernel_dbg_exc; /* if from kernel mode */
+
+ /* Now we mash up things to make it look like we are coming on a
+ * normal exception
+ */
+ mfspr r15,SPRN_SPRG_DBG_SCRATCH
+ mtspr SPRN_SPRG_GEN_SCRATCH,r15
+ mfspr r14,SPRN_DBSR
+ EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL)
+ std r14,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r14
+ ld r14,PACA_EXDBG+EX_R14(r13)
+ ld r15,PACA_EXDBG+EX_R15(r13)
+ bl .save_nvgprs
+ bl .DebugException
+ b .ret_from_except
+
+ MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE)
+
/* Doorbell interrupt */
MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE)
@@ -468,6 +535,11 @@ kernel_dbg_exc:
// b ret_from_crit_except
b .
+ MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE)
+ MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE)
+ MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE)
+ MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE)
+
/*
* An interrupt came in while soft-disabled; clear EE in SRR1,
@@ -587,7 +659,12 @@ fast_exception_return:
BAD_STACK_TRAMPOLINE(0x000)
BAD_STACK_TRAMPOLINE(0x100)
BAD_STACK_TRAMPOLINE(0x200)
+BAD_STACK_TRAMPOLINE(0x260)
+BAD_STACK_TRAMPOLINE(0x2c0)
+BAD_STACK_TRAMPOLINE(0x2e0)
BAD_STACK_TRAMPOLINE(0x300)
+BAD_STACK_TRAMPOLINE(0x310)
+BAD_STACK_TRAMPOLINE(0x320)
BAD_STACK_TRAMPOLINE(0x400)
BAD_STACK_TRAMPOLINE(0x500)
BAD_STACK_TRAMPOLINE(0x600)
@@ -864,8 +941,23 @@ have_hes:
* that will have to be made dependent on whether we are running under
* a hypervisor I suppose.
*/
- ori r3,r3,MAS0_HES | MAS0_WQ_ALLWAYS
- mtspr SPRN_MAS0,r3
+
+ /* BEWARE, MAGIC
+ * This code is called as an ordinary function on the boot CPU. But to
+ * avoid duplication, this code is also used in SCOM bringup of
+ * secondary CPUs. We read the code between the initial_tlb_code_start
+ * and initial_tlb_code_end labels one instruction at a time and RAM it
+ * into the new core via SCOM. That doesn't process branches, so there
+ * must be none between those two labels. It also means if this code
+ * ever takes any parameters, the SCOM code must also be updated to
+ * provide them.
+ */
+ .globl a2_tlbinit_code_start
+a2_tlbinit_code_start:
+
+ ori r11,r3,MAS0_WQ_ALLWAYS
+ oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
+ mtspr SPRN_MAS0,r11
lis r3,(MAS1_VALID | MAS1_IPROT)@h
ori r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT
mtspr SPRN_MAS1,r3
@@ -879,18 +971,86 @@ have_hes:
/* Write the TLB entry */
tlbwe
+ .globl a2_tlbinit_after_linear_map
+a2_tlbinit_after_linear_map:
+
/* Now we branch the new virtual address mapped by this entry */
LOAD_REG_IMMEDIATE(r3,1f)
mtctr r3
bctr
1: /* We are now running at PAGE_OFFSET, clean the TLB of everything
- * else (XXX we should scan for bolted crap from the firmware too)
+ * else (including IPROTed things left by firmware)
+ * r4 = TLBnCFG
+ * r3 = current address (more or less)
*/
+
+ li r5,0
+ mtspr SPRN_MAS6,r5
+ tlbsx 0,r3
+
+ rlwinm r9,r4,0,TLBnCFG_N_ENTRY
+ rlwinm r10,r4,8,0xff
+ addi r10,r10,-1 /* Get inner loop mask */
+
+ li r3,1
+
+ mfspr r5,SPRN_MAS1
+ rlwinm r5,r5,0,(~(MAS1_VALID|MAS1_IPROT))
+
+ mfspr r6,SPRN_MAS2
+ rldicr r6,r6,0,51 /* Extract EPN */
+
+ mfspr r7,SPRN_MAS0
+ rlwinm r7,r7,0,0xffff0fff /* Clear HES and WQ */
+
+ rlwinm r8,r7,16,0xfff /* Extract ESEL */
+
+2: add r4,r3,r8
+ and r4,r4,r10
+
+ rlwimi r7,r4,16,MAS0_ESEL_MASK
+
+ mtspr SPRN_MAS0,r7
+ mtspr SPRN_MAS1,r5
+ mtspr SPRN_MAS2,r6
+ tlbwe
+
+ addi r3,r3,1
+ and. r4,r3,r10
+
+ bne 3f
+ addis r6,r6,(1<<30)@h
+3:
+ cmpw r3,r9
+ blt 2b
+
+ .globl a2_tlbinit_after_iprot_flush
+a2_tlbinit_after_iprot_flush:
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+ /* Now establish early debug mappings if applicable */
+ /* Restore the MAS0 we used for linear mapping load */
+ mtspr SPRN_MAS0,r11
+
+ lis r3,(MAS1_VALID | MAS1_IPROT)@h
+ ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT)
+ mtspr SPRN_MAS1,r3
+ LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G)
+ mtspr SPRN_MAS2,r3
+ LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW)
+ mtspr SPRN_MAS7_MAS3,r3
+ /* re-use the MAS8 value from the linear mapping */
+ tlbwe
+#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
+
PPC_TLBILX(0,0,0)
sync
isync
+ .globl a2_tlbinit_code_end
+a2_tlbinit_code_end:
+
/* We translate LR and return */
mflr r3
tovirt(r3,r3)
@@ -1040,3 +1200,33 @@ _GLOBAL(__setup_base_ivors)
sync
blr
+
+_GLOBAL(setup_perfmon_ivor)
+ SET_IVOR(35, 0x260) /* Performance Monitor */
+ blr
+
+_GLOBAL(setup_doorbell_ivors)
+ SET_IVOR(36, 0x280) /* Processor Doorbell */
+ SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */
+
+ /* Check MMUCFG[LPIDSIZE] to determine if we have category E.HV */
+ mfspr r10,SPRN_MMUCFG
+ rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
+ beqlr
+
+ SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */
+ SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */
+ blr
+
+_GLOBAL(setup_ehv_ivors)
+ /*
+ * We may be running as a guest and lack E.HV even on a chip
+ * that normally has it.
+ */
+ mfspr r10,SPRN_MMUCFG
+ rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
+ beqlr
+
+ SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */
+ SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */
+ blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index aeb739e1876..a85f4874cba 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -37,23 +37,51 @@
.globl __start_interrupts
__start_interrupts:
- STD_EXCEPTION_PSERIES(0x100, system_reset)
+ .globl system_reset_pSeries;
+system_reset_pSeries:
+ HMT_MEDIUM;
+ DO_KVM 0x100;
+ SET_SCRATCH0(r13)
+#ifdef CONFIG_PPC_P7_NAP
+BEGIN_FTR_SECTION
+ /* Running native on arch 2.06 or later, check if we are
+ * waking up from nap. We only handle no state loss and
+ * supervisor state loss. We do -not- handle hypervisor
+ * state loss at this time.
+ */
+ mfspr r13,SPRN_SRR1
+ rlwinm r13,r13,47-31,30,31
+ cmpwi cr0,r13,1
+ bne 1f
+ b .power7_wakeup_noloss
+1: cmpwi cr0,r13,2
+ bne 1f
+ b .power7_wakeup_loss
+ /* Total loss of HV state is fatal, we could try to use the
+ * PIR to locate a PACA, then use an emergency stack etc...
+ * but for now, let's just stay stuck here
+ */
+1: cmpwi cr0,r13,3
+ beq .
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206)
+#endif /* CONFIG_PPC_P7_NAP */
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD)
. = 0x200
_machine_check_pSeries:
HMT_MEDIUM
DO_KVM 0x200
- mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */
- EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD)
. = 0x300
.globl data_access_pSeries
data_access_pSeries:
HMT_MEDIUM
DO_KVM 0x300
- mtspr SPRN_SPRG_SCRATCH0,r13
+ SET_SCRATCH0(r13)
BEGIN_FTR_SECTION
- mfspr r13,SPRN_SPRG_PACA
+ GET_PACA(r13)
std r9,PACA_EXSLB+EX_R9(r13)
std r10,PACA_EXSLB+EX_R10(r13)
mfspr r10,SPRN_DAR
@@ -67,22 +95,22 @@ BEGIN_FTR_SECTION
std r11,PACA_EXGEN+EX_R11(r13)
ld r11,PACA_EXSLB+EX_R9(r13)
std r12,PACA_EXGEN+EX_R12(r13)
- mfspr r12,SPRN_SPRG_SCRATCH0
+ GET_SCRATCH0(r12)
std r10,PACA_EXGEN+EX_R10(r13)
std r11,PACA_EXGEN+EX_R9(r13)
std r12,PACA_EXGEN+EX_R13(r13)
- EXCEPTION_PROLOG_PSERIES_1(data_access_common)
+ EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD)
FTR_SECTION_ELSE
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB)
. = 0x380
.globl data_access_slb_pSeries
data_access_slb_pSeries:
HMT_MEDIUM
DO_KVM 0x380
- mtspr SPRN_SPRG_SCRATCH0,r13
- mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
std r3,PACA_EXSLB+EX_R3(r13)
mfspr r3,SPRN_DAR
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
@@ -95,7 +123,7 @@ data_access_slb_pSeries:
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- mfspr r10,SPRN_SPRG_SCRATCH0
+ GET_SCRATCH0(r10)
std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
#ifndef CONFIG_RELOCATABLE
@@ -113,15 +141,15 @@ data_access_slb_pSeries:
bctr
#endif
- STD_EXCEPTION_PSERIES(0x400, instruction_access)
+ STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access)
. = 0x480
.globl instruction_access_slb_pSeries
instruction_access_slb_pSeries:
HMT_MEDIUM
DO_KVM 0x480
- mtspr SPRN_SPRG_SCRATCH0,r13
- mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
std r3,PACA_EXSLB+EX_R3(r13)
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
@@ -134,7 +162,7 @@ instruction_access_slb_pSeries:
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- mfspr r10,SPRN_SPRG_SCRATCH0
+ GET_SCRATCH0(r10)
std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
#ifndef CONFIG_RELOCATABLE
@@ -147,13 +175,29 @@ instruction_access_slb_pSeries:
bctr
#endif
- MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt)
- STD_EXCEPTION_PSERIES(0x600, alignment)
- STD_EXCEPTION_PSERIES(0x700, program_check)
- STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
- MASKABLE_EXCEPTION_PSERIES(0x900, decrementer)
- STD_EXCEPTION_PSERIES(0xa00, trap_0a)
- STD_EXCEPTION_PSERIES(0xb00, trap_0b)
+ /* We open code these as we can't have a ". = x" (even with
+ * x = "." within a feature section
+ */
+ . = 0x500;
+ .globl hardware_interrupt_pSeries;
+ .globl hardware_interrupt_hv;
+hardware_interrupt_pSeries:
+hardware_interrupt_hv:
+ BEGIN_FTR_SECTION
+ _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD)
+ FTR_SECTION_ELSE
+ _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV)
+ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206)
+
+ STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
+ STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
+ STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
+
+ MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer)
+ MASKABLE_EXCEPTION_HV(0x980, 0x980, decrementer)
+
+ STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a)
+ STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
. = 0xc00
.globl system_call_pSeries
@@ -165,13 +209,13 @@ BEGIN_FTR_SECTION
beq- 1f
END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
mr r9,r13
- mfspr r13,SPRN_SPRG_PACA
+ GET_PACA(r13)
mfspr r11,SPRN_SRR0
- ld r12,PACAKBASE(r13)
- ld r10,PACAKMSR(r13)
- LOAD_HANDLER(r12, system_call_entry)
- mtspr SPRN_SRR0,r12
mfspr r12,SPRN_SRR1
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10, system_call_entry)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
rfid
b . /* prevent speculative execution */
@@ -183,8 +227,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
rfid /* return to userspace */
b .
- STD_EXCEPTION_PSERIES(0xd00, single_step)
- STD_EXCEPTION_PSERIES(0xe00, trap_0e)
+ STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
+
+ /* At 0xe??? we have a bunch of hypervisor exceptions, we branch
+ * out of line to handle them
+ */
+ . = 0xe00
+ b h_data_storage_hv
+ . = 0xe20
+ b h_instr_storage_hv
+ . = 0xe40
+ b emulation_assist_hv
+ . = 0xe50
+ b hmi_exception_hv
+ . = 0xe60
+ b hmi_exception_hv
/* We need to deal with the Altivec unavailable exception
* here which is at 0xf20, thus in the middle of the
@@ -193,39 +250,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
*/
performance_monitor_pSeries_1:
. = 0xf00
- DO_KVM 0xf00
b performance_monitor_pSeries
altivec_unavailable_pSeries_1:
. = 0xf20
- DO_KVM 0xf20
b altivec_unavailable_pSeries
vsx_unavailable_pSeries_1:
. = 0xf40
- DO_KVM 0xf40
b vsx_unavailable_pSeries
#ifdef CONFIG_CBE_RAS
- HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error)
+ STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
#endif /* CONFIG_CBE_RAS */
- STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
+ STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
#ifdef CONFIG_CBE_RAS
- HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance)
+ STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
#endif /* CONFIG_CBE_RAS */
- STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
+ STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
#ifdef CONFIG_CBE_RAS
- HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal)
+ STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
#endif /* CONFIG_CBE_RAS */
. = 0x3000
-/*** pSeries interrupt support ***/
+/*** Out of line interrupts support ***/
+
+ /* moved from 0xe00 */
+ STD_EXCEPTION_HV(., 0xe00, h_data_storage)
+ STD_EXCEPTION_HV(., 0xe20, h_instr_storage)
+ STD_EXCEPTION_HV(., 0xe40, emulation_assist)
+ STD_EXCEPTION_HV(., 0xe60, hmi_exception) /* need to flush cache ? */
/* moved from 0xf00 */
- STD_EXCEPTION_PSERIES(., performance_monitor)
- STD_EXCEPTION_PSERIES(., altivec_unavailable)
- STD_EXCEPTION_PSERIES(., vsx_unavailable)
+ STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor)
+ STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable)
+ STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable)
/*
* An interrupt came in while soft-disabled; clear EE in SRR1,
@@ -240,17 +300,30 @@ masked_interrupt:
rotldi r10,r10,16
mtspr SPRN_SRR1,r10
ld r10,PACA_EXGEN+EX_R10(r13)
- mfspr r13,SPRN_SPRG_SCRATCH0
+ GET_SCRATCH0(r13)
rfid
b .
+masked_Hinterrupt:
+ stb r10,PACAHARDIRQEN(r13)
+ mtcrf 0x80,r9
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ mfspr r10,SPRN_HSRR1
+ rldicl r10,r10,48,1 /* clear MSR_EE */
+ rotldi r10,r10,16
+ mtspr SPRN_HSRR1,r10
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ GET_SCRATCH0(r13)
+ hrfid
+ b .
+
.align 7
do_stab_bolted_pSeries:
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- mfspr r10,SPRN_SPRG_SCRATCH0
+ GET_SCRATCH0(r10)
std r10,PACA_EXSLB+EX_R13(r13)
- EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted)
+ EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
#ifdef CONFIG_PPC_PSERIES
/*
@@ -260,15 +333,15 @@ do_stab_bolted_pSeries:
.align 7
system_reset_fwnmi:
HMT_MEDIUM
- mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
+ SET_SCRATCH0(r13) /* save r13 */
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD)
.globl machine_check_fwnmi
.align 7
machine_check_fwnmi:
HMT_MEDIUM
- mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */
- EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
+ SET_SCRATCH0(r13) /* save r13 */
+ EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD)
#endif /* CONFIG_PPC_PSERIES */
@@ -282,7 +355,7 @@ slb_miss_user_pseries:
std r10,PACA_EXGEN+EX_R10(r13)
std r11,PACA_EXGEN+EX_R11(r13)
std r12,PACA_EXGEN+EX_R12(r13)
- mfspr r10,SPRG_SCRATCH0
+ GET_SCRATCH0(r10)
ld r11,PACA_EXSLB+EX_R9(r13)
ld r12,PACA_EXSLB+EX_R3(r13)
std r10,PACA_EXGEN+EX_R13(r13)
@@ -342,6 +415,8 @@ machine_check_common:
STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
+ STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
+ STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
#ifdef CONFIG_ALTIVEC
@@ -386,9 +461,24 @@ bad_stack:
std r12,_XER(r1)
SAVE_GPR(0,r1)
SAVE_GPR(2,r1)
- SAVE_4GPRS(3,r1)
- SAVE_2GPRS(7,r1)
- SAVE_10GPRS(12,r1)
+ ld r10,EX_R3(r3)
+ std r10,GPR3(r1)
+ SAVE_GPR(4,r1)
+ SAVE_4GPRS(5,r1)
+ ld r9,EX_R9(r3)
+ ld r10,EX_R10(r3)
+ SAVE_2GPRS(9,r1)
+ ld r9,EX_R11(r3)
+ ld r10,EX_R12(r3)
+ ld r11,EX_R13(r3)
+ std r9,GPR11(r1)
+ std r10,GPR12(r1)
+ std r11,GPR13(r1)
+BEGIN_FTR_SECTION
+ ld r10,EX_CFAR(r3)
+ std r10,ORIG_GPR3(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ SAVE_8GPRS(14,r1)
SAVE_10GPRS(22,r1)
lhz r12,PACA_TRAP_SAVE(r13)
std r12,_TRAP(r1)
@@ -397,6 +487,9 @@ bad_stack:
li r12,0
std r12,0(r11)
ld r2,PACATOC(r13)
+ ld r11,exception_marker@toc(r2)
+ std r12,RESULT(r1)
+ std r11,STACK_FRAME_OVERHEAD-16(r1)
1: addi r3,r1,STACK_FRAME_OVERHEAD
bl .kernel_bad_stack
b 1b
@@ -419,6 +512,19 @@ data_access_common:
li r5,0x300
b .do_hash_page /* Try to handle as hpte fault */
+ .align 7
+ .globl h_data_storage_common
+h_data_storage_common:
+ mfspr r10,SPRN_HDAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_HDSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
+ bl .save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unknown_exception
+ b .ret_from_except
+
.align 7
.globl instruction_access_common
instruction_access_common:
@@ -428,6 +534,8 @@ instruction_access_common:
li r5,0x400
b .do_hash_page /* Try to handle as hpte fault */
+ STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
+
/*
* Here is the common SLB miss user that is used when going to virtual
* mode for SLB misses, that is currently not used
@@ -750,7 +858,7 @@ _STATIC(do_hash_page)
BEGIN_FTR_SECTION
andis. r0,r4,0x0020 /* Is it a segment table fault? */
bne- do_ste_alloc /* If so handle it */
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
clrrdi r11,r1,THREAD_SHIFT
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index c5c24beb838..ba250d505e0 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -805,19 +805,6 @@ _ENTRY(copy_and_flush)
blr
#ifdef CONFIG_SMP
-#ifdef CONFIG_GEMINI
- .globl __secondary_start_gemini
-__secondary_start_gemini:
- mfspr r4,SPRN_HID0
- ori r4,r4,HID0_ICFI
- li r3,0
- ori r3,r3,HID0_ICE
- andc r4,r4,r3
- mtspr SPRN_HID0,r4
- sync
- b __secondary_start
-#endif /* CONFIG_GEMINI */
-
.globl __secondary_start_mpc86xx
__secondary_start_mpc86xx:
mfspr r3, SPRN_PIR
@@ -890,15 +877,6 @@ __secondary_start:
mtspr SPRN_SRR1,r4
SYNC
RFI
-
-_GLOBAL(start_secondary_resume)
- /* Reset stack */
- rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
- li r3,0
- std r3,0(r1) /* Zero the stack frame pointer */
- bl start_secondary
- b .
#endif /* CONFIG_SMP */
#ifdef CONFIG_KVM_BOOK3S_HANDLER
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 3a319f9c9d3..ba504099844 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -147,6 +147,8 @@ __secondary_hold:
mtctr r4
mr r3,r24
li r4,0
+ /* Make sure that patched code is visible */
+ isync
bctr
#else
BUG_OPCODE
@@ -216,19 +218,25 @@ generic_secondary_common_init:
*/
LOAD_REG_ADDR(r13, paca) /* Load paca pointer */
ld r13,0(r13) /* Get base vaddr of paca array */
+#ifndef CONFIG_SMP
+ addi r13,r13,PACA_SIZE /* know r13 if used accidentally */
+ b .kexec_wait /* wait for next kernel if !SMP */
+#else
+ LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
+ lwz r7,0(r7) /* also the max paca allocated */
li r5,0 /* logical cpu id */
1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
cmpw r6,r24 /* Compare to our id */
beq 2f
addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */
addi r5,r5,1
- cmpwi r5,NR_CPUS
+ cmpw r5,r7 /* Check if more pacas exist */
blt 1b
mr r3,r24 /* not found, copy phys to r3 */
b .kexec_wait /* next kernel might do better */
-2: mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG */
+2: SET_PACA(r13)
#ifdef CONFIG_PPC_BOOK3E
addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */
mtspr SPRN_SPRG_TLB_EXFRAME,r12
@@ -236,34 +244,39 @@ generic_secondary_common_init:
/* From now on, r24 is expected to be logical cpuid */
mr r24,r5
-3: HMT_LOW
- lbz r23,PACAPROCSTART(r13) /* Test if this processor should */
- /* start. */
-
-#ifndef CONFIG_SMP
- b 3b /* Never go on non-SMP */
-#else
- cmpwi 0,r23,0
- beq 3b /* Loop until told to go */
-
- sync /* order paca.run and cur_cpu_spec */
/* See if we need to call a cpu state restore handler */
LOAD_REG_ADDR(r23, cur_cpu_spec)
ld r23,0(r23)
ld r23,CPU_SPEC_RESTORE(r23)
cmpdi 0,r23,0
- beq 4f
+ beq 3f
ld r23,0(r23)
mtctr r23
bctrl
-4: /* Create a temp kernel stack for use before relocation is on. */
+3: LOAD_REG_ADDR(r3, boot_cpu_count) /* Decrement boot_cpu_count */
+ lwarx r4,0,r3
+ subi r4,r4,1
+ stwcx. r4,0,r3
+ bne 3b
+ isync
+
+4: HMT_LOW
+ lbz r23,PACAPROCSTART(r13) /* Test if this processor should */
+ /* start. */
+ cmpwi 0,r23,0
+ beq 4b /* Loop until told to go */
+
+ sync /* order paca.run and cur_cpu_spec */
+ isync /* In case code patching happened */
+
+ /* Create a temp kernel stack for use before relocation is on. */
ld r1,PACAEMERGSP(r13)
subi r1,r1,STACK_FRAME_OVERHEAD
b __secondary_start
-#endif
+#endif /* SMP */
/*
* Turn the MMU off.
@@ -534,7 +547,7 @@ _GLOBAL(pmac_secondary_start)
ld r4,0(r4) /* Get base vaddr of paca array */
mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
add r13,r13,r4 /* for this processor. */
- mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/
+ SET_PACA(r13) /* Save vaddr of paca in an SPRG*/
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
@@ -645,7 +658,7 @@ _GLOBAL(enable_64b_mode)
oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */
mtmsr r11
#else /* CONFIG_PPC_BOOK3E */
- li r12,(MSR_SF | MSR_ISF)@highest
+ li r12,(MSR_64BIT | MSR_ISF)@highest
sldi r12,r12,48
or r11,r11,r12
mtmsrd r11
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
new file mode 100644
index 00000000000..f8f0bc7f1d4
--- /dev/null
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -0,0 +1,97 @@
+/*
+ * This file contains the power_save function for 970-family CPUs.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+
+#undef DEBUG
+
+ .text
+
+_GLOBAL(power7_idle)
+ /* Now check if user or arch enabled NAP mode */
+ LOAD_REG_ADDRBASE(r3,powersave_nap)
+ lwz r4,ADDROFF(powersave_nap)(r3)
+ cmpwi 0,r4,0
+ beqlr
+
+ /* NAP is a state loss, we create a regs frame on the
+ * stack, fill it up with the state we care about and
+ * stick a pointer to it in PACAR1. We really only
+ * need to save PC, some CR bits and the NV GPRs,
+ * but for now an interrupt frame will do.
+ */
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-INT_FRAME_SIZE(r1)
+ std r0,_LINK(r1)
+ std r0,_NIP(r1)
+
+#ifndef CONFIG_SMP
+ /* Make sure FPU, VSX etc... are flushed as we may lose
+ * state when going to nap mode
+ */
+ bl .discard_lazy_cpu_state
+#endif /* CONFIG_SMP */
+
+ /* Hard disable interrupts */
+ mfmsr r9
+ rldicl r9,r9,48,1
+ rotldi r9,r9,16
+ mtmsrd r9,1 /* hard-disable interrupts */
+ li r0,0
+ stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */
+ stb r0,PACAHARDIRQEN(r13)
+
+ /* Continue saving state */
+ SAVE_GPR(2, r1)
+ SAVE_NVGPRS(r1)
+ mfcr r3
+ std r3,_CCR(r1)
+ std r9,_MSR(r1)
+ std r1,PACAR1(r13)
+
+ /* Magic NAP mode enter sequence */
+ std r0,0(r1)
+ ptesync
+ ld r0,0(r1)
+1: cmp cr0,r0,r0
+ bne 1b
+ PPC_NAP
+ b .
+
+_GLOBAL(power7_wakeup_loss)
+ GET_PACA(r13)
+ ld r1,PACAR1(r13)
+ REST_NVGPRS(r1)
+ REST_GPR(2, r1)
+ ld r3,_CCR(r1)
+ ld r4,_MSR(r1)
+ ld r5,_NIP(r1)
+ addi r1,r1,INT_FRAME_SIZE
+ mtcr r3
+ mtspr SPRN_SRR1,r4
+ mtspr SPRN_SRR0,r5
+ rfid
+
+_GLOBAL(power7_wakeup_noloss)
+ GET_PACA(r13)
+ ld r1,PACAR1(r13)
+ ld r4,_MSR(r1)
+ ld r5,_NIP(r1)
+ addi r1,r1,INT_FRAME_SIZE
+ mtspr SPRN_SRR1,r4
+ mtspr SPRN_SRR0,r5
+ rfid
diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 5c1118e3194..ffafaea3d26 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -17,8 +17,7 @@
#include <asm/machdep.h>
#include <asm/pgtable.h>
#include <asm/ppc-pci.h>
-
-#include "io-workarounds.h"
+#include <asm/io-workarounds.h>
#define IOWA_MAX_BUS 8
@@ -145,7 +144,19 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
return res;
}
-/* Regist new bus to support workaround */
+/* Enable IO workaround */
+static void __devinit io_workaround_init(void)
+{
+ static int io_workaround_inited;
+
+ if (io_workaround_inited)
+ return;
+ ppc_pci_io = iowa_pci_io;
+ ppc_md.ioremap = iowa_ioremap;
+ io_workaround_inited = 1;
+}
+
+/* Register new bus to support workaround */
void __devinit iowa_register_bus(struct pci_controller *phb,
struct ppc_pci_io *ops,
int (*initfunc)(struct iowa_bus *, void *), void *data)
@@ -153,6 +164,8 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
struct iowa_bus *bus;
struct device_node *np = phb->dn;
+ io_workaround_init();
+
if (iowa_bus_count >= IOWA_MAX_BUS) {
pr_err("IOWA:Too many pci bridges, "
"workarounds disabled for %s\n", np->full_name);
@@ -162,6 +175,7 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
bus = &iowa_busses[iowa_bus_count];
bus->phb = phb;
bus->ops = ops;
+ bus->private = data;
if (initfunc)
if ((*initfunc)(bus, data))
@@ -172,14 +186,3 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
}
-/* enable IO workaround */
-void __devinit io_workaround_init(void)
-{
- static int io_workaround_inited;
-
- if (io_workaround_inited)
- return;
- ppc_pci_io = iowa_pci_io;
- ppc_md.ioremap = iowa_ioremap;
- io_workaround_inited = 1;
-}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index f621b7d2d86..a24d37d4cf5 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -66,7 +66,6 @@
#include <asm/ptrace.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
-#include <asm/dbell.h>
#include <asm/smp.h>
#ifdef CONFIG_PPC64
@@ -160,7 +159,8 @@ notrace void arch_local_irq_restore(unsigned long en)
#if defined(CONFIG_BOOKE) && defined(CONFIG_SMP)
/* Check for pending doorbell interrupts and resend to ourself */
- doorbell_check_self();
+ if (cpu_has_feature(CPU_FTR_DBELL))
+ smp_muxed_ipi_resend();
#endif
/*
@@ -397,24 +397,28 @@ struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly;
void exc_lvl_ctx_init(void)
{
struct thread_info *tp;
- int i, hw_cpu;
+ int i, cpu_nr;
for_each_possible_cpu(i) {
- hw_cpu = get_hard_smp_processor_id(i);
- memset((void *)critirq_ctx[hw_cpu], 0, THREAD_SIZE);
- tp = critirq_ctx[hw_cpu];
- tp->cpu = i;
+#ifdef CONFIG_PPC64
+ cpu_nr = i;
+#else
+ cpu_nr = get_hard_smp_processor_id(i);
+#endif
+ memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
+ tp = critirq_ctx[cpu_nr];
+ tp->cpu = cpu_nr;
tp->preempt_count = 0;
#ifdef CONFIG_BOOKE
- memset((void *)dbgirq_ctx[hw_cpu], 0, THREAD_SIZE);
- tp = dbgirq_ctx[hw_cpu];
- tp->cpu = i;
+ memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE);
+ tp = dbgirq_ctx[cpu_nr];
+ tp->cpu = cpu_nr;
tp->preempt_count = 0;
- memset((void *)mcheckirq_ctx[hw_cpu], 0, THREAD_SIZE);
- tp = mcheckirq_ctx[hw_cpu];
- tp->cpu = i;
+ memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE);
+ tp = mcheckirq_ctx[cpu_nr];
+ tp->cpu = cpu_nr;
tp->preempt_count = HARDIRQ_OFFSET;
#endif
}
@@ -477,20 +481,41 @@ void do_softirq(void)
* IRQ controller and virtual interrupts
*/
+/* The main irq map itself is an array of NR_IRQ entries containing the
+ * associate host and irq number. An entry with a host of NULL is free.
+ * An entry can be allocated if it's free, the allocator always then sets
+ * hwirq first to the host's invalid irq number and then fills ops.
+ */
+struct irq_map_entry {
+ irq_hw_number_t hwirq;
+ struct irq_host *host;
+};
+
static LIST_HEAD(irq_hosts);
static DEFINE_RAW_SPINLOCK(irq_big_lock);
-static unsigned int revmap_trees_allocated;
static DEFINE_MUTEX(revmap_trees_mutex);
-struct irq_map_entry irq_map[NR_IRQS];
+static struct irq_map_entry irq_map[NR_IRQS];
static unsigned int irq_virq_count = NR_IRQS;
static struct irq_host *irq_default_host;
+irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
+{
+ return irq_map[d->irq].hwirq;
+}
+EXPORT_SYMBOL_GPL(irqd_to_hwirq);
+
irq_hw_number_t virq_to_hw(unsigned int virq)
{
return irq_map[virq].hwirq;
}
EXPORT_SYMBOL_GPL(virq_to_hw);
+bool virq_is_host(unsigned int virq, struct irq_host *host)
+{
+ return irq_map[virq].host == host;
+}
+EXPORT_SYMBOL_GPL(virq_is_host);
+
static int default_irq_host_match(struct irq_host *h, struct device_node *np)
{
return h->of_node != NULL && h->of_node == np;
@@ -511,7 +536,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
/* Allocate structure and revmap table if using linear mapping */
if (revmap_type == IRQ_HOST_MAP_LINEAR)
size += revmap_arg * sizeof(unsigned int);
- host = zalloc_maybe_bootmem(size, GFP_KERNEL);
+ host = kzalloc(size, GFP_KERNEL);
if (host == NULL)
return NULL;
@@ -561,14 +586,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
irq_map[i].host = host;
smp_wmb();
- /* Clear norequest flags */
- irq_clear_status_flags(i, IRQ_NOREQUEST);
-
/* Legacy flags are left to default at this point,
* one can then use irq_create_mapping() to
* explicitly change them
*/
ops->map(host, i, i);
+
+ /* Clear norequest flags */
+ irq_clear_status_flags(i, IRQ_NOREQUEST);
}
break;
case IRQ_HOST_MAP_LINEAR:
@@ -579,6 +604,9 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
smp_wmb();
host->revmap_data.linear.revmap = rmap;
break;
+ case IRQ_HOST_MAP_TREE:
+ INIT_RADIX_TREE(&host->revmap_data.tree, GFP_KERNEL);
+ break;
default:
break;
}
@@ -636,8 +664,6 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq,
goto error;
}
- irq_clear_status_flags(virq, IRQ_NOREQUEST);
-
/* map it */
smp_wmb();
irq_map[virq].hwirq = hwirq;
@@ -648,6 +674,8 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq,
goto errdesc;
}
+ irq_clear_status_flags(virq, IRQ_NOREQUEST);
+
return 0;
errdesc:
@@ -704,8 +732,6 @@ unsigned int irq_create_mapping(struct irq_host *host,
*/
virq = irq_find_mapping(host, hwirq);
if (virq != NO_IRQ) {
- if (host->ops->remap)
- host->ops->remap(host, virq, hwirq);
pr_debug("irq: -> existing mapping on virq %d\n", virq);
return virq;
}
@@ -786,14 +812,15 @@ void irq_dispose_mapping(unsigned int virq)
return;
host = irq_map[virq].host;
- WARN_ON (host == NULL);
- if (host == NULL)
+ if (WARN_ON(host == NULL))
return;
/* Never unmap legacy interrupts */
if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
return;
+ irq_set_status_flags(virq, IRQ_NOREQUEST);
+
/* remove chip and handler */
irq_set_chip_and_handler(virq, NULL, NULL);
@@ -813,13 +840,6 @@ void irq_dispose_mapping(unsigned int virq)
host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
break;
case IRQ_HOST_MAP_TREE:
- /*
- * Check if radix tree allocated yet, if not then nothing to
- * remove.
- */
- smp_rmb();
- if (revmap_trees_allocated < 1)
- break;
mutex_lock(&revmap_trees_mutex);
radix_tree_delete(&host->revmap_data.tree, hwirq);
mutex_unlock(&revmap_trees_mutex);
@@ -830,8 +850,6 @@ void irq_dispose_mapping(unsigned int virq)
smp_mb();
irq_map[virq].hwirq = host->inval_irq;
- irq_set_status_flags(virq, IRQ_NOREQUEST);
-
irq_free_descs(virq, 1);
/* Free it */
irq_free_virt(virq, 1);
@@ -877,16 +895,9 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host,
struct irq_map_entry *ptr;
unsigned int virq;
- WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
-
- /*
- * Check if the radix tree exists and has bee initialized.
- * If not, we fallback to slow mode
- */
- if (revmap_trees_allocated < 2)
+ if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_TREE))
return irq_find_mapping(host, hwirq);
- /* Now try to resolve */
/*
* No rcu_read_lock(ing) needed, the ptr returned can't go under us
* as it's referencing an entry in the static irq_map table.
@@ -909,16 +920,7 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host,
void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
irq_hw_number_t hwirq)
{
-
- WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
-
- /*
- * Check if the radix tree exists yet.
- * If not, then the irq will be inserted into the tree when it gets
- * initialized.
- */
- smp_rmb();
- if (revmap_trees_allocated < 1)
+ if (WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE))
return;
if (virq != NO_IRQ) {
@@ -934,7 +936,8 @@ unsigned int irq_linear_revmap(struct irq_host *host,
{
unsigned int *revmap;
- WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR);
+ if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_LINEAR))
+ return irq_find_mapping(host, hwirq);
/* Check revmap bounds */
if (unlikely(hwirq >= host->revmap_data.linear.size))
@@ -1028,53 +1031,6 @@ int arch_early_irq_init(void)
return 0;
}
-/* We need to create the radix trees late */
-static int irq_late_init(void)
-{
- struct irq_host *h;
- unsigned int i;
-
- /*
- * No mutual exclusion with respect to accessors of the tree is needed
- * here as the synchronization is done via the state variable
- * revmap_trees_allocated.
- */
- list_for_each_entry(h, &irq_hosts, link) {
- if (h->revmap_type == IRQ_HOST_MAP_TREE)
- INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL);
- }
-
- /*
- * Make sure the radix trees inits are visible before setting
- * the flag
- */
- smp_wmb();
- revmap_trees_allocated = 1;
-
- /*
- * Insert the reverse mapping for those interrupts already present
- * in irq_map[].
- */
- mutex_lock(&revmap_trees_mutex);
- for (i = 0; i < irq_virq_count; i++) {
- if (irq_map[i].host &&
- (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE))
- radix_tree_insert(&irq_map[i].host->revmap_data.tree,
- irq_map[i].hwirq, &irq_map[i]);
- }
- mutex_unlock(&revmap_trees_mutex);
-
- /*
- * Make sure the radix trees insertions are visible before setting
- * the flag
- */
- smp_wmb();
- revmap_trees_allocated = 2;
-
- return 0;
-}
-arch_initcall(irq_late_init);
-
#ifdef CONFIG_VIRQ_DEBUG
static int virq_debug_show(struct seq_file *m, void *private)
{
@@ -1082,10 +1038,11 @@ static int virq_debug_show(struct seq_file *m, void *private)
struct irq_desc *desc;
const char *p;
static const char none[] = "none";
+ void *data;
int i;
- seq_printf(m, "%-5s %-7s %-15s %s\n", "virq", "hwirq",
- "chip name", "host name");
+ seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq",
+ "chip name", "chip data", "host name");
for (i = 1; i < nr_irqs; i++) {
desc = irq_to_desc(i);
@@ -1098,7 +1055,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
struct irq_chip *chip;
seq_printf(m, "%5d ", i);
- seq_printf(m, "0x%05lx ", virq_to_hw(i));
+ seq_printf(m, "0x%05lx ", irq_map[i].hwirq);
chip = irq_desc_get_chip(desc);
if (chip && chip->name)
@@ -1107,6 +1064,9 @@ static int virq_debug_show(struct seq_file *m, void *private)
p = none;
seq_printf(m, "%-15s ", p);
+ data = irq_desc_get_chip_data(desc);
+ seq_printf(m, "0x%16p ", data);
+
if (irq_map[i].host && irq_map[i].host->of_node)
p = irq_map[i].host->of_node->full_name;
else
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 42850ee00ad..bd9d35f59cf 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -109,7 +109,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs)
#ifdef CONFIG_SMP
void kgdb_roundup_cpus(unsigned long flags)
{
- smp_send_debugger_break(MSG_ALL_BUT_SELF);
+ smp_send_debugger_break();
}
#endif
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 301db65f05a..84daabe2fcb 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -132,34 +132,6 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
/*
* Methods used to fetch LPAR data when running on a pSeries platform.
*/
-/**
- * h_get_mpp
- * H_GET_MPP hcall returns info in 7 parms
- */
-int h_get_mpp(struct hvcall_mpp_data *mpp_data)
-{
- int rc;
- unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
-
- rc = plpar_hcall9(H_GET_MPP, retbuf);
-
- mpp_data->entitled_mem = retbuf[0];
- mpp_data->mapped_mem = retbuf[1];
-
- mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
- mpp_data->pool_num = retbuf[2] & 0xffff;
-
- mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
- mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
- mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
-
- mpp_data->pool_size = retbuf[4];
- mpp_data->loan_request = retbuf[5];
- mpp_data->backing_mem = retbuf[6];
-
- return rc;
-}
-EXPORT_SYMBOL(h_get_mpp);
struct hvcall_ppp_data {
u64 entitlement;
@@ -345,6 +317,30 @@ static void parse_mpp_data(struct seq_file *m)
seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
}
+/**
+ * parse_mpp_x_data
+ * Parse out data returned from h_get_mpp_x
+ */
+static void parse_mpp_x_data(struct seq_file *m)
+{
+ struct hvcall_mpp_x_data mpp_x_data;
+
+ if (!firmware_has_feature(FW_FEATURE_XCMO))
+ return;
+ if (h_get_mpp_x(&mpp_x_data))
+ return;
+
+ seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes);
+
+ if (mpp_x_data.pool_coalesced_bytes)
+ seq_printf(m, "pool_coalesced_bytes=%ld\n",
+ mpp_x_data.pool_coalesced_bytes);
+ if (mpp_x_data.pool_purr_cycles)
+ seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles);
+ if (mpp_x_data.pool_spurr_cycles)
+ seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles);
+}
+
#define SPLPAR_CHARACTERISTICS_TOKEN 20
#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
@@ -520,6 +516,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
parse_system_parameter_string(m);
parse_ppp_data(m);
parse_mpp_data(m);
+ parse_mpp_x_data(m);
pseries_cmo_data(m);
splpar_dispatch_data(m);
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 094bd9821ad..998a1002860 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -694,6 +694,17 @@ _GLOBAL(kernel_thread)
addi r1,r1,16
blr
+#ifdef CONFIG_SMP
+_GLOBAL(start_secondary_resume)
+ /* Reset stack */
+ rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ li r3,0
+ stw r3,0(r1) /* Zero the stack frame pointer */
+ bl start_secondary
+ b .
+#endif /* CONFIG_SMP */
+
/*
* This routine is just here to keep GCC happy - sigh...
*/
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 206a321a71d..e89df59cdc5 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -462,7 +462,8 @@ _GLOBAL(disable_kernel_fp)
* wait for the flag to change, indicating this kernel is going away but
* the slave code for the next one is at addresses 0 to 100.
*
- * This is used by all slaves.
+ * This is used by all slaves, even those that did not find a matching
+ * paca in the secondary startup code.
*
* Physical (hardware) cpu id should be in r3.
*/
@@ -471,10 +472,6 @@ _GLOBAL(kexec_wait)
1: mflr r5
addi r5,r5,kexec_flag-1b
- li r4,KEXEC_STATE_REAL_MODE
- stb r4,PACAKEXECSTATE(r13)
- SYNC
-
99: HMT_LOW
#ifdef CONFIG_KEXEC /* use no memory without kexec */
lwz r4,0(r5)
@@ -499,11 +496,17 @@ kexec_flag:
*
* get phys id from paca
* switch to real mode
+ * mark the paca as no longer used
* join other cpus in kexec_wait(phys_id)
*/
_GLOBAL(kexec_smp_wait)
lhz r3,PACAHWCPUID(r13)
bl real_mode
+
+ li r4,KEXEC_STATE_REAL_MODE
+ stb r4,PACAKEXECSTATE(r13)
+ SYNC
+
b .kexec_wait
/*
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 10f0aadee95..efeb8818418 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -7,7 +7,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/threads.h>
+#include <linux/smp.h>
#include <linux/module.h>
#include <linux/memblock.h>
@@ -156,18 +156,29 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
/* Put the paca pointer into r13 and SPRG_PACA */
void setup_paca(struct paca_struct *new_paca)
{
+ /* Setup r13 */
local_paca = new_paca;
- mtspr(SPRN_SPRG_PACA, local_paca);
+
#ifdef CONFIG_PPC_BOOK3E
+ /* On Book3E, initialize the TLB miss exception frames */
mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
+#else
+ /* In HV mode, we setup both HPACA and PACA to avoid problems
+ * if we do a GET_PACA() before the feature fixups have been
+ * applied
+ */
+ if (cpu_has_feature(CPU_FTR_HVMODE_206))
+ mtspr(SPRN_SPRG_HPACA, local_paca);
#endif
+ mtspr(SPRN_SPRG_PACA, local_paca);
+
}
static int __initdata paca_size;
void __init allocate_pacas(void)
{
- int nr_cpus, cpu, limit;
+ int cpu, limit;
/*
* We can't take SLB misses on the paca, and we want to access them
@@ -179,23 +190,18 @@ void __init allocate_pacas(void)
if (firmware_has_feature(FW_FEATURE_ISERIES))
limit = min(limit, HvPagesToMap * HVPAGESIZE);
- nr_cpus = NR_CPUS;
- /* On iSeries we know we can never have more than 64 cpus */
- if (firmware_has_feature(FW_FEATURE_ISERIES))
- nr_cpus = min(64, nr_cpus);
-
- paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus);
+ paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
memset(paca, 0, paca_size);
printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
- paca_size, nr_cpus, paca);
+ paca_size, nr_cpu_ids, paca);
- allocate_lppacas(nr_cpus, limit);
+ allocate_lppacas(nr_cpu_ids, limit);
/* Can't use for_each_*_cpu, as they aren't functional yet */
- for (cpu = 0; cpu < nr_cpus; cpu++)
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
initialise_paca(&paca[cpu], cpu);
}
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index d225d99fe39..6baabc13306 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -43,10 +43,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
const u32 *regs;
struct pci_dn *pdn;
- pdn = alloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
+ pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
if (pdn == NULL)
return NULL;
- memset(pdn, 0, sizeof(*pdn));
dn->data = pdn;
pdn->node = dn;
pdn->phb = phb;
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index ef3ef566235..7d28f540200 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -54,7 +54,6 @@ extern void single_step_exception(struct pt_regs *regs);
extern int sys_sigreturn(struct pt_regs *regs);
EXPORT_SYMBOL(clear_pages);
-EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
EXPORT_SYMBOL(DMA_MODE_READ);
EXPORT_SYMBOL(DMA_MODE_WRITE);
@@ -88,9 +87,7 @@ EXPORT_SYMBOL(__copy_tofrom_user);
EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(__strnlen_user);
-#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(copy_4K_page);
-#endif
+EXPORT_SYMBOL(copy_page);
#if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
EXPORT_SYMBOL(isa_io_base);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index f74f355a961..095043d7994 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -702,6 +702,8 @@ void prepare_to_copy(struct task_struct *tsk)
/*
* Copy a thread..
*/
+extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
+
int copy_thread(unsigned long clone_flags, unsigned long usp,
unsigned long unused, struct task_struct *p,
struct pt_regs *regs)
@@ -755,11 +757,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
_ALIGN_UP(sizeof(struct thread_info), 16);
#ifdef CONFIG_PPC_STD_MMU_64
- if (cpu_has_feature(CPU_FTR_SLB)) {
+ if (mmu_has_feature(MMU_FTR_SLB)) {
unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
- if (cpu_has_feature(CPU_FTR_1T_SEGMENT))
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
<< SLB_VSID_SHIFT_1T;
else
@@ -769,6 +771,20 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
p->thread.ksp_vsid = sp_vsid;
}
#endif /* CONFIG_PPC_STD_MMU_64 */
+#ifdef CONFIG_PPC64
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ if (current->thread.dscr_inherit) {
+ p->thread.dscr_inherit = 1;
+ p->thread.dscr = current->thread.dscr;
+ } else if (0 != dscr_default) {
+ p->thread.dscr_inherit = 1;
+ p->thread.dscr = dscr_default;
+ } else {
+ p->thread.dscr_inherit = 0;
+ p->thread.dscr = 0;
+ }
+ }
+#endif
/*
* The PPC64 ABI makes use of a TOC to contain function
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index e74fa12afc8..48aeb55faae 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -68,6 +68,7 @@ int __initdata iommu_force_on;
unsigned long tce_alloc_start, tce_alloc_end;
u64 ppc64_rma_size;
#endif
+static phys_addr_t first_memblock_size;
static int __init early_parse_mem(char *p)
{
@@ -123,18 +124,19 @@ static void __init move_device_tree(void)
*/
static struct ibm_pa_feature {
unsigned long cpu_features; /* CPU_FTR_xxx bit */
+ unsigned long mmu_features; /* MMU_FTR_xxx bit */
unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */
unsigned char pabyte; /* byte number in ibm,pa-features */
unsigned char pabit; /* bit number (big-endian) */
unsigned char invert; /* if 1, pa bit set => clear feature */
} ibm_pa_features[] __initdata = {
- {0, PPC_FEATURE_HAS_MMU, 0, 0, 0},
- {0, PPC_FEATURE_HAS_FPU, 0, 1, 0},
- {CPU_FTR_SLB, 0, 0, 2, 0},
- {CPU_FTR_CTRL, 0, 0, 3, 0},
- {CPU_FTR_NOEXECUTE, 0, 0, 6, 0},
- {CPU_FTR_NODSISRALIGN, 0, 1, 1, 1},
- {CPU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0},
+ {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0},
+ {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0},
+ {0, MMU_FTR_SLB, 0, 0, 2, 0},
+ {CPU_FTR_CTRL, 0, 0, 0, 3, 0},
+ {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0},
+ {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1},
+ {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0},
{CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
};
@@ -166,9 +168,11 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs,
if (bit ^ fp->invert) {
cur_cpu_spec->cpu_features |= fp->cpu_features;
cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+ cur_cpu_spec->mmu_features |= fp->mmu_features;
} else {
cur_cpu_spec->cpu_features &= ~fp->cpu_features;
cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+ cur_cpu_spec->mmu_features &= ~fp->mmu_features;
}
}
}
@@ -268,13 +272,13 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth,
void *data)
{
- static int logical_cpuid = 0;
char *type = of_get_flat_dt_prop(node, "device_type", NULL);
const u32 *prop;
const u32 *intserv;
int i, nthreads;
unsigned long len;
- int found = 0;
+ int found = -1;
+ int found_thread = 0;
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
@@ -298,11 +302,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* version 2 of the kexec param format adds the phys cpuid of
* booted proc.
*/
- if (initial_boot_params && initial_boot_params->version >= 2) {
- if (intserv[i] ==
- initial_boot_params->boot_cpuid_phys) {
- found = 1;
- break;
+ if (initial_boot_params->version >= 2) {
+ if (intserv[i] == initial_boot_params->boot_cpuid_phys) {
+ found = boot_cpu_count;
+ found_thread = i;
}
} else {
/*
@@ -311,23 +314,20 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* off secondary threads.
*/
if (of_get_flat_dt_prop(node,
- "linux,boot-cpu", NULL) != NULL) {
- found = 1;
- break;
- }
+ "linux,boot-cpu", NULL) != NULL)
+ found = boot_cpu_count;
}
-
#ifdef CONFIG_SMP
/* logical cpu id is always 0 on UP kernels */
- logical_cpuid++;
+ boot_cpu_count++;
#endif
}
- if (found) {
- DBG("boot cpu: logical %d physical %d\n", logical_cpuid,
- intserv[i]);
- boot_cpuid = logical_cpuid;
- set_hard_smp_processor_id(boot_cpuid, intserv[i]);
+ if (found >= 0) {
+ DBG("boot cpu: logical %d physical %d\n", found,
+ intserv[found_thread]);
+ boot_cpuid = found;
+ set_hard_smp_processor_id(found, intserv[found_thread]);
/*
* PAPR defines "logical" PVR values for cpus that
@@ -509,11 +509,14 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
size = 0x80000000ul - base;
}
#endif
-
- /* First MEMBLOCK added, do some special initializations */
- if (memstart_addr == ~(phys_addr_t)0)
- setup_initial_memory_limit(base, size);
- memstart_addr = min((u64)memstart_addr, base);
+ /* Keep track of the beginning of memory -and- the size of
+ * the very first block in the device-tree as it represents
+ * the RMA on ppc64 server
+ */
+ if (base < memstart_addr) {
+ memstart_addr = base;
+ first_memblock_size = size;
+ }
/* Add the chunk to the MEMBLOCK list */
memblock_add(base, size);
@@ -698,6 +701,7 @@ void __init early_init_devtree(void *params)
of_scan_flat_dt(early_init_dt_scan_root, NULL);
of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+ setup_initial_memory_limit(memstart_addr, first_memblock_size);
/* Save command line for /proc/cmdline and then parse parameters */
strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 941ff4dbc56..c016033ba78 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -335,6 +335,7 @@ static void __init prom_printf(const char *format, ...)
const char *p, *q, *s;
va_list args;
unsigned long v;
+ long vs;
struct prom_t *_prom = &RELOC(prom);
va_start(args, format);
@@ -368,12 +369,35 @@ static void __init prom_printf(const char *format, ...)
v = va_arg(args, unsigned long);
prom_print_hex(v);
break;
+ case 'd':
+ ++q;
+ vs = va_arg(args, int);
+ if (vs < 0) {
+ prom_print(RELOC("-"));
+ vs = -vs;
+ }
+ prom_print_dec(vs);
+ break;
case 'l':
++q;
- if (*q == 'u') { /* '%lu' */
+ if (*q == 0)
+ break;
+ else if (*q == 'x') {
+ ++q;
+ v = va_arg(args, unsigned long);
+ prom_print_hex(v);
+ } else if (*q == 'u') { /* '%lu' */
++q;
v = va_arg(args, unsigned long);
prom_print_dec(v);
+ } else if (*q == 'd') { /* %ld */
+ ++q;
+ vs = va_arg(args, long);
+ if (vs < 0) {
+ prom_print(RELOC("-"));
+ vs = -vs;
+ }
+ prom_print_dec(vs);
}
break;
}
@@ -676,8 +700,10 @@ static void __init early_cmdline_parse(void)
#endif /* CONFIG_PCI_MSI */
#ifdef CONFIG_PPC_SMLPAR
#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */
+#define OV5_XCMO 0x40 /* Page Coalescing */
#else
#define OV5_CMO 0x00
+#define OV5_XCMO 0x00
#endif
#define OV5_TYPE1_AFFINITY 0x80 /* Type 1 NUMA affinity */
@@ -732,7 +758,7 @@ static unsigned char ibm_architecture_vec[] = {
OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
OV5_DONATE_DEDICATE_CPU | OV5_MSI,
0,
- OV5_CMO,
+ OV5_CMO | OV5_XCMO,
OV5_TYPE1_AFFINITY,
0,
0,
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 2097f2b3cba..271ff6318ed 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -42,6 +42,7 @@
#include <asm/time.h>
#include <asm/mmu.h>
#include <asm/topology.h>
+#include <asm/pSeries_reconfig.h>
struct rtas_t rtas = {
.lock = __ARCH_SPIN_LOCK_UNLOCKED
@@ -494,7 +495,7 @@ unsigned int rtas_busy_delay(int status)
might_sleep();
ms = rtas_busy_delay_time(status);
- if (ms)
+ if (ms && need_resched())
msleep(ms);
return ms;
@@ -731,6 +732,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
atomic_set(&data->error, rc);
start_topology_update();
+ pSeries_coalesce_init();
if (wake_when_done) {
atomic_set(&data->done, 1);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 21f30cb6807..79fca2651b6 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -381,7 +381,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
int i;
threads_per_core = tpc;
- threads_core_mask = CPU_MASK_NONE;
+ cpumask_clear(&threads_core_mask);
/* This implementation only supports power of 2 number of threads
* for simplicity and performance
@@ -390,7 +390,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
BUG_ON(tpc != (1 << threads_shift));
for (i = 0; i < tpc; i++)
- cpu_set(i, threads_core_mask);
+ cpumask_set_cpu(i, &threads_core_mask);
printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n",
tpc, tpc > 1 ? "s" : "");
@@ -404,7 +404,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
* cpu_present_mask
*
* Having the possible map set up early allows us to restrict allocations
- * of things like irqstacks to num_possible_cpus() rather than NR_CPUS.
+ * of things like irqstacks to nr_cpu_ids rather than NR_CPUS.
*
* We do not initialize the online map here; cpus set their own bits in
* cpu_online_mask as they come up.
@@ -424,7 +424,7 @@ void __init smp_setup_cpu_maps(void)
DBG("smp_setup_cpu_maps()\n");
- while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) {
+ while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) {
const int *intserv;
int j, len;
@@ -443,7 +443,7 @@ void __init smp_setup_cpu_maps(void)
intserv = &cpu; /* assume logical == phys */
}
- for (j = 0; j < nthreads && cpu < NR_CPUS; j++) {
+ for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
DBG(" thread %d -> cpu %d (hard id %d)\n",
j, cpu, intserv[j]);
set_cpu_present(cpu, true);
@@ -483,12 +483,12 @@ void __init smp_setup_cpu_maps(void)
if (cpu_has_feature(CPU_FTR_SMT))
maxcpus *= nthreads;
- if (maxcpus > NR_CPUS) {
+ if (maxcpus > nr_cpu_ids) {
printk(KERN_WARNING
"Partition configured for %d cpus, "
"operating system maximum is %d.\n",
- maxcpus, NR_CPUS);
- maxcpus = NR_CPUS;
+ maxcpus, nr_cpu_ids);
+ maxcpus = nr_cpu_ids;
} else
printk(KERN_INFO "Partition configured for %d cpus.\n",
maxcpus);
@@ -510,7 +510,7 @@ void __init smp_setup_cpu_maps(void)
cpu_init_thread_core_maps(nthreads);
/* Now that possible cpus are set, set nr_cpu_ids for later use */
- nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
+ setup_nr_cpu_ids();
free_unused_pacas();
}
@@ -602,6 +602,10 @@ int check_legacy_ioport(unsigned long base_port)
* name instead */
if (!np)
np = of_find_node_by_name(NULL, "8042");
+ if (np) {
+ of_i8042_kbd_irq = 1;
+ of_i8042_aux_irq = 12;
+ }
break;
case FDC_BASE: /* FDC1 */
np = of_find_node_by_type(NULL, "fdc");
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 1d2fbc90530..620d792b52e 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -48,6 +48,7 @@ extern void bootx_init(unsigned long r4, unsigned long phys);
int boot_cpuid = -1;
EXPORT_SYMBOL_GPL(boot_cpuid);
+int __initdata boot_cpu_count;
int boot_cpuid_phys;
int smp_hw_index[NR_CPUS];
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5a0401fcaeb..a88bf2713d4 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -62,6 +62,7 @@
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/mmu_context.h>
+#include <asm/code-patching.h>
#include "setup.h"
@@ -72,6 +73,7 @@
#endif
int boot_cpuid = 0;
+int __initdata boot_cpu_count;
u64 ppc64_pft_size;
/* Pick defaults since we might want to patch instructions
@@ -233,6 +235,7 @@ void early_setup_secondary(void)
void smp_release_cpus(void)
{
unsigned long *ptr;
+ int i;
DBG(" -> smp_release_cpus()\n");
@@ -245,7 +248,16 @@ void smp_release_cpus(void)
ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
- PHYSICAL_START);
*ptr = __pa(generic_secondary_smp_init);
- mb();
+
+ /* And wait a bit for them to catch up */
+ for (i = 0; i < 100000; i++) {
+ mb();
+ HMT_low();
+ if (boot_cpu_count == 0)
+ break;
+ udelay(1);
+ }
+ DBG("boot_cpu_count = %d\n", boot_cpu_count);
DBG(" <- smp_release_cpus()\n");
}
@@ -423,17 +435,30 @@ void __init setup_system(void)
DBG(" <- setup_system()\n");
}
-static u64 slb0_limit(void)
+/* This returns the limit below which memory accesses to the linear
+ * mapping are guarnateed not to cause a TLB or SLB miss. This is
+ * used to allocate interrupt or emergency stacks for which our
+ * exception entry path doesn't deal with being interrupted.
+ */
+static u64 safe_stack_limit(void)
{
- if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) {
+#ifdef CONFIG_PPC_BOOK3E
+ /* Freescale BookE bolts the entire linear mapping */
+ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+ return linear_map_top;
+ /* Other BookE, we assume the first GB is bolted */
+ return 1ul << 30;
+#else
+ /* BookS, the first segment is bolted */
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
return 1UL << SID_SHIFT_1T;
- }
return 1UL << SID_SHIFT;
+#endif
}
static void __init irqstack_early_init(void)
{
- u64 limit = slb0_limit();
+ u64 limit = safe_stack_limit();
unsigned int i;
/*
@@ -453,6 +478,9 @@ static void __init irqstack_early_init(void)
#ifdef CONFIG_PPC_BOOK3E
static void __init exc_lvl_early_init(void)
{
+ extern unsigned int interrupt_base_book3e;
+ extern unsigned int exc_debug_debug_book3e;
+
unsigned int i;
for_each_possible_cpu(i) {
@@ -463,6 +491,10 @@ static void __init exc_lvl_early_init(void)
mcheckirq_ctx[i] = (struct thread_info *)
__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
}
+
+ if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+ patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1,
+ (unsigned long)&exc_debug_debug_book3e, 0);
}
#else
#define exc_lvl_early_init()
@@ -486,7 +518,7 @@ static void __init emergency_stack_init(void)
* bringup, we need to get at them in real mode. This means they
* must also be within the RMO region.
*/
- limit = min(slb0_limit(), ppc64_rma_size);
+ limit = min(safe_stack_limit(), ppc64_rma_size);
for_each_possible_cpu(i) {
unsigned long sp;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 27c4a4584f8..da989fff19c 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -381,7 +381,7 @@ badframe:
regs, uc, &uc->uc_mcontext);
#endif
if (show_unhandled_signals && printk_ratelimit())
- printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+ printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
current->comm, current->pid, "rt_sigreturn",
(long)uc, regs->nip, regs->link);
@@ -469,7 +469,7 @@ badframe:
regs, frame, newsp);
#endif
if (show_unhandled_signals && printk_ratelimit())
- printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+ printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
current->comm, current->pid, "setup_rt_frame",
(long)frame, regs->nip, regs->link);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9f9c204bef6..4a6f2ec7e76 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -95,7 +95,7 @@ int smt_enabled_at_boot = 1;
static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
#ifdef CONFIG_PPC64
-void __devinit smp_generic_kick_cpu(int nr)
+int __devinit smp_generic_kick_cpu(int nr)
{
BUG_ON(nr < 0 || nr >= NR_CPUS);
@@ -106,37 +106,10 @@ void __devinit smp_generic_kick_cpu(int nr)
*/
paca[nr].cpu_start = 1;
smp_mb();
-}
-#endif
-void smp_message_recv(int msg)
-{
- switch(msg) {
- case PPC_MSG_CALL_FUNCTION:
- generic_smp_call_function_interrupt();
- break;
- case PPC_MSG_RESCHEDULE:
- scheduler_ipi();
- break;
- case PPC_MSG_CALL_FUNC_SINGLE:
- generic_smp_call_function_single_interrupt();
- break;
- case PPC_MSG_DEBUGGER_BREAK:
- if (crash_ipi_function_ptr) {
- crash_ipi_function_ptr(get_irq_regs());
- break;
- }
-#ifdef CONFIG_DEBUGGER
- debugger_ipi(get_irq_regs());
- break;
-#endif /* CONFIG_DEBUGGER */
- /* FALLTHROUGH */
- default:
- printk("SMP %d: smp_message_recv(): unknown msg %d\n",
- smp_processor_id(), msg);
- break;
- }
+ return 0;
}
+#endif
static irqreturn_t call_function_action(int irq, void *data)
{
@@ -156,9 +129,17 @@ static irqreturn_t call_function_single_action(int irq, void *data)
return IRQ_HANDLED;
}
-static irqreturn_t debug_ipi_action(int irq, void *data)
+irqreturn_t debug_ipi_action(int irq, void *data)
{
- smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
+ if (crash_ipi_function_ptr) {
+ crash_ipi_function_ptr(get_irq_regs());
+ return IRQ_HANDLED;
+ }
+
+#ifdef CONFIG_DEBUGGER
+ debugger_ipi(get_irq_regs());
+#endif /* CONFIG_DEBUGGER */
+
return IRQ_HANDLED;
}
@@ -197,6 +178,66 @@ int smp_request_message_ipi(int virq, int msg)
return err;
}
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+struct cpu_messages {
+ int messages; /* current messages */
+ unsigned long data; /* data for cause ipi */
+};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
+
+void smp_muxed_ipi_set_data(int cpu, unsigned long data)
+{
+ struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+
+ info->data = data;
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+ struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+ char *message = (char *)&info->messages;
+
+ message[msg] = 1;
+ mb();
+ smp_ops->cause_ipi(cpu, info->data);
+}
+
+void smp_muxed_ipi_resend(void)
+{
+ struct cpu_messages *info = &__get_cpu_var(ipi_message);
+
+ if (info->messages)
+ smp_ops->cause_ipi(smp_processor_id(), info->data);
+}
+
+irqreturn_t smp_ipi_demux(void)
+{
+ struct cpu_messages *info = &__get_cpu_var(ipi_message);
+ unsigned int all;
+
+ mb(); /* order any irq clear */
+
+ do {
+ all = xchg_local(&info->messages, 0);
+
+#ifdef __BIG_ENDIAN
+ if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
+ generic_smp_call_function_interrupt();
+ if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE)))
+ scheduler_ipi();
+ if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE)))
+ generic_smp_call_function_single_interrupt();
+ if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK)))
+ debug_ipi_action(0, NULL);
+#else
+#error Unsupported ENDIAN
+#endif
+ } while (info->messages);
+
+ return IRQ_HANDLED;
+}
+#endif /* CONFIG_PPC_SMP_MUXED_IPI */
+
void smp_send_reschedule(int cpu)
{
if (likely(smp_ops))
@@ -216,11 +257,18 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
-#ifdef CONFIG_DEBUGGER
-void smp_send_debugger_break(int cpu)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+void smp_send_debugger_break(void)
{
- if (likely(smp_ops))
- smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+ int cpu;
+ int me = raw_smp_processor_id();
+
+ if (unlikely(!smp_ops))
+ return;
+
+ for_each_online_cpu(cpu)
+ if (cpu != me)
+ smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
}
#endif
@@ -228,9 +276,9 @@ void smp_send_debugger_break(int cpu)
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
crash_ipi_function_ptr = crash_ipi_callback;
- if (crash_ipi_callback && smp_ops) {
+ if (crash_ipi_callback) {
mb();
- smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK);
+ smp_send_debugger_break();
}
}
#endif
@@ -410,8 +458,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
{
int rc, c;
- secondary_ti = current_set[cpu];
-
if (smp_ops == NULL ||
(smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
return -EINVAL;
@@ -421,6 +467,8 @@ int __cpuinit __cpu_up(unsigned int cpu)
if (rc)
return rc;
+ secondary_ti = current_set[cpu];
+
/* Make sure callin-map entry is 0 (can be leftover a CPU
* hotplug
*/
@@ -434,7 +482,11 @@ int __cpuinit __cpu_up(unsigned int cpu)
/* wake up cpus */
DBG("smp: kicking cpu %d\n", cpu);
- smp_ops->kick_cpu(cpu);
+ rc = smp_ops->kick_cpu(cpu);
+ if (rc) {
+ pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
+ return rc;
+ }
/*
* wait to see if the cpu made a callin (is actually up).
@@ -507,7 +559,7 @@ int cpu_first_thread_of_core(int core)
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
-/* Must be called when no change can occur to cpu_present_map,
+/* Must be called when no change can occur to cpu_present_mask,
* i.e. during cpu online or offline.
*/
static struct device_node *cpu_to_l2cache(int cpu)
@@ -608,7 +660,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
* se we pin us down to CPU 0 for a short while
*/
alloc_cpumask_var(&old_mask, GFP_NOWAIT);
- cpumask_copy(old_mask, &current->cpus_allowed);
+ cpumask_copy(old_mask, tsk_cpus_allowed(current));
set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
if (smp_ops && smp_ops->setup_cpu)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index c0d8c2006bf..f0f2199e64e 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -182,6 +182,41 @@ static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL);
static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr);
static SYSDEV_ATTR(purr, 0600, show_purr, store_purr);
+
+unsigned long dscr_default = 0;
+EXPORT_SYMBOL(dscr_default);
+
+static ssize_t show_dscr_default(struct sysdev_class *class,
+ struct sysdev_class_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lx\n", dscr_default);
+}
+
+static ssize_t __used store_dscr_default(struct sysdev_class *class,
+ struct sysdev_class_attribute *attr, const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ int ret = 0;
+
+ ret = sscanf(buf, "%lx", &val);
+ if (ret != 1)
+ return -EINVAL;
+ dscr_default = val;
+
+ return count;
+}
+
+static SYSDEV_CLASS_ATTR(dscr_default, 0600,
+ show_dscr_default, store_dscr_default);
+
+static void sysfs_create_dscr_default(void)
+{
+ int err = 0;
+ if (cpu_has_feature(CPU_FTR_DSCR))
+ err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &attr_dscr_default.attr);
+}
#endif /* CONFIG_PPC64 */
#ifdef HAS_PPC_PMC_PA6T
@@ -617,6 +652,9 @@ static int __init topology_init(void)
if (cpu_online(cpu))
register_cpu_online(cpu);
}
+#ifdef CONFIG_PPC64
+ sysfs_create_dscr_default();
+#endif /* CONFIG_PPC64 */
return 0;
}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index d782cd71c07..b13306b0d92 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -198,7 +198,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
} else if (show_unhandled_signals &&
unhandled_signal(current, signr) &&
printk_ratelimit()) {
- printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+ printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
current->comm, current->pid, signr,
addr, regs->nip, regs->link, code);
}
@@ -220,7 +220,7 @@ void system_reset_exception(struct pt_regs *regs)
}
#ifdef CONFIG_KEXEC
- cpu_set(smp_processor_id(), cpus_in_sr);
+ cpumask_set_cpu(smp_processor_id(), &cpus_in_sr);
#endif
die("System Reset", regs, SIGABRT);
@@ -908,6 +908,26 @@ static int emulate_instruction(struct pt_regs *regs)
return emulate_isel(regs, instword);
}
+#ifdef CONFIG_PPC64
+ /* Emulate the mfspr rD, DSCR. */
+ if (((instword & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) &&
+ cpu_has_feature(CPU_FTR_DSCR)) {
+ PPC_WARN_EMULATED(mfdscr, regs);
+ rd = (instword >> 21) & 0x1f;
+ regs->gpr[rd] = mfspr(SPRN_DSCR);
+ return 0;
+ }
+ /* Emulate the mtspr DSCR, rD. */
+ if (((instword & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) &&
+ cpu_has_feature(CPU_FTR_DSCR)) {
+ PPC_WARN_EMULATED(mtdscr, regs);
+ rd = (instword >> 21) & 0x1f;
+ mtspr(SPRN_DSCR, regs->gpr[rd]);
+ current->thread.dscr_inherit = 1;
+ return 0;
+ }
+#endif
+
return -EINVAL;
}
@@ -1505,6 +1525,10 @@ struct ppc_emulated ppc_emulated = {
#ifdef CONFIG_VSX
WARN_EMULATED_SETUP(vsx),
#endif
+#ifdef CONFIG_PPC64
+ WARN_EMULATED_SETUP(mfdscr),
+ WARN_EMULATED_SETUP(mtdscr),
+#endif
};
u32 ppc_warn_emulated;
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index e39cad83c88..23d65abbedc 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -62,6 +62,8 @@ void __init udbg_early_init(void)
udbg_init_cpm();
#elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)
udbg_init_usbgecko();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP)
+ udbg_init_wsp();
#endif
#ifdef CONFIG_PPC_EARLY_DEBUG
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index baa33a7517b..6837f839ab7 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <asm/udbg.h>
#include <asm/io.h>
+#include <asm/reg_a2.h>
extern u8 real_readb(volatile u8 __iomem *addr);
extern void real_writeb(u8 data, volatile u8 __iomem *addr);
@@ -298,3 +299,53 @@ void __init udbg_init_40x_realmode(void)
udbg_getc_poll = NULL;
}
#endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+static void udbg_wsp_flush(void)
+{
+ if (udbg_comport) {
+ while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ }
+}
+
+static void udbg_wsp_putc(char c)
+{
+ if (udbg_comport) {
+ if (c == '\n')
+ udbg_wsp_putc('\r');
+ udbg_wsp_flush();
+ writeb(c, &udbg_comport->thr); eieio();
+ }
+}
+
+static int udbg_wsp_getc(void)
+{
+ if (udbg_comport) {
+ while ((readb(&udbg_comport->lsr) & LSR_DR) == 0)
+ ; /* wait for char */
+ return readb(&udbg_comport->rbr);
+ }
+ return -1;
+}
+
+static int udbg_wsp_getc_poll(void)
+{
+ if (udbg_comport)
+ if (readb(&udbg_comport->lsr) & LSR_DR)
+ return readb(&udbg_comport->rbr);
+ return -1;
+}
+
+void __init udbg_init_wsp(void)
+{
+ udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT;
+
+ udbg_init_uart(udbg_comport, 57600, 50000000);
+
+ udbg_putc = udbg_wsp_putc;
+ udbg_flush = udbg_wsp_flush;
+ udbg_getc = udbg_wsp_getc;
+ udbg_getc_poll = udbg_wsp_getc_poll;
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 9de6f396cf8..4d5a3edff49 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -102,7 +102,7 @@ _GLOBAL(giveup_altivec)
MTMSRD(r5) /* enable use of VMX now */
isync
PPC_LCMPI 0,r3,0
- beqlr- /* if no previous owner, done */
+ beqlr /* if no previous owner, done */
addi r3,r3,THREAD /* want THREAD of task */
PPC_LL r5,PT_REGS(r3)
PPC_LCMPI 0,r5,0
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 74d0e742114..da3a1225c0a 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
return 0;
}
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_44x *vcpu_44x;
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 65ea083a5b2..549bb2c9a47 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -158,7 +158,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
}
- kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
return emulated;
}
@@ -179,7 +178,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
}
- kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
return emulated;
}
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index c961de40c67..0f95b5cce03 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -236,7 +236,7 @@ void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
{
- return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
+ return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
}
void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 2b9c9088d00..1a1b34487e7 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -35,9 +35,7 @@
#if defined(CONFIG_PPC_BOOK3S_64)
-#define LOAD_SHADOW_VCPU(reg) \
- mfspr reg, SPRN_SPRG_PACA
-
+#define LOAD_SHADOW_VCPU(reg) GET_PACA(reg)
#define SHADOW_VCPU_OFF PACA_KVM_SVCPU
#define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR)
#define FUNC(name) GLUE(.,name)
@@ -72,7 +70,7 @@
.global kvmppc_trampoline_\intno
kvmppc_trampoline_\intno:
- mtspr SPRN_SPRG_SCRATCH0, r13 /* Save r13 */
+ SET_SCRATCH0(r13) /* Save r13 */
/*
* First thing to do is to find out if we're coming
@@ -91,7 +89,7 @@ kvmppc_trampoline_\intno:
lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
mtcr r12
PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
- mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */
+ GET_SCRATCH0(r13) /* r13 = original r13 */
b kvmppc_resume_\intno /* Get back original handler */
/* Now we know we're handling a KVM guest */
@@ -114,6 +112,9 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL
+#ifdef CONFIG_PPC_BOOK3S_64
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL_HV
+#endif
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_FP_UNAVAIL
@@ -158,7 +159,7 @@ kvmppc_handler_skip_ins:
lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
mtcr r12
PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
- mfspr r13, SPRN_SPRG_SCRATCH0
+ GET_SCRATCH0(r13)
/* And get back into the code */
RFI
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 7c52ed0b705..451264274b8 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -155,14 +155,20 @@ kvmppc_handler_trampoline_exit:
PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13)
/* Save guest PC and MSR */
- mfsrr0 r3
+ andi. r0,r12,0x2
+ beq 1f
+ mfspr r3,SPRN_HSRR0
+ mfspr r4,SPRN_HSRR1
+ andi. r12,r12,0x3ffd
+ b 2f
+1: mfsrr0 r3
mfsrr1 r4
-
+2:
PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13)
PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13)
/* Get scratch'ed off registers */
- mfspr r9, SPRN_SPRG_SCRATCH0
+ GET_SCRATCH0(r9)
PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
lwz r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ef76acb455c..8462b3a1c1c 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvmppc_set_msr(vcpu, regs->msr);
vcpu->arch.shared->srr0 = regs->srr0;
vcpu->arch.shared->srr1 = regs->srr1;
+ kvmppc_set_pid(vcpu, regs->pid);
vcpu->arch.shared->sprg0 = regs->sprg0;
vcpu->arch.shared->sprg1 = regs->sprg1;
vcpu->arch.shared->sprg2 = regs->sprg2;
@@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
+static void get_sregs_base(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ u64 tb = get_tb();
+
+ sregs->u.e.features |= KVM_SREGS_E_BASE;
+
+ sregs->u.e.csrr0 = vcpu->arch.csrr0;
+ sregs->u.e.csrr1 = vcpu->arch.csrr1;
+ sregs->u.e.mcsr = vcpu->arch.mcsr;
+ sregs->u.e.esr = vcpu->arch.esr;
+ sregs->u.e.dear = vcpu->arch.shared->dar;
+ sregs->u.e.tsr = vcpu->arch.tsr;
+ sregs->u.e.tcr = vcpu->arch.tcr;
+ sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
+ sregs->u.e.tb = tb;
+ sregs->u.e.vrsave = vcpu->arch.vrsave;
+}
+
+static int set_sregs_base(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ if (!(sregs->u.e.features & KVM_SREGS_E_BASE))
+ return 0;
+
+ vcpu->arch.csrr0 = sregs->u.e.csrr0;
+ vcpu->arch.csrr1 = sregs->u.e.csrr1;
+ vcpu->arch.mcsr = sregs->u.e.mcsr;
+ vcpu->arch.esr = sregs->u.e.esr;
+ vcpu->arch.shared->dar = sregs->u.e.dear;
+ vcpu->arch.vrsave = sregs->u.e.vrsave;
+ vcpu->arch.tcr = sregs->u.e.tcr;
+
+ if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
+ vcpu->arch.dec = sregs->u.e.dec;
+
+ kvmppc_emulate_dec(vcpu);
+
+ if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+ /*
+ * FIXME: existing KVM timer handling is incomplete.
+ * TSR cannot be read by the guest, and its value in
+ * vcpu->arch is always zero. For now, just handle
+ * the case where the caller is trying to inject a
+ * decrementer interrupt.
+ */
+
+ if ((sregs->u.e.tsr & TSR_DIS) &&
+ (vcpu->arch.tcr & TCR_DIE))
+ kvmppc_core_queue_dec(vcpu);
+ }
+
+ return 0;
+}
+
+static void get_sregs_arch206(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ sregs->u.e.features |= KVM_SREGS_E_ARCH206;
+
+ sregs->u.e.pir = 0;
+ sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
+ sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
+ sregs->u.e.decar = vcpu->arch.decar;
+ sregs->u.e.ivpr = vcpu->arch.ivpr;
+}
+
+static int set_sregs_arch206(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
+ return 0;
+
+ if (sregs->u.e.pir != 0)
+ return -EINVAL;
+
+ vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
+ vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1;
+ vcpu->arch.decar = sregs->u.e.decar;
+ vcpu->arch.ivpr = sregs->u.e.ivpr;
+
+ return 0;
+}
+
+void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ sregs->u.e.features |= KVM_SREGS_E_IVOR;
+
+ sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+ sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+ sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+ sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+ sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+ sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+ sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+ sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+ sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+ sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+ sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+ sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+ sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+ sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+ sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+ sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+}
+
+int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+ return 0;
+
+ vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15];
+
+ return 0;
+}
+
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return -ENOTSUPP;
+ sregs->pvr = vcpu->arch.pvr;
+
+ get_sregs_base(vcpu, sregs);
+ get_sregs_arch206(vcpu, sregs);
+ kvmppc_core_get_sregs(vcpu, sregs);
+ return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return -ENOTSUPP;
+ int ret;
+
+ if (vcpu->arch.pvr != sregs->pvr)
+ return -EINVAL;
+
+ ret = set_sregs_base(vcpu, sregs);
+ if (ret < 0)
+ return ret;
+
+ ret = set_sregs_arch206(vcpu, sregs);
+ if (ret < 0)
+ return ret;
+
+ return kvmppc_core_set_sregs(vcpu, sregs);
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 1cc471faac2..b58ccae9590 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -380,7 +380,6 @@ lightweight_exit:
* because host interrupt handlers would get confused. */
lwz r1, VCPU_GPR(r1)(r4)
- /* XXX handle USPRG0 */
/* Host interrupt handlers may have clobbered these guest-readable
* SPRGs, so we need to reload them here with the guest's values. */
lwz r3, VCPU_SPRG4(r4)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index e3768ee9b59..318dbc61ba4 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -63,6 +63,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
/* Registers init */
vcpu->arch.pvr = mfspr(SPRN_PVR);
+ vcpu_e500->svr = mfspr(SPRN_SVR);
/* Since booke kvm only support one core, update all vcpus' PIR to 0 */
vcpu->vcpu_id = 0;
@@ -96,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
return 0;
}
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE |
+ KVM_SREGS_E_PM;
+ sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
+
+ sregs->u.e.impl.fsl.features = 0;
+ sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
+ sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
+ sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
+
+ sregs->u.e.mas0 = vcpu_e500->mas0;
+ sregs->u.e.mas1 = vcpu_e500->mas1;
+ sregs->u.e.mas2 = vcpu_e500->mas2;
+ sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
+ sregs->u.e.mas4 = vcpu_e500->mas4;
+ sregs->u.e.mas6 = vcpu_e500->mas6;
+
+ sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
+ sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
+ sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg;
+ sregs->u.e.tlbcfg[2] = 0;
+ sregs->u.e.tlbcfg[3] = 0;
+
+ sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+ sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+ sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+ sregs->u.e.ivor_high[3] =
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+
+ kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
+ vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
+ vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
+ vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
+ }
+
+ if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
+ vcpu_e500->mas0 = sregs->u.e.mas0;
+ vcpu_e500->mas1 = sregs->u.e.mas1;
+ vcpu_e500->mas2 = sregs->u.e.mas2;
+ vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
+ vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
+ vcpu_e500->mas4 = sregs->u.e.mas4;
+ vcpu_e500->mas6 = sregs->u.e.mas6;
+ }
+
+ if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+ return 0;
+
+ if (sregs->u.e.features & KVM_SREGS_E_SPE) {
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] =
+ sregs->u.e.ivor_high[0];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] =
+ sregs->u.e.ivor_high[1];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] =
+ sregs->u.e.ivor_high[2];
+ }
+
+ if (sregs->u.e.features & KVM_SREGS_E_PM) {
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
+ sregs->u.e.ivor_high[3];
+ }
+
+ return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 8e3edfbc963..69cd665a0ca 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, <yu.liu@freescale.com>
*
@@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
switch (sprn) {
case SPRN_PID:
- vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
- vcpu->arch.pid = spr_val;
+ kvmppc_set_pid(vcpu, spr_val);
break;
case SPRN_PID1:
vcpu_e500->pid[1] = spr_val; break;
@@ -175,6 +174,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
case SPRN_HID1:
kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
+ case SPRN_SVR:
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break;
case SPRN_MMUCSR0:
kvmppc_set_gpr(vcpu, rt, 0); break;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index d6d6d47a75a..b18fe353397 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, yu.liu@freescale.com
*
@@ -24,6 +24,7 @@
#include "../mm/mmu_decl.h"
#include "e500_tlb.h"
#include "trace.h"
+#include "timing.h"
#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
@@ -506,6 +507,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
vcpu_e500->mas7 = 0;
}
+ kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
return EMULATE_DONE;
}
@@ -571,6 +573,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
write_host_tlbe(vcpu_e500, stlbsel, sesel);
}
+ kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
return EMULATE_DONE;
}
@@ -672,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
return -1;
}
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
+ vcpu->arch.pid = pid;
+}
+
void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
{
struct tlbe *tlbe;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index c64fd2909bb..141dce3c681 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
}
}
+u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
+{
+ u64 jd = tb - vcpu->arch.dec_jiffies;
+ return vcpu->arch.dec - jd;
+}
+
/* XXX to do:
* lhax
* lhaux
@@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_DEC:
{
- u64 jd = get_tb() - vcpu->arch.dec_jiffies;
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
- pr_debug("mfDEC: %x - %llx = %lx\n",
- vcpu->arch.dec, jd,
- kvmppc_get_gpr(vcpu, rt));
+ kvmppc_set_gpr(vcpu, rt,
+ kvmppc_get_dec(vcpu, get_tb()));
break;
}
default:
@@ -294,6 +297,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
}
+ kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
break;
case OP_31_XOP_STHX:
@@ -363,6 +367,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
printk("mtspr: unknown spr %x\n", sprn);
break;
}
+ kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
break;
case OP_31_XOP_DCBI:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 99758460efd..616dd516ca1 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext)
int r;
switch (ext) {
+#ifdef CONFIG_BOOKE
+ case KVM_CAP_PPC_BOOKE_SREGS:
+#else
case KVM_CAP_PPC_SEGSTATE:
+#endif
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
@@ -284,6 +288,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
+#ifdef CONFIG_KVM_EXIT_TIMING
+ mutex_init(&vcpu->arch.exit_timing_lock);
+#endif
+
return 0;
}
@@ -294,12 +302,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+#ifdef CONFIG_BOOKE
+ /*
+ * vrsave (formerly usprg0) isn't used by Linux, but may
+ * be used by the guest.
+ *
+ * On non-booke this is associated with Altivec and
+ * is handled by code in book3s.c.
+ */
+ mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+#endif
kvmppc_core_vcpu_load(vcpu, cpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvmppc_core_vcpu_put(vcpu);
+#ifdef CONFIG_BOOKE
+ vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
+#endif
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index a021f5827a3..319177df958 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
{
int i;
- /* pause guest execution to avoid concurrent updates */
- mutex_lock(&vcpu->mutex);
+ /* Take a lock to avoid concurrent updates */
+ mutex_lock(&vcpu->arch.exit_timing_lock);
vcpu->arch.last_exit_type = 0xDEAD;
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
@@ -49,7 +49,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
vcpu->arch.timing_exit.tv64 = 0;
vcpu->arch.timing_last_enter.tv64 = 0;
- mutex_unlock(&vcpu->mutex);
+ mutex_unlock(&vcpu->arch.exit_timing_lock);
}
static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
@@ -65,6 +65,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
return;
}
+ mutex_lock(&vcpu->arch.exit_timing_lock);
+
vcpu->arch.timing_count_type[type]++;
/* sum */
@@ -93,6 +95,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
vcpu->arch.timing_min_duration[type] = duration;
if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
vcpu->arch.timing_max_duration[type] = duration;
+
+ mutex_unlock(&vcpu->arch.exit_timing_lock);
}
void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
@@ -147,17 +151,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
{
struct kvm_vcpu *vcpu = m->private;
int i;
+ u64 min, max, sum, sum_quad;
seq_printf(m, "%s", "type count min max sum sum_squared\n");
+
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+
+ min = vcpu->arch.timing_min_duration[i];
+ do_div(min, tb_ticks_per_usec);
+ max = vcpu->arch.timing_max_duration[i];
+ do_div(max, tb_ticks_per_usec);
+ sum = vcpu->arch.timing_sum_duration[i];
+ do_div(sum, tb_ticks_per_usec);
+ sum_quad = vcpu->arch.timing_sum_quad_duration[i];
+ do_div(sum_quad, tb_ticks_per_usec);
+
seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
kvm_exit_names[i],
vcpu->arch.timing_count_type[i],
- vcpu->arch.timing_min_duration[i],
- vcpu->arch.timing_max_duration[i],
- vcpu->arch.timing_sum_duration[i],
- vcpu->arch.timing_sum_quad_duration[i]);
+ min,
+ max,
+ sum,
+ sum_quad);
+
}
return 0;
}
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index f53e09c7dac..13b676c20d1 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -6,14 +6,6 @@
#include <asm/system.h>
-void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask)
-{
- if (mem_init_done)
- return kmalloc(size, mask);
- else
- return alloc_bootmem(size);
-}
-
void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
{
void *p;
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 4d4eeb90048..53dcb6b1b70 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -6,6 +6,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#include <asm/page.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
@@ -15,9 +16,9 @@ PPC64_CACHES:
.tc ppc64_caches[TC],ppc64_caches
.section ".text"
-
-_GLOBAL(copy_4K_page)
- li r5,4096 /* 4K page size */
+_GLOBAL(copy_page)
+ lis r5,PAGE_SIZE@h
+ ori r5,r5,PAGE_SIZE@l
BEGIN_FTR_SECTION
ld r10,PPC64_CACHES@toc(r2)
lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */
diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c
index deac4d30daf..e91615abae6 100644
--- a/arch/powerpc/lib/devres.c
+++ b/arch/powerpc/lib/devres.c
@@ -9,11 +9,11 @@
#include <linux/device.h> /* devres_*(), devm_ioremap_release() */
#include <linux/gfp.h>
-#include <linux/io.h> /* ioremap_flags() */
+#include <linux/io.h> /* ioremap_prot() */
#include <linux/module.h> /* EXPORT_SYMBOL() */
/**
- * devm_ioremap_prot - Managed ioremap_flags()
+ * devm_ioremap_prot - Managed ioremap_prot()
* @dev: Generic device to remap IO address for
* @offset: BUS offset to map
* @size: Size of map
@@ -31,7 +31,7 @@ void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
if (!ptr)
return NULL;
- addr = ioremap_flags(offset, size, flags);
+ addr = ioremap_prot(offset, size, flags);
if (addr) {
*ptr = addr;
devres_add(dev, ptr);
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index ae5189ab004..9a52349874e 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>
+#include <linux/prefetch.h>
#include <asm/sstep.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
@@ -45,6 +46,18 @@ extern int do_stxvd2x(int rn, unsigned long ea);
#endif
/*
+ * Emulate the truncation of 64 bit values in 32-bit mode.
+ */
+static unsigned long truncate_if_32bit(unsigned long msr, unsigned long val)
+{
+#ifdef __powerpc64__
+ if ((msr & MSR_64BIT) == 0)
+ val &= 0xffffffffUL;
+#endif
+ return val;
+}
+
+/*
* Determine whether a conditional branch instruction would branch.
*/
static int __kprobes branch_taken(unsigned int instr, struct pt_regs *regs)
@@ -90,11 +103,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs
if (instr & 0x04000000) /* update forms */
regs->gpr[ra] = ea;
}
-#ifdef __powerpc64__
- if (!(regs->msr & MSR_SF))
- ea &= 0xffffffffUL;
-#endif
- return ea;
+
+ return truncate_if_32bit(regs->msr, ea);
}
#ifdef __powerpc64__
@@ -113,9 +123,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg
if ((instr & 3) == 1) /* update forms */
regs->gpr[ra] = ea;
}
- if (!(regs->msr & MSR_SF))
- ea &= 0xffffffffUL;
- return ea;
+
+ return truncate_if_32bit(regs->msr, ea);
}
#endif /* __powerpc64 */
@@ -136,11 +145,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs
if (do_update) /* update forms */
regs->gpr[ra] = ea;
}
-#ifdef __powerpc64__
- if (!(regs->msr & MSR_SF))
- ea &= 0xffffffffUL;
-#endif
- return ea;
+
+ return truncate_if_32bit(regs->msr, ea);
}
/*
@@ -466,7 +472,7 @@ static void __kprobes set_cr0(struct pt_regs *regs, int rd)
regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
#ifdef __powerpc64__
- if (!(regs->msr & MSR_SF))
+ if (!(regs->msr & MSR_64BIT))
val = (int) val;
#endif
if (val < 0)
@@ -487,7 +493,7 @@ static void __kprobes add_with_carry(struct pt_regs *regs, int rd,
++val;
regs->gpr[rd] = val;
#ifdef __powerpc64__
- if (!(regs->msr & MSR_SF)) {
+ if (!(regs->msr & MSR_64BIT)) {
val = (unsigned int) val;
val1 = (unsigned int) val1;
}
@@ -570,8 +576,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
if ((instr & 2) == 0)
imm += regs->nip;
regs->nip += 4;
- if ((regs->msr & MSR_SF) == 0)
- regs->nip &= 0xffffffffUL;
+ regs->nip = truncate_if_32bit(regs->msr, regs->nip);
if (instr & 1)
regs->link = regs->nip;
if (branch_taken(instr, regs))
@@ -604,13 +609,9 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
imm -= 0x04000000;
if ((instr & 2) == 0)
imm += regs->nip;
- if (instr & 1) {
- regs->link = regs->nip + 4;
- if ((regs->msr & MSR_SF) == 0)
- regs->link &= 0xffffffffUL;
- }
- if ((regs->msr & MSR_SF) == 0)
- imm &= 0xffffffffUL;
+ if (instr & 1)
+ regs->link = truncate_if_32bit(regs->msr, regs->nip + 4);
+ imm = truncate_if_32bit(regs->msr, imm);
regs->nip = imm;
return 1;
case 19:
@@ -618,11 +619,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
case 16: /* bclr */
case 528: /* bcctr */
imm = (instr & 0x400)? regs->ctr: regs->link;
- regs->nip += 4;
- if ((regs->msr & MSR_SF) == 0) {
- regs->nip &= 0xffffffffUL;
- imm &= 0xffffffffUL;
- }
+ regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+ imm = truncate_if_32bit(regs->msr, imm);
if (instr & 1)
regs->link = regs->nip;
if (branch_taken(instr, regs))
@@ -1616,11 +1614,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
return 0; /* invoke DSI if -EFAULT? */
}
instr_done:
- regs->nip += 4;
-#ifdef __powerpc64__
- if ((regs->msr & MSR_SF) == 0)
- regs->nip &= 0xffffffffUL;
-#endif
+ regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
return 1;
logical_done:
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 5b7dd4ea02b..a242b5d7cbe 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -118,7 +118,7 @@ _GLOBAL(__hash_page_4K)
BEGIN_FTR_SECTION
cmpdi r9,0 /* check segment size */
bne 3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
/* Calc va and put it in r29 */
rldicr r29,r5,28,63-28
rldicl r3,r3,0,36
@@ -401,7 +401,7 @@ _GLOBAL(__hash_page_4K)
BEGIN_FTR_SECTION
cmpdi r9,0 /* check segment size */
bne 3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
/* Calc va and put it in r29 */
rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
@@ -715,7 +715,7 @@ BEGIN_FTR_SECTION
andi. r0,r31,_PAGE_NO_CACHE
/* If so, bail out and refault as a 4k page */
bne- ht64_bail_ok
-END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_CI_LARGE_PAGE)
/* Prepare new PTE value (turn access RW into DIRTY, then
* add BUSY and ACCESSED)
*/
@@ -736,7 +736,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
BEGIN_FTR_SECTION
cmpdi r9,0 /* check segment size */
bne 3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
/* Calc va and put it in r29 */
rldicr r29,r5,28,63-28
rldicl r3,r3,0,36
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 784a400e078..dfd764896db 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -50,9 +50,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
case MMU_PAGE_4K:
va &= ~0xffful;
va |= ssize << 8;
- asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0),
- %2)
- : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
+ asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
+ : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
: "memory");
break;
default:
@@ -61,9 +60,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
va |= penc << 12;
va |= ssize << 8;
va |= 1; /* L */
- asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0),
- %2)
- : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
+ asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
+ : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
: "memory");
break;
}
@@ -98,8 +96,8 @@ static inline void __tlbiel(unsigned long va, int psize, int ssize)
static inline void tlbie(unsigned long va, int psize, int ssize, int local)
{
- unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+ unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
if (use_local)
use_local = mmu_psize_defs[psize].tlbiel;
@@ -503,7 +501,7 @@ static void native_flush_hash_range(unsigned long number, int local)
} pte_iterate_hashed_end();
}
- if (cpu_has_feature(CPU_FTR_TLBIEL) &&
+ if (mmu_has_feature(MMU_FTR_TLBIEL) &&
mmu_psize_defs[psize].tlbiel && local) {
asm volatile("ptesync":::"memory");
for (i = 0; i < number; i++) {
@@ -517,7 +515,7 @@ static void native_flush_hash_range(unsigned long number, int local)
}
asm volatile("ptesync":::"memory");
} else {
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
if (lock_tlbie)
raw_spin_lock(&native_tlbie_lock);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 58a022d0f46..26b2872b3d0 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -53,6 +53,7 @@
#include <asm/sections.h>
#include <asm/spu.h>
#include <asm/udbg.h>
+#include <asm/code-patching.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -258,11 +259,11 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
for (; size >= 4; size -= 4, ++prop) {
if (prop[0] == 40) {
DBG("1T segment support detected\n");
- cur_cpu_spec->cpu_features |= CPU_FTR_1T_SEGMENT;
+ cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
return 1;
}
}
- cur_cpu_spec->cpu_features &= ~CPU_FTR_NO_SLBIE_B;
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
return 0;
}
@@ -288,7 +289,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
if (prop != NULL) {
DBG("Page sizes from device-tree:\n");
size /= 4;
- cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
+ cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
while(size > 0) {
unsigned int shift = prop[0];
unsigned int slbenc = prop[1];
@@ -316,7 +317,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
break;
case 0x18:
idx = MMU_PAGE_16M;
- cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
+ cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
break;
case 0x22:
idx = MMU_PAGE_16G;
@@ -411,7 +412,7 @@ static void __init htab_init_page_sizes(void)
* Not in the device-tree, let's fallback on known size
* list for 16M capable GP & GR
*/
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
+ if (mmu_has_feature(MMU_FTR_16M_PAGE))
memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
sizeof(mmu_psize_defaults_gp));
found:
@@ -441,7 +442,7 @@ static void __init htab_init_page_sizes(void)
mmu_vmalloc_psize = MMU_PAGE_64K;
if (mmu_linear_psize == MMU_PAGE_4K)
mmu_linear_psize = MMU_PAGE_64K;
- if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) {
+ if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
/*
* Don't use 64k pages for ioremap on pSeries, since
* that would stop us accessing the HEA ethernet.
@@ -547,15 +548,7 @@ int remove_section_mapping(unsigned long start, unsigned long end)
}
#endif /* CONFIG_MEMORY_HOTPLUG */
-static inline void make_bl(unsigned int *insn_addr, void *func)
-{
- unsigned long funcp = *((unsigned long *)func);
- int offset = funcp - (unsigned long)insn_addr;
-
- *insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc));
- flush_icache_range((unsigned long)insn_addr, 4+
- (unsigned long)insn_addr);
-}
+#define FUNCTION_TEXT(A) ((*(unsigned long *)(A)))
static void __init htab_finish_init(void)
{
@@ -570,16 +563,33 @@ static void __init htab_finish_init(void)
extern unsigned int *ht64_call_hpte_remove;
extern unsigned int *ht64_call_hpte_updatepp;
- make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
- make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
- make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
- make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
+ patch_branch(ht64_call_hpte_insert1,
+ FUNCTION_TEXT(ppc_md.hpte_insert),
+ BRANCH_SET_LINK);
+ patch_branch(ht64_call_hpte_insert2,
+ FUNCTION_TEXT(ppc_md.hpte_insert),
+ BRANCH_SET_LINK);
+ patch_branch(ht64_call_hpte_remove,
+ FUNCTION_TEXT(ppc_md.hpte_remove),
+ BRANCH_SET_LINK);
+ patch_branch(ht64_call_hpte_updatepp,
+ FUNCTION_TEXT(ppc_md.hpte_updatepp),
+ BRANCH_SET_LINK);
+
#endif /* CONFIG_PPC_HAS_HASH_64K */
- make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
- make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
- make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
- make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp);
+ patch_branch(htab_call_hpte_insert1,
+ FUNCTION_TEXT(ppc_md.hpte_insert),
+ BRANCH_SET_LINK);
+ patch_branch(htab_call_hpte_insert2,
+ FUNCTION_TEXT(ppc_md.hpte_insert),
+ BRANCH_SET_LINK);
+ patch_branch(htab_call_hpte_remove,
+ FUNCTION_TEXT(ppc_md.hpte_remove),
+ BRANCH_SET_LINK);
+ patch_branch(htab_call_hpte_updatepp,
+ FUNCTION_TEXT(ppc_md.hpte_updatepp),
+ BRANCH_SET_LINK);
}
static void __init htab_initialize(void)
@@ -598,7 +608,7 @@ static void __init htab_initialize(void)
/* Initialize page sizes */
htab_init_page_sizes();
- if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) {
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
mmu_kernel_ssize = MMU_SEGSIZE_1T;
mmu_highuser_ssize = MMU_SEGSIZE_1T;
printk(KERN_INFO "Using 1TB segments\n");
@@ -739,7 +749,7 @@ void __init early_init_mmu(void)
/* Initialize stab / SLB management except on iSeries
*/
- if (cpu_has_feature(CPU_FTR_SLB))
+ if (mmu_has_feature(MMU_FTR_SLB))
slb_initialize();
else if (!firmware_has_feature(FW_FEATURE_ISERIES))
stab_initialize(get_paca()->stab_real);
@@ -756,7 +766,7 @@ void __cpuinit early_init_mmu_secondary(void)
* in real mode on pSeries and we want a virtual address on
* iSeries anyway
*/
- if (cpu_has_feature(CPU_FTR_SLB))
+ if (mmu_has_feature(MMU_FTR_SLB))
slb_initialize();
else
stab_initialize(get_paca()->stab_addr);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9bb249c3046..0b9a5c1901b 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -529,7 +529,7 @@ static int __init hugetlbpage_init(void)
{
int psize;
- if (!cpu_has_feature(CPU_FTR_16M_PAGE))
+ if (!mmu_has_feature(MMU_FTR_16M_PAGE))
return -ENODEV;
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 2535828aa84..3bafc3deca6 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -20,9 +20,205 @@
#include <linux/idr.h>
#include <linux/module.h>
#include <linux/gfp.h>
+#include <linux/slab.h>
#include <asm/mmu_context.h>
+#ifdef CONFIG_PPC_ICSWX
+/*
+ * The processor and its L2 cache cause the icswx instruction to
+ * generate a COP_REQ transaction on PowerBus. The transaction has
+ * no address, and the processor does not perform an MMU access
+ * to authenticate the transaction. The command portion of the
+ * PowerBus COP_REQ transaction includes the LPAR_ID (LPID) and
+ * the coprocessor Process ID (PID), which the coprocessor compares
+ * to the authorized LPID and PID held in the coprocessor, to determine
+ * if the process is authorized to generate the transaction.
+ * The data of the COP_REQ transaction is 128-byte or less and is
+ * placed in cacheable memory on a 128-byte cache line boundary.
+ *
+ * The task to use a coprocessor should use use_cop() to allocate
+ * a coprocessor PID before executing icswx instruction. use_cop()
+ * also enables the coprocessor context switching. Drop_cop() is
+ * used to free the coprocessor PID.
+ *
+ * Example:
+ * Host Fabric Interface (HFI) is a PowerPC network coprocessor.
+ * Each HFI have multiple windows. Each HFI window serves as a
+ * network device sending to and receiving from HFI network.
+ * HFI immediate send function uses icswx instruction. The immediate
+ * send function allows small (single cache-line) packets be sent
+ * without using the regular HFI send FIFO and doorbell, which are
+ * much slower than immediate send.
+ *
+ * For each task intending to use HFI immediate send, the HFI driver
+ * calls use_cop() to obtain a coprocessor PID for the task.
+ * The HFI driver then allocate a free HFI window and save the
+ * coprocessor PID to the HFI window to allow the task to use the
+ * HFI window.
+ *
+ * The HFI driver repeatedly creates immediate send packets and
+ * issues icswx instruction to send data through the HFI window.
+ * The HFI compares the coprocessor PID in the CPU PID register
+ * to the PID held in the HFI window to determine if the transaction
+ * is allowed.
+ *
+ * When the task to release the HFI window, the HFI driver calls
+ * drop_cop() to release the coprocessor PID.
+ */
+
+#define COP_PID_NONE 0
+#define COP_PID_MIN (COP_PID_NONE + 1)
+#define COP_PID_MAX (0xFFFF)
+
+static DEFINE_SPINLOCK(mmu_context_acop_lock);
+static DEFINE_IDA(cop_ida);
+
+void switch_cop(struct mm_struct *next)
+{
+ mtspr(SPRN_PID, next->context.cop_pid);
+ mtspr(SPRN_ACOP, next->context.acop);
+}
+
+static int new_cop_pid(struct ida *ida, int min_id, int max_id,
+ spinlock_t *lock)
+{
+ int index;
+ int err;
+
+again:
+ if (!ida_pre_get(ida, GFP_KERNEL))
+ return -ENOMEM;
+
+ spin_lock(lock);
+ err = ida_get_new_above(ida, min_id, &index);
+ spin_unlock(lock);
+
+ if (err == -EAGAIN)
+ goto again;
+ else if (err)
+ return err;
+
+ if (index > max_id) {
+ spin_lock(lock);
+ ida_remove(ida, index);
+ spin_unlock(lock);
+ return -ENOMEM;
+ }
+
+ return index;
+}
+
+static void sync_cop(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ if (mm == current->active_mm)
+ switch_cop(current->active_mm);
+}
+
+/**
+ * Start using a coprocessor.
+ * @acop: mask of coprocessor to be used.
+ * @mm: The mm the coprocessor to associate with. Most likely current mm.
+ *
+ * Return a positive PID if successful. Negative errno otherwise.
+ * The returned PID will be fed to the coprocessor to determine if an
+ * icswx transaction is authenticated.
+ */
+int use_cop(unsigned long acop, struct mm_struct *mm)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_ICSWX))
+ return -ENODEV;
+
+ if (!mm || !acop)
+ return -EINVAL;
+
+ /* We need to make sure mm_users doesn't change */
+ down_read(&mm->mmap_sem);
+ spin_lock(mm->context.cop_lockp);
+
+ if (mm->context.cop_pid == COP_PID_NONE) {
+ ret = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX,
+ &mmu_context_acop_lock);
+ if (ret < 0)
+ goto out;
+
+ mm->context.cop_pid = ret;
+ }
+ mm->context.acop |= acop;
+
+ sync_cop(mm);
+
+ /*
+ * If this is a threaded process then there might be other threads
+ * running. We need to send an IPI to force them to pick up any
+ * change in PID and ACOP.
+ */
+ if (atomic_read(&mm->mm_users) > 1)
+ smp_call_function(sync_cop, mm, 1);
+
+ ret = mm->context.cop_pid;
+
+out:
+ spin_unlock(mm->context.cop_lockp);
+ up_read(&mm->mmap_sem);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(use_cop);
+
+/**
+ * Stop using a coprocessor.
+ * @acop: mask of coprocessor to be stopped.
+ * @mm: The mm the coprocessor associated with.
+ */
+void drop_cop(unsigned long acop, struct mm_struct *mm)
+{
+ int free_pid = COP_PID_NONE;
+
+ if (!cpu_has_feature(CPU_FTR_ICSWX))
+ return;
+
+ if (WARN_ON_ONCE(!mm))
+ return;
+
+ /* We need to make sure mm_users doesn't change */
+ down_read(&mm->mmap_sem);
+ spin_lock(mm->context.cop_lockp);
+
+ mm->context.acop &= ~acop;
+
+ if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) {
+ free_pid = mm->context.cop_pid;
+ mm->context.cop_pid = COP_PID_NONE;
+ }
+
+ sync_cop(mm);
+
+ /*
+ * If this is a threaded process then there might be other threads
+ * running. We need to send an IPI to force them to pick up any
+ * change in PID and ACOP.
+ */
+ if (atomic_read(&mm->mm_users) > 1)
+ smp_call_function(sync_cop, mm, 1);
+
+ if (free_pid != COP_PID_NONE) {
+ spin_lock(&mmu_context_acop_lock);
+ ida_remove(&cop_ida, free_pid);
+ spin_unlock(&mmu_context_acop_lock);
+ }
+
+ spin_unlock(mm->context.cop_lockp);
+ up_read(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(drop_cop);
+
+#endif /* CONFIG_PPC_ICSWX */
+
static DEFINE_SPINLOCK(mmu_context_lock);
static DEFINE_IDA(mmu_context_ida);
@@ -31,7 +227,6 @@ static DEFINE_IDA(mmu_context_ida);
* Each segment contains 2^28 bytes. Each context maps 2^44 bytes,
* so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
*/
-#define NO_CONTEXT 0
#define MAX_CONTEXT ((1UL << 19) - 1)
int __init_new_context(void)
@@ -79,6 +274,16 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
slice_set_user_psize(mm, mmu_virtual_psize);
subpage_prot_init_new_context(mm);
mm->context.id = index;
+#ifdef CONFIG_PPC_ICSWX
+ mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+ if (!mm->context.cop_lockp) {
+ __destroy_context(index);
+ subpage_prot_free(mm);
+ mm->context.id = MMU_NO_CONTEXT;
+ return -ENOMEM;
+ }
+ spin_lock_init(mm->context.cop_lockp);
+#endif /* CONFIG_PPC_ICSWX */
return 0;
}
@@ -93,7 +298,12 @@ EXPORT_SYMBOL_GPL(__destroy_context);
void destroy_context(struct mm_struct *mm)
{
+#ifdef CONFIG_PPC_ICSWX
+ drop_cop(mm->context.acop, mm);
+ kfree(mm->context.cop_lockp);
+ mm->context.cop_lockp = NULL;
+#endif /* CONFIG_PPC_ICSWX */
__destroy_context(mm->context.id);
subpage_prot_free(mm);
- mm->context.id = NO_CONTEXT;
+ mm->context.id = MMU_NO_CONTEXT;
}
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index c0aab52da3a..336807de550 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -338,12 +338,14 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
break;
#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
@@ -407,7 +409,17 @@ void __init mmu_context_init(void)
} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
first_context = 1;
last_context = 65535;
- } else {
+ } else
+#ifdef CONFIG_PPC_BOOK3E_MMU
+ if (mmu_has_feature(MMU_FTR_TYPE_3E)) {
+ u32 mmucfg = mfspr(SPRN_MMUCFG);
+ u32 pid_bits = (mmucfg & MMUCFG_PIDSIZE_MASK)
+ >> MMUCFG_PIDSIZE_SHIFT;
+ first_context = 1;
+ last_context = (1UL << (pid_bits + 1)) - 1;
+ } else
+#endif
+ {
first_context = 1;
last_context = 255;
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 5ec1dad2a19..2164006fe17 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -311,14 +311,13 @@ EXPORT_SYMBOL_GPL(of_node_to_nid);
static int __init find_min_common_depth(void)
{
int depth;
- struct device_node *rtas_root;
struct device_node *chosen;
+ struct device_node *root;
const char *vec5;
- rtas_root = of_find_node_by_path("/rtas");
-
- if (!rtas_root)
- return -1;
+ root = of_find_node_by_path("/rtas");
+ if (!root)
+ root = of_find_node_by_path("/");
/*
* This property is a set of 32-bit integers, each representing
@@ -332,7 +331,7 @@ static int __init find_min_common_depth(void)
* NUMA boundary and the following are progressively less significant
* boundaries. There can be more than one level of NUMA.
*/
- distance_ref_points = of_get_property(rtas_root,
+ distance_ref_points = of_get_property(root,
"ibm,associativity-reference-points",
&distance_ref_points_depth);
@@ -376,11 +375,11 @@ static int __init find_min_common_depth(void)
distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
}
- of_node_put(rtas_root);
+ of_node_put(root);
return depth;
err:
- of_node_put(rtas_root);
+ of_node_put(root);
return -1;
}
@@ -1453,7 +1452,7 @@ int arch_update_cpu_topology(void)
unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
struct sys_device *sysdev;
- for_each_cpu_mask(cpu, cpu_associativity_changes_mask) {
+ for_each_cpu(cpu,&cpu_associativity_changes_mask) {
vphn_get_associativity(cpu, associativity);
nid = associativity_to_nid(associativity);
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 8dc41c0157f..51f87956f8f 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -133,7 +133,15 @@ ioremap(phys_addr_t addr, unsigned long size)
EXPORT_SYMBOL(ioremap);
void __iomem *
-ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
+ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+ return __ioremap_caller(addr, size, _PAGE_NO_CACHE,
+ __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iomem *
+ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
{
/* writeable implies dirty for kernel addresses */
if (flags & _PAGE_RW)
@@ -152,7 +160,7 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}
-EXPORT_SYMBOL(ioremap_flags);
+EXPORT_SYMBOL(ioremap_prot);
void __iomem *
__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 88927a05cdc..6e595f6496d 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -255,7 +255,17 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size)
return __ioremap_caller(addr, size, flags, caller);
}
-void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
+void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+ unsigned long flags = _PAGE_NO_CACHE;
+ void *caller = __builtin_return_address(0);
+
+ if (ppc_md.ioremap)
+ return ppc_md.ioremap(addr, size, flags, caller);
+ return __ioremap_caller(addr, size, flags, caller);
+}
+
+void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
unsigned long flags)
{
void *caller = __builtin_return_address(0);
@@ -311,7 +321,8 @@ void iounmap(volatile void __iomem *token)
}
EXPORT_SYMBOL(ioremap);
-EXPORT_SYMBOL(ioremap_flags);
+EXPORT_SYMBOL(ioremap_wc);
+EXPORT_SYMBOL(ioremap_prot);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(__ioremap_at);
EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 1d98ecc8eec..e22276cb67a 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -24,6 +24,7 @@
#include <asm/firmware.h>
#include <linux/compiler.h>
#include <asm/udbg.h>
+#include <asm/code-patching.h>
extern void slb_allocate_realmode(unsigned long ea);
@@ -166,7 +167,7 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
int esid_1t_count;
/* System is not 1T segment size capable. */
- if (!cpu_has_feature(CPU_FTR_1T_SEGMENT))
+ if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
return (GET_ESID(addr1) == GET_ESID(addr2));
esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
@@ -201,7 +202,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
*/
hard_irq_disable();
offset = get_paca()->slb_cache_ptr;
- if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) &&
+ if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
offset <= SLB_CACHE_ENTRIES) {
int i;
asm volatile("isync" : : : "memory");
@@ -249,9 +250,8 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
static inline void patch_slb_encoding(unsigned int *insn_addr,
unsigned int immed)
{
- *insn_addr = (*insn_addr & 0xffff0000) | immed;
- flush_icache_range((unsigned long)insn_addr, 4+
- (unsigned long)insn_addr);
+ int insn = (*insn_addr & 0xffff0000) | immed;
+ patch_instruction(insn_addr, insn);
}
void slb_set_size(u16 size)
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 95ce3558169..ef653dc95b6 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -58,7 +58,7 @@ _GLOBAL(slb_miss_kernel_load_linear)
li r11,0
BEGIN_FTR_SECTION
b slb_finish_load
-END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
b slb_finish_load_1T
1:
@@ -87,7 +87,7 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
6:
BEGIN_FTR_SECTION
b slb_finish_load
-END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
b slb_finish_load_1T
0: /* user address: proto-VSID = context << 15 | ESID. First check
@@ -138,11 +138,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
ld r9,PACACONTEXTID(r13)
BEGIN_FTR_SECTION
cmpldi r10,0x1000
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
rldimi r10,r9,USER_ESID_BITS,0
BEGIN_FTR_SECTION
bge slb_finish_load_1T
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
b slb_finish_load
8: /* invalid EA */
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 446a01842a7..41e31642a86 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -243,7 +243,7 @@ void __init stabs_alloc(void)
{
int cpu;
- if (cpu_has_feature(CPU_FTR_SLB))
+ if (mmu_has_feature(MMU_FTR_SLB))
return;
for_each_possible_cpu(cpu) {
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
index aa46e9d1e77..19395f18b1d 100644
--- a/arch/powerpc/platforms/44x/iss4xx.c
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -87,7 +87,7 @@ static void __cpuinit smp_iss4xx_setup_cpu(int cpu)
mpic_setup_this_cpu();
}
-static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
+static int __cpuinit smp_iss4xx_kick_cpu(int cpu)
{
struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
const u64 *spin_table_addr_prop;
@@ -104,7 +104,7 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
NULL);
if (spin_table_addr_prop == NULL) {
pr_err("CPU%d: Can't start, missing cpu-release-addr !\n", cpu);
- return;
+ return -ENOENT;
}
/* Assume it's mapped as part of the linear mapping. This is a bit
@@ -117,6 +117,8 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
smp_wmb();
spin_table[1] = __pa(start_secondary_47x);
mb();
+
+ return 0;
}
static struct smp_ops_t iss_smp_ops = {
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index cfc4b200998..9f09319352c 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -61,7 +61,7 @@ irq_to_pic_bit(unsigned int irq)
static void
cpld_mask_irq(struct irq_data *d)
{
- unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d);
void __iomem *pic_mask = irq_to_pic_mask(cpld_irq);
out_8(pic_mask,
@@ -71,7 +71,7 @@ cpld_mask_irq(struct irq_data *d)
static void
cpld_unmask_irq(struct irq_data *d)
{
- unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d);
void __iomem *pic_mask = irq_to_pic_mask(cpld_irq);
out_8(pic_mask,
@@ -97,7 +97,7 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
status |= (ignore | mask);
if (status == 0xff)
- return NO_IRQ_IGNORE;
+ return NO_IRQ;
cpld_irq = ffz(status) + offset;
@@ -109,14 +109,14 @@ cpld_pic_cascade(unsigned int irq, struct irq_desc *desc)
{
irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
&cpld_regs->pci_mask);
- if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) {
+ if (irq != NO_IRQ) {
generic_handle_irq(irq);
return;
}
irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status,
&cpld_regs->misc_mask);
- if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) {
+ if (irq != NO_IRQ) {
generic_handle_irq(irq);
return;
}
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index 57a6a349e93..96f85e5e0cd 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -56,7 +56,7 @@ static void media5200_irq_unmask(struct irq_data *d)
spin_lock_irqsave(&media5200_irq.lock, flags);
val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
- val |= 1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq);
+ val |= 1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d));
out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val);
spin_unlock_irqrestore(&media5200_irq.lock, flags);
}
@@ -68,7 +68,7 @@ static void media5200_irq_mask(struct irq_data *d)
spin_lock_irqsave(&media5200_irq.lock, flags);
val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
- val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq));
+ val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d)));
out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val);
spin_unlock_irqrestore(&media5200_irq.lock, flags);
}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
index 1dd15400f6f..1a9a4957057 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -157,48 +157,30 @@ static inline void io_be_clrbit(u32 __iomem *addr, int bitno)
*/
static void mpc52xx_extirq_mask(struct irq_data *d)
{
- int irq;
- int l2irq;
-
- irq = irq_map[d->irq].hwirq;
- l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
io_be_clrbit(&intr->ctrl, 11 - l2irq);
}
static void mpc52xx_extirq_unmask(struct irq_data *d)
{
- int irq;
- int l2irq;
-
- irq = irq_map[d->irq].hwirq;
- l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
io_be_setbit(&intr->ctrl, 11 - l2irq);
}
static void mpc52xx_extirq_ack(struct irq_data *d)
{
- int irq;
- int l2irq;
-
- irq = irq_map[d->irq].hwirq;
- l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
io_be_setbit(&intr->ctrl, 27-l2irq);
}
static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type)
{
u32 ctrl_reg, type;
- int irq;
- int l2irq;
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
void *handler = handle_level_irq;
- irq = irq_map[d->irq].hwirq;
- l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
- pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__, irq, l2irq, flow_type);
+ pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__,
+ (int) irqd_to_hwirq(d), l2irq, flow_type);
switch (flow_type) {
case IRQF_TRIGGER_HIGH: type = 0; break;
@@ -237,23 +219,13 @@ static int mpc52xx_null_set_type(struct irq_data *d, unsigned int flow_type)
static void mpc52xx_main_mask(struct irq_data *d)
{
- int irq;
- int l2irq;
-
- irq = irq_map[d->irq].hwirq;
- l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
io_be_setbit(&intr->main_mask, 16 - l2irq);
}
static void mpc52xx_main_unmask(struct irq_data *d)
{
- int irq;
- int l2irq;
-
- irq = irq_map[d->irq].hwirq;
- l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
io_be_clrbit(&intr->main_mask, 16 - l2irq);
}
@@ -270,23 +242,13 @@ static struct irq_chip mpc52xx_main_irqchip = {
*/
static void mpc52xx_periph_mask(struct irq_data *d)
{
- int irq;
- int l2irq;
-
- irq = irq_map[d->irq].hwirq;
- l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
io_be_setbit(&intr->per_mask, 31 - l2irq);
}
static void mpc52xx_periph_unmask(struct irq_data *d)
{
- int irq;
- int l2irq;
-
- irq = irq_map[d->irq].hwirq;
- l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
io_be_clrbit(&intr->per_mask, 31 - l2irq);
}
@@ -303,34 +265,19 @@ static struct irq_chip mpc52xx_periph_irqchip = {
*/
static void mpc52xx_sdma_mask(struct irq_data *d)
{
- int irq;
- int l2irq;
-
- irq = irq_map[d->irq].hwirq;
- l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
io_be_setbit(&sdma->IntMask, l2irq);
}
static void mpc52xx_sdma_unmask(struct irq_data *d)
{
- int irq;
- int l2irq;
-
- irq = irq_map[d->irq].hwirq;
- l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
io_be_clrbit(&sdma->IntMask, l2irq);
}
static void mpc52xx_sdma_ack(struct irq_data *d)
{
- int irq;
- int l2irq;
-
- irq = irq_map[d->irq].hwirq;
- l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
out_be32(&sdma->IntPend, 1 << l2irq);
}
@@ -539,7 +486,7 @@ void __init mpc52xx_init_irq(void)
unsigned int mpc52xx_get_irq(void)
{
u32 status;
- int irq = NO_IRQ_IGNORE;
+ int irq;
status = in_be32(&intr->enc_status);
if (status & 0x00000400) { /* critical */
@@ -562,6 +509,8 @@ unsigned int mpc52xx_get_irq(void)
} else {
irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET);
}
+ } else {
+ return NO_IRQ;
}
return irq_linear_revmap(mpc52xx_irqhost, irq);
diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index 4a4eb6ffa12..8ccf9ed62fe 100644
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -42,7 +42,7 @@ struct pq2ads_pci_pic {
static void pq2ads_pci_mask_irq(struct irq_data *d)
{
struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d);
- int irq = NUM_IRQS - virq_to_hw(d->irq) - 1;
+ int irq = NUM_IRQS - irqd_to_hwirq(d) - 1;
if (irq != -1) {
unsigned long flags;
@@ -58,7 +58,7 @@ static void pq2ads_pci_mask_irq(struct irq_data *d)
static void pq2ads_pci_unmask_irq(struct irq_data *d)
{
struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d);
- int irq = NUM_IRQS - virq_to_hw(d->irq) - 1;
+ int irq = NUM_IRQS - irqd_to_hwirq(d) - 1;
if (irq != -1) {
unsigned long flags;
@@ -112,16 +112,8 @@ static int pci_pic_host_map(struct irq_host *h, unsigned int virq,
return 0;
}
-static void pci_host_unmap(struct irq_host *h, unsigned int virq)
-{
- /* remove chip and handler */
- irq_set_chip_data(virq, NULL);
- irq_set_chip(virq, NULL);
-}
-
static struct irq_host_ops pci_pic_host_ops = {
.map = pci_pic_host_map,
- .unmap = pci_host_unmap,
};
int __init pq2ads_pci_init_irq(void)
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 0d00ff9d05a..d6a93a10c0f 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -41,7 +41,7 @@ extern void __early_start(void);
#define NUM_BOOT_ENTRY 8
#define SIZE_BOOT_ENTRY (NUM_BOOT_ENTRY * sizeof(u32))
-static void __init
+static int __init
smp_85xx_kick_cpu(int nr)
{
unsigned long flags;
@@ -60,7 +60,7 @@ smp_85xx_kick_cpu(int nr)
if (cpu_rel_addr == NULL) {
printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr);
- return;
+ return -ENOENT;
}
/*
@@ -107,6 +107,8 @@ smp_85xx_kick_cpu(int nr)
iounmap(bptr_vaddr);
pr_debug("waited %d msecs for CPU #%d.\n", n, nr);
+
+ return 0;
}
static void __init
@@ -233,8 +235,10 @@ void __init mpc85xx_smp_init(void)
smp_85xx_ops.message_pass = smp_mpic_message_pass;
}
- if (cpu_has_feature(CPU_FTR_DBELL))
- smp_85xx_ops.message_pass = doorbell_message_pass;
+ if (cpu_has_feature(CPU_FTR_DBELL)) {
+ smp_85xx_ops.message_pass = smp_muxed_ipi_message_pass;
+ smp_85xx_ops.cause_ipi = doorbell_cause_ipi;
+ }
BUG_ON(!smp_85xx_ops.message_pass);
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index db864623b4a..12cb9bb2cc6 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -48,8 +48,6 @@ static struct socrates_fpga_irq_info fpga_irqs[SOCRATES_FPGA_NUM_IRQS] = {
[8] = {0, IRQ_TYPE_LEVEL_HIGH},
};
-#define socrates_fpga_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq)
-
static DEFINE_RAW_SPINLOCK(socrates_fpga_pic_lock);
static void __iomem *socrates_fpga_pic_iobase;
@@ -110,11 +108,9 @@ void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc)
static void socrates_fpga_pic_ack(struct irq_data *d)
{
unsigned long flags;
- unsigned int hwirq, irq_line;
+ unsigned int irq_line, hwirq = irqd_to_hwirq(d);
uint32_t mask;
- hwirq = socrates_fpga_irq_to_hw(d->irq);
-
irq_line = fpga_irqs[hwirq].irq_line;
raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -127,12 +123,10 @@ static void socrates_fpga_pic_ack(struct irq_data *d)
static void socrates_fpga_pic_mask(struct irq_data *d)
{
unsigned long flags;
- unsigned int hwirq;
+ unsigned int hwirq = irqd_to_hwirq(d);
int irq_line;
u32 mask;
- hwirq = socrates_fpga_irq_to_hw(d->irq);
-
irq_line = fpga_irqs[hwirq].irq_line;
raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -145,12 +139,10 @@ static void socrates_fpga_pic_mask(struct irq_data *d)
static void socrates_fpga_pic_mask_ack(struct irq_data *d)
{
unsigned long flags;
- unsigned int hwirq;
+ unsigned int hwirq = irqd_to_hwirq(d);
int irq_line;
u32 mask;
- hwirq = socrates_fpga_irq_to_hw(d->irq);
-
irq_line = fpga_irqs[hwirq].irq_line;
raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -164,12 +156,10 @@ static void socrates_fpga_pic_mask_ack(struct irq_data *d)
static void socrates_fpga_pic_unmask(struct irq_data *d)
{
unsigned long flags;
- unsigned int hwirq;
+ unsigned int hwirq = irqd_to_hwirq(d);
int irq_line;
u32 mask;
- hwirq = socrates_fpga_irq_to_hw(d->irq);
-
irq_line = fpga_irqs[hwirq].irq_line;
raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -182,12 +172,10 @@ static void socrates_fpga_pic_unmask(struct irq_data *d)
static void socrates_fpga_pic_eoi(struct irq_data *d)
{
unsigned long flags;
- unsigned int hwirq;
+ unsigned int hwirq = irqd_to_hwirq(d);
int irq_line;
u32 mask;
- hwirq = socrates_fpga_irq_to_hw(d->irq);
-
irq_line = fpga_irqs[hwirq].irq_line;
raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -201,12 +189,10 @@ static int socrates_fpga_pic_set_type(struct irq_data *d,
unsigned int flow_type)
{
unsigned long flags;
- unsigned int hwirq;
+ unsigned int hwirq = irqd_to_hwirq(d);
int polarity;
u32 mask;
- hwirq = socrates_fpga_irq_to_hw(d->irq);
-
if (fpga_irqs[hwirq].type != IRQ_TYPE_NONE)
return -EINVAL;
diff --git a/arch/powerpc/platforms/86xx/gef_pic.c b/arch/powerpc/platforms/86xx/gef_pic.c
index 0beec7d5566..94594e58594 100644
--- a/arch/powerpc/platforms/86xx/gef_pic.c
+++ b/arch/powerpc/platforms/86xx/gef_pic.c
@@ -46,8 +46,6 @@
#define GEF_PIC_CPU0_MCP_MASK GEF_PIC_MCP_MASK(0)
#define GEF_PIC_CPU1_MCP_MASK GEF_PIC_MCP_MASK(1)
-#define gef_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq)
-
static DEFINE_RAW_SPINLOCK(gef_pic_lock);
@@ -113,11 +111,9 @@ void gef_pic_cascade(unsigned int irq, struct irq_desc *desc)
static void gef_pic_mask(struct irq_data *d)
{
unsigned long flags;
- unsigned int hwirq;
+ unsigned int hwirq = irqd_to_hwirq(d);
u32 mask;
- hwirq = gef_irq_to_hw(d->irq);
-
raw_spin_lock_irqsave(&gef_pic_lock, flags);
mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
mask &= ~(1 << hwirq);
@@ -136,11 +132,9 @@ static void gef_pic_mask_ack(struct irq_data *d)
static void gef_pic_unmask(struct irq_data *d)
{
unsigned long flags;
- unsigned int hwirq;
+ unsigned int hwirq = irqd_to_hwirq(d);
u32 mask;
- hwirq = gef_irq_to_hw(d->irq);
-
raw_spin_lock_irqsave(&gef_pic_lock, flags);
mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
mask |= (1 << hwirq);
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 018cc67be42..a896511690c 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -66,7 +66,7 @@ static void __init mpc8610_suspend_init(void)
return;
}
- ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9/wakeup", NULL);
+ ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9:wakeup", NULL);
if (ret) {
pr_err("%s: can't request pixis event IRQ: %d\n",
__func__, ret);
@@ -105,45 +105,77 @@ machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
-static u32 get_busfreq(void)
-{
- struct device_node *node;
-
- u32 fs_busfreq = 0;
- node = of_find_node_by_type(NULL, "cpu");
- if (node) {
- unsigned int size;
- const unsigned int *prop =
- of_get_property(node, "bus-frequency", &size);
- if (prop)
- fs_busfreq = *prop;
- of_node_put(node);
- };
- return fs_busfreq;
-}
+/*
+ * DIU Area Descriptor
+ *
+ * The MPC8610 reference manual shows the bits of the AD register in
+ * little-endian order, which causes the BLUE_C field to be split into two
+ * parts. To simplify the definition of the MAKE_AD() macro, we define the
+ * fields in big-endian order and byte-swap the result.
+ *
+ * So even though the registers don't look like they're in the
+ * same bit positions as they are on the P1022, the same value is written to
+ * the AD register on the MPC8610 and on the P1022.
+ */
+#define AD_BYTE_F 0x10000000
+#define AD_ALPHA_C_MASK 0x0E000000
+#define AD_ALPHA_C_SHIFT 25
+#define AD_BLUE_C_MASK 0x01800000
+#define AD_BLUE_C_SHIFT 23
+#define AD_GREEN_C_MASK 0x00600000
+#define AD_GREEN_C_SHIFT 21
+#define AD_RED_C_MASK 0x00180000
+#define AD_RED_C_SHIFT 19
+#define AD_PALETTE 0x00040000
+#define AD_PIXEL_S_MASK 0x00030000
+#define AD_PIXEL_S_SHIFT 16
+#define AD_COMP_3_MASK 0x0000F000
+#define AD_COMP_3_SHIFT 12
+#define AD_COMP_2_MASK 0x00000F00
+#define AD_COMP_2_SHIFT 8
+#define AD_COMP_1_MASK 0x000000F0
+#define AD_COMP_1_SHIFT 4
+#define AD_COMP_0_MASK 0x0000000F
+#define AD_COMP_0_SHIFT 0
+
+#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \
+ cpu_to_le32(AD_BYTE_F | (alpha << AD_ALPHA_C_SHIFT) | \
+ (blue << AD_BLUE_C_SHIFT) | (green << AD_GREEN_C_SHIFT) | \
+ (red << AD_RED_C_SHIFT) | (c3 << AD_COMP_3_SHIFT) | \
+ (c2 << AD_COMP_2_SHIFT) | (c1 << AD_COMP_1_SHIFT) | \
+ (c0 << AD_COMP_0_SHIFT) | (size << AD_PIXEL_S_SHIFT))
unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel,
int monitor_port)
{
static const unsigned long pixelformat[][3] = {
- {0x88882317, 0x88083218, 0x65052119},
- {0x88883316, 0x88082219, 0x65053118},
+ {
+ MAKE_AD(3, 0, 2, 1, 3, 8, 8, 8, 8),
+ MAKE_AD(4, 2, 0, 1, 2, 8, 8, 8, 0),
+ MAKE_AD(4, 0, 2, 1, 1, 5, 6, 5, 0)
+ },
+ {
+ MAKE_AD(3, 2, 0, 1, 3, 8, 8, 8, 8),
+ MAKE_AD(4, 0, 2, 1, 2, 8, 8, 8, 0),
+ MAKE_AD(4, 2, 0, 1, 1, 5, 6, 5, 0)
+ },
};
- unsigned int pix_fmt, arch_monitor;
+ unsigned int arch_monitor;
+ /* The DVI port is mis-wired on revision 1 of this board. */
arch_monitor = ((*pixis_arch == 0x01) && (monitor_port == 0))? 0 : 1;
- /* DVI port for board version 0x01 */
-
- if (bits_per_pixel == 32)
- pix_fmt = pixelformat[arch_monitor][0];
- else if (bits_per_pixel == 24)
- pix_fmt = pixelformat[arch_monitor][1];
- else if (bits_per_pixel == 16)
- pix_fmt = pixelformat[arch_monitor][2];
- else
- pix_fmt = pixelformat[1][0];
-
- return pix_fmt;
+
+ switch (bits_per_pixel) {
+ case 32:
+ return pixelformat[arch_monitor][0];
+ case 24:
+ return pixelformat[arch_monitor][1];
+ case 16:
+ return pixelformat[arch_monitor][2];
+ default:
+ pr_err("fsl-diu: unsupported pixel depth %u\n", bits_per_pixel);
+ return 0;
+ }
}
void mpc8610hpcd_set_gamma_table(int monitor_port, char *gamma_table_base)
@@ -190,8 +222,7 @@ void mpc8610hpcd_set_pixel_clock(unsigned int pixclock)
}
/* Pixel Clock configuration */
- pr_debug("DIU: Bus Frequency = %d\n", get_busfreq());
- speed_ccb = get_busfreq();
+ speed_ccb = fsl_get_sys_freq();
/* Calculate the pixel clock with the smallest error */
/* calculate the following in steps to avoid overflow */
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
index eacea0e3fcc..af09baee22c 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -56,7 +56,7 @@ smp_86xx_release_core(int nr)
}
-static void __init
+static int __init
smp_86xx_kick_cpu(int nr)
{
unsigned int save_vector;
@@ -65,7 +65,7 @@ smp_86xx_kick_cpu(int nr)
unsigned int *vector = (unsigned int *)(KERNELBASE + 0x100);
if (nr < 0 || nr >= NR_CPUS)
- return;
+ return -ENOENT;
pr_debug("smp_86xx_kick_cpu: kick CPU #%d\n", nr);
@@ -92,6 +92,8 @@ smp_86xx_kick_cpu(int nr)
local_irq_restore(flags);
pr_debug("wait CPU #%d for %d msecs.\n", nr, n);
+
+ return 0;
}
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 9ecce995dd4..1e121088826 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -150,7 +150,7 @@ void __init mpc8xx_calibrate_decr(void)
*/
cpu = of_find_node_by_type(NULL, "cpu");
virq= irq_of_parse_and_map(cpu, 0);
- irq = irq_map[virq].hwirq;
+ irq = virq_to_hw(virq);
sys_tmr2 = immr_map(im_sit);
out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) |
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f7b07720aa3..f970ca2b180 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -20,6 +20,7 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig"
source "arch/powerpc/platforms/44x/Kconfig"
source "arch/powerpc/platforms/40x/Kconfig"
source "arch/powerpc/platforms/amigaone/Kconfig"
+source "arch/powerpc/platforms/wsp/Kconfig"
config KVM_GUEST
bool "KVM Guest support"
@@ -56,16 +57,19 @@ config UDBG_RTAS_CONSOLE
depends on PPC_RTAS
default n
+config PPC_SMP_MUXED_IPI
+ bool
+ help
+ Select this opton if your platform supports SMP and your
+ interrupt controller provides less than 4 interrupts to each
+ cpu. This will enable the generic code to multiplex the 4
+ messages on to one ipi.
+
config PPC_UDBG_BEAT
bool "BEAT based debug console"
depends on PPC_CELLEB
default n
-config XICS
- depends on PPC_PSERIES
- bool
- default y
-
config IPIC
bool
default n
@@ -147,14 +151,27 @@ config PPC_970_NAP
bool
default n
+config PPC_P7_NAP
+ bool
+ default n
+
config PPC_INDIRECT_IO
bool
select GENERIC_IOMAP
- default n
+
+config PPC_INDIRECT_PIO
+ bool
+ select PPC_INDIRECT_IO
+
+config PPC_INDIRECT_MMIO
+ bool
+ select PPC_INDIRECT_IO
+
+config PPC_IO_WORKAROUNDS
+ bool
config GENERIC_IOMAP
bool
- default n
source "drivers/cpufreq/Kconfig"
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 111138c55f9..2165b65876f 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -73,6 +73,7 @@ config PPC_BOOK3S_64
config PPC_BOOK3E_64
bool "Embedded processors"
select PPC_FPU # Make it a choice ?
+ select PPC_SMP_MUXED_IPI
endchoice
@@ -107,6 +108,10 @@ config POWER4
depends on PPC64 && PPC_BOOK3S
def_bool y
+config PPC_A2
+ bool
+ depends on PPC_BOOK3E_64
+
config TUNE_CELL
bool "Optimize for Cell Broadband Engine"
depends on PPC64 && PPC_BOOK3S
@@ -174,6 +179,7 @@ config FSL_BOOKE
config PPC_FSL_BOOK3E
bool
select FSL_EMB_PERFMON
+ select PPC_SMP_MUXED_IPI
default y if FSL_BOOKE
config PTE_64BIT
@@ -226,6 +232,24 @@ config VSX
If in doubt, say Y here.
+config PPC_ICSWX
+ bool "Support for PowerPC icswx coprocessor instruction"
+ depends on POWER4
+ default n
+ ---help---
+
+ This option enables kernel support for the PowerPC Initiate
+ Coprocessor Store Word (icswx) coprocessor instruction on POWER7
+ or newer processors.
+
+ This option is only useful if you have a processor that supports
+ the icswx coprocessor instruction. It does not have any effect
+ on processors without the icswx coprocessor instruction.
+
+ This option slightly increases kernel memory usage.
+
+ If in doubt, say N here.
+
config SPE
bool "SPE Support"
depends on E200 || (E500 && !PPC_E500MC)
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index fdb9f0b0d7a..73e2116cfee 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_PPC_CELL) += cell/
obj-$(CONFIG_PPC_PS3) += ps3/
obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/
obj-$(CONFIG_AMIGAONE) += amigaone/
+obj-$(CONFIG_PPC_WSP) += wsp/
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 81239ebed83..67d5009b4e8 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -6,7 +6,8 @@ config PPC_CELL_COMMON
bool
select PPC_CELL
select PPC_DCR_MMIO
- select PPC_INDIRECT_IO
+ select PPC_INDIRECT_PIO
+ select PPC_INDIRECT_MMIO
select PPC_NATIVE
select PPC_RTAS
select IRQ_EDGE_EOI_HANDLER
@@ -15,6 +16,7 @@ config PPC_CELL_NATIVE
bool
select PPC_CELL_COMMON
select MPIC
+ select PPC_IO_WORKAROUNDS
select IBM_NEW_EMAC_EMAC4
select IBM_NEW_EMAC_RGMII
select IBM_NEW_EMAC_ZMII #test only
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index 83fafe92264..a4a89350bcf 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -1,7 +1,7 @@
obj-$(CONFIG_PPC_CELL_COMMON) += cbe_regs.o interrupt.o pervasive.o
obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \
- pmu.o io-workarounds.o spider-pci.o
+ pmu.o spider-pci.o
obj-$(CONFIG_CBE_RAS) += ras.o
obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
@@ -39,11 +39,10 @@ obj-y += celleb_setup.o \
celleb_pci.o celleb_scc_epci.o \
celleb_scc_pciex.o \
celleb_scc_uhc.o \
- io-workarounds.o spider-pci.o \
- beat.o beat_htab.o beat_hvCall.o \
- beat_interrupt.o beat_iommu.o
+ spider-pci.o beat.o beat_htab.o \
+ beat_hvCall.o beat_interrupt.o \
+ beat_iommu.o
-obj-$(CONFIG_SMP) += beat_smp.o
obj-$(CONFIG_PPC_UDBG_BEAT) += beat_udbg.o
obj-$(CONFIG_SERIAL_TXX9) += celleb_scc_sio.o
obj-$(CONFIG_SPU_BASE) += beat_spu_priv1.o
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index bb5ebf8fa80..ac06903e136 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -113,7 +113,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
pr_devel("axon_msi: woff %x roff %x msi %x\n",
write_offset, msic->read_offset, msi);
- if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) {
+ if (msi < NR_IRQS && irq_get_chip_data(msi) == msic) {
generic_handle_irq(msi);
msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
} else {
@@ -320,6 +320,7 @@ static struct irq_chip msic_irq_chip = {
static int msic_host_map(struct irq_host *h, unsigned int virq,
irq_hw_number_t hw)
{
+ irq_set_chip_data(virq, h->host_data);
irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
return 0;
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
index 4cb9e147c30..55015e1f693 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -148,16 +148,6 @@ static int beatic_pic_host_map(struct irq_host *h, unsigned int virq,
}
/*
- * Update binding hardware IRQ number (hw) and Virtuql
- * IRQ number (virq). This is called only once for a given mapping.
- */
-static void beatic_pic_host_remap(struct irq_host *h, unsigned int virq,
- irq_hw_number_t hw)
-{
- beat_construct_and_connect_irq_plug(virq, hw);
-}
-
-/*
* Translate device-tree interrupt spec to irq_hw_number_t style (ulong),
* to pass away to irq_create_mapping().
*
@@ -184,7 +174,6 @@ static int beatic_pic_host_match(struct irq_host *h, struct device_node *np)
static struct irq_host_ops beatic_pic_host_ops = {
.map = beatic_pic_host_map,
- .remap = beatic_pic_host_remap,
.unmap = beatic_pic_host_unmap,
.xlate = beatic_pic_host_xlate,
.match = beatic_pic_host_match,
@@ -257,22 +246,6 @@ void __init beatic_init_IRQ(void)
irq_set_default_host(beatic_host);
}
-#ifdef CONFIG_SMP
-
-/* Nullified to compile with SMP mode */
-void beatic_setup_cpu(int cpu)
-{
-}
-
-void beatic_cause_IPI(int cpu, int mesg)
-{
-}
-
-void beatic_request_IPIs(void)
-{
-}
-#endif /* CONFIG_SMP */
-
void beatic_deinit_IRQ(void)
{
int i;
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h
index b470fd0051f..a7e52f91a07 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.h
+++ b/arch/powerpc/platforms/cell/beat_interrupt.h
@@ -24,9 +24,6 @@
extern void beatic_init_IRQ(void);
extern unsigned int beatic_get_irq(void);
-extern void beatic_cause_IPI(int cpu, int mesg);
-extern void beatic_request_IPIs(void);
-extern void beatic_setup_cpu(int);
extern void beatic_deinit_IRQ(void);
#endif
diff --git a/arch/powerpc/platforms/cell/beat_smp.c b/arch/powerpc/platforms/cell/beat_smp.c
deleted file mode 100644
index 26efc204c47..00000000000
--- a/arch/powerpc/platforms/cell/beat_smp.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * SMP support for Celleb platform. (Incomplete)
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/platforms/cell/smp.c:
- * Dave Engebretsen, Peter Bergner, and
- * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
- * Plus various changes from other IBM teams...
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/threads.h>
-#include <linux/cpu.h>
-
-#include <asm/irq.h>
-#include <asm/smp.h>
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "beat_interrupt.h"
-
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-/*
- * The primary thread of each non-boot processor is recorded here before
- * smp init.
- */
-/* static cpumask_t of_spin_map; */
-
-/**
- * smp_startup_cpu() - start the given cpu
- *
- * At boot time, there is nothing to do for primary threads which were
- * started from Open Firmware. For anything else, call RTAS with the
- * appropriate start location.
- *
- * Returns:
- * 0 - failure
- * 1 - success
- */
-static inline int __devinit smp_startup_cpu(unsigned int lcpu)
-{
- return 0;
-}
-
-static void smp_beatic_message_pass(int target, int msg)
-{
- unsigned int i;
-
- if (target < NR_CPUS) {
- beatic_cause_IPI(target, msg);
- } else {
- for_each_online_cpu(i) {
- if (target == MSG_ALL_BUT_SELF
- && i == smp_processor_id())
- continue;
- beatic_cause_IPI(i, msg);
- }
- }
-}
-
-static int __init smp_beatic_probe(void)
-{
- return cpus_weight(cpu_possible_map);
-}
-
-static void __devinit smp_beatic_setup_cpu(int cpu)
-{
- beatic_setup_cpu(cpu);
-}
-
-static void __devinit smp_celleb_kick_cpu(int nr)
-{
- BUG_ON(nr < 0 || nr >= NR_CPUS);
-
- if (!smp_startup_cpu(nr))
- return;
-}
-
-static int smp_celleb_cpu_bootable(unsigned int nr)
-{
- return 1;
-}
-static struct smp_ops_t bpa_beatic_smp_ops = {
- .message_pass = smp_beatic_message_pass,
- .probe = smp_beatic_probe,
- .kick_cpu = smp_celleb_kick_cpu,
- .setup_cpu = smp_beatic_setup_cpu,
- .cpu_bootable = smp_celleb_cpu_bootable,
-};
-
-/* This is called very early */
-void __init smp_init_celleb(void)
-{
- DBG(" -> smp_init_celleb()\n");
-
- smp_ops = &bpa_beatic_smp_ops;
-
- DBG(" <- smp_init_celleb()\n");
-}
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index dbc338f187a..f3917e7a5b4 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -45,8 +45,8 @@ static struct cbe_thread_map
unsigned int cbe_id;
} cbe_thread_map[NR_CPUS];
-static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = CPU_MASK_NONE };
-static cpumask_t cbe_first_online_cpu = CPU_MASK_NONE;
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} };
+static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE };
static struct cbe_regs_map *cbe_find_map(struct device_node *np)
{
@@ -159,7 +159,8 @@ EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
u32 cbe_node_to_cpu(int node)
{
- return find_first_bit( (unsigned long *) &cbe_local_mask[node], sizeof(cpumask_t));
+ return cpumask_first(&cbe_local_mask[node]);
+
}
EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
@@ -268,9 +269,9 @@ void __init cbe_regs_init(void)
thread->regs = map;
thread->cbe_id = cbe_id;
map->be_node = thread->be_node;
- cpu_set(i, cbe_local_mask[cbe_id]);
+ cpumask_set_cpu(i, &cbe_local_mask[cbe_id]);
if(thread->thread_id == 0)
- cpu_set(i, cbe_first_online_cpu);
+ cpumask_set_cpu(i, &cbe_first_online_cpu);
}
}
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
index 404d1fc04d5..5822141aa63 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.c
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -41,7 +41,6 @@
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
-#include "io-workarounds.h"
#include "celleb_pci.h"
#define MAX_PCI_DEVICES 32
@@ -320,7 +319,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node,
size = 256;
config = &private->fake_config[devno][fn];
- *config = alloc_maybe_bootmem(size, GFP_KERNEL);
+ *config = zalloc_maybe_bootmem(size, GFP_KERNEL);
if (*config == NULL) {
printk(KERN_ERR "PCI: "
"not enough memory for fake configuration space\n");
@@ -331,7 +330,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node,
size = sizeof(struct celleb_pci_resource);
res = &private->res[devno][fn];
- *res = alloc_maybe_bootmem(size, GFP_KERNEL);
+ *res = zalloc_maybe_bootmem(size, GFP_KERNEL);
if (*res == NULL) {
printk(KERN_ERR
"PCI: not enough memory for resource data space\n");
@@ -432,7 +431,7 @@ static int __init phb_set_bus_ranges(struct device_node *dev,
static void __init celleb_alloc_private_mem(struct pci_controller *hose)
{
hose->private_data =
- alloc_maybe_bootmem(sizeof(struct celleb_pci_private),
+ zalloc_maybe_bootmem(sizeof(struct celleb_pci_private),
GFP_KERNEL);
}
@@ -469,18 +468,6 @@ static struct of_device_id celleb_phb_match[] __initdata = {
},
};
-static int __init celleb_io_workaround_init(struct pci_controller *phb,
- struct celleb_phb_spec *phb_spec)
-{
- if (phb_spec->ops) {
- iowa_register_bus(phb, phb_spec->ops, phb_spec->iowa_init,
- phb_spec->iowa_data);
- io_workaround_init();
- }
-
- return 0;
-}
-
int __init celleb_setup_phb(struct pci_controller *phb)
{
struct device_node *dev = phb->dn;
@@ -500,7 +487,11 @@ int __init celleb_setup_phb(struct pci_controller *phb)
if (rc)
return 1;
- return celleb_io_workaround_init(phb, phb_spec);
+ if (phb_spec->ops)
+ iowa_register_bus(phb, phb_spec->ops,
+ phb_spec->iowa_init,
+ phb_spec->iowa_data);
+ return 0;
}
int celleb_pci_probe_mode(struct pci_bus *bus)
diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h
index 4cba1523ec5..a801fcc5f38 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.h
+++ b/arch/powerpc/platforms/cell/celleb_pci.h
@@ -26,8 +26,9 @@
#include <asm/pci-bridge.h>
#include <asm/prom.h>
#include <asm/ppc-pci.h>
+#include <asm/io-workarounds.h>
-#include "io-workarounds.h"
+struct iowa_bus;
struct celleb_phb_spec {
int (*setup)(struct device_node *, struct pci_controller *);
diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c
index e5384557977..d58d9bae4b9 100644
--- a/arch/powerpc/platforms/cell/celleb_setup.c
+++ b/arch/powerpc/platforms/cell/celleb_setup.c
@@ -128,10 +128,6 @@ static void __init celleb_setup_arch_beat(void)
spu_management_ops = &spu_management_of_ops;
#endif
-#ifdef CONFIG_SMP
- smp_init_celleb();
-#endif
-
celleb_setup_arch_common();
}
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 44cfd1bef89..449c08c1586 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -196,8 +196,20 @@ static irqreturn_t iic_ipi_action(int irq, void *dev_id)
{
int ipi = (int)(long)dev_id;
- smp_message_recv(ipi);
-
+ switch(ipi) {
+ case PPC_MSG_CALL_FUNCTION:
+ generic_smp_call_function_interrupt();
+ break;
+ case PPC_MSG_RESCHEDULE:
+ scheduler_ipi();
+ break;
+ case PPC_MSG_CALL_FUNC_SINGLE:
+ generic_smp_call_function_single_interrupt();
+ break;
+ case PPC_MSG_DEBUGGER_BREAK:
+ debug_ipi_action(0, NULL);
+ break;
+ }
return IRQ_HANDLED;
}
static void iic_request_ipi(int ipi, const char *name)
diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c
index d31c594cfdf..51e290126bc 100644
--- a/arch/powerpc/platforms/cell/qpace_setup.c
+++ b/arch/powerpc/platforms/cell/qpace_setup.c
@@ -42,7 +42,6 @@
#include "interrupt.h"
#include "pervasive.h"
#include "ras.h"
-#include "io-workarounds.h"
static void qpace_show_cpuinfo(struct seq_file *m)
{
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index fd57bfe00ed..c73cf4c43fc 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -51,11 +51,11 @@
#include <asm/udbg.h>
#include <asm/mpic.h>
#include <asm/cell-regs.h>
+#include <asm/io-workarounds.h>
#include "interrupt.h"
#include "pervasive.h"
#include "ras.h"
-#include "io-workarounds.h"
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -136,8 +136,6 @@ static int __devinit cell_setup_phb(struct pci_controller *phb)
iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
(void *)SPIDER_PCI_REG_BASE);
- io_workaround_init();
-
return 0;
}
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index f774530075b..d176e6148e3 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -77,7 +77,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
unsigned int pcpu;
int start_cpu;
- if (cpu_isset(lcpu, of_spin_map))
+ if (cpumask_test_cpu(lcpu, &of_spin_map))
/* Already started by OF and sitting in spin loop */
return 1;
@@ -103,27 +103,11 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
return 1;
}
-static void smp_iic_message_pass(int target, int msg)
-{
- unsigned int i;
-
- if (target < NR_CPUS) {
- iic_cause_IPI(target, msg);
- } else {
- for_each_online_cpu(i) {
- if (target == MSG_ALL_BUT_SELF
- && i == smp_processor_id())
- continue;
- iic_cause_IPI(i, msg);
- }
- }
-}
-
static int __init smp_iic_probe(void)
{
iic_request_IPIs();
- return cpus_weight(cpu_possible_map);
+ return cpumask_weight(cpu_possible_mask);
}
static void __devinit smp_cell_setup_cpu(int cpu)
@@ -137,12 +121,12 @@ static void __devinit smp_cell_setup_cpu(int cpu)
mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
}
-static void __devinit smp_cell_kick_cpu(int nr)
+static int __devinit smp_cell_kick_cpu(int nr)
{
BUG_ON(nr < 0 || nr >= NR_CPUS);
if (!smp_startup_cpu(nr))
- return;
+ return -ENOENT;
/*
* The processor is currently spinning, waiting for the
@@ -150,6 +134,8 @@ static void __devinit smp_cell_kick_cpu(int nr)
* the processor will continue on to secondary_start
*/
paca[nr].cpu_start = 1;
+
+ return 0;
}
static int smp_cell_cpu_bootable(unsigned int nr)
@@ -166,7 +152,7 @@ static int smp_cell_cpu_bootable(unsigned int nr)
return 1;
}
static struct smp_ops_t bpa_iic_smp_ops = {
- .message_pass = smp_iic_message_pass,
+ .message_pass = iic_cause_IPI,
.probe = smp_iic_probe,
.kick_cpu = smp_cell_kick_cpu,
.setup_cpu = smp_cell_setup_cpu,
@@ -186,13 +172,12 @@ void __init smp_init_cell(void)
if (cpu_has_feature(CPU_FTR_SMT)) {
for_each_present_cpu(i) {
if (cpu_thread_in_core(i) == 0)
- cpu_set(i, of_spin_map);
+ cpumask_set_cpu(i, &of_spin_map);
}
- } else {
- of_spin_map = cpu_present_map;
- }
+ } else
+ cpumask_copy(&of_spin_map, cpu_present_mask);
- cpu_clear(boot_cpuid, of_spin_map);
+ cpumask_clear_cpu(boot_cpuid, &of_spin_map);
/* Non-lpar has additional take/give timebase */
if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index ca7731c0b59..f1f7878893f 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -27,8 +27,7 @@
#include <asm/ppc-pci.h>
#include <asm/pci-bridge.h>
-
-#include "io-workarounds.h"
+#include <asm/io-workarounds.h>
#define SPIDER_PCI_DISABLE_PREFETCH
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index c5cf50e6b45..442c28c00f8 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -68,9 +68,9 @@ struct spider_pic {
};
static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
-static struct spider_pic *spider_virq_to_pic(unsigned int virq)
+static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d)
{
- return irq_map[virq].host->host_data;
+ return irq_data_get_irq_chip_data(d);
}
static void __iomem *spider_get_irq_config(struct spider_pic *pic,
@@ -81,24 +81,24 @@ static void __iomem *spider_get_irq_config(struct spider_pic *pic,
static void spider_unmask_irq(struct irq_data *d)
{
- struct spider_pic *pic = spider_virq_to_pic(d->irq);
- void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq);
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
out_be32(cfg, in_be32(cfg) | 0x30000000u);
}
static void spider_mask_irq(struct irq_data *d)
{
- struct spider_pic *pic = spider_virq_to_pic(d->irq);
- void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq);
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
out_be32(cfg, in_be32(cfg) & ~0x30000000u);
}
static void spider_ack_irq(struct irq_data *d)
{
- struct spider_pic *pic = spider_virq_to_pic(d->irq);
- unsigned int src = irq_map[d->irq].hwirq;
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ unsigned int src = irqd_to_hwirq(d);
/* Reset edge detection logic if necessary
*/
@@ -116,8 +116,8 @@ static void spider_ack_irq(struct irq_data *d)
static int spider_set_irq_type(struct irq_data *d, unsigned int type)
{
unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
- struct spider_pic *pic = spider_virq_to_pic(d->irq);
- unsigned int hw = irq_map[d->irq].hwirq;
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ unsigned int hw = irqd_to_hwirq(d);
void __iomem *cfg = spider_get_irq_config(pic, hw);
u32 old_mask;
u32 ic;
@@ -171,6 +171,7 @@ static struct irq_chip spider_pic = {
static int spider_host_map(struct irq_host *h, unsigned int virq,
irq_hw_number_t hw)
{
+ irq_set_chip_data(virq, h->host_data);
irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq);
/* Set default irq type */
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 65203857b0c..32cb4e66d2c 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -141,7 +141,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
* runqueue. The context will be rescheduled on the proper node
* if it is timesliced or preempted.
*/
- ctx->cpus_allowed = current->cpus_allowed;
+ cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current));
/* Save the current cpu id for spu interrupt routing. */
ctx->last_ran = raw_smp_processor_id();
diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c
index 02cafecc90e..a800122e4dd 100644
--- a/arch/powerpc/platforms/chrp/smp.c
+++ b/arch/powerpc/platforms/chrp/smp.c
@@ -30,10 +30,12 @@
#include <asm/mpic.h>
#include <asm/rtas.h>
-static void __devinit smp_chrp_kick_cpu(int nr)
+static int __devinit smp_chrp_kick_cpu(int nr)
{
*(unsigned long *)KERNELBASE = nr;
asm volatile("dcbf 0,%0"::"r"(KERNELBASE):"memory");
+
+ return 0;
}
static void __devinit smp_chrp_setup_cpu(int cpu_nr)
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index 12aa62b6f22..f61a2dd96b9 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -48,7 +48,7 @@
static void flipper_pic_mask_and_ack(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void __iomem *io_base = irq_data_get_irq_chip_data(d);
u32 mask = 1 << irq;
@@ -59,7 +59,7 @@ static void flipper_pic_mask_and_ack(struct irq_data *d)
static void flipper_pic_ack(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void __iomem *io_base = irq_data_get_irq_chip_data(d);
/* this is at least needed for RSW */
@@ -68,7 +68,7 @@ static void flipper_pic_ack(struct irq_data *d)
static void flipper_pic_mask(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void __iomem *io_base = irq_data_get_irq_chip_data(d);
clrbits32(io_base + FLIPPER_IMR, 1 << irq);
@@ -76,7 +76,7 @@ static void flipper_pic_mask(struct irq_data *d)
static void flipper_pic_unmask(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void __iomem *io_base = irq_data_get_irq_chip_data(d);
setbits32(io_base + FLIPPER_IMR, 1 << irq);
@@ -107,12 +107,6 @@ static int flipper_pic_map(struct irq_host *h, unsigned int virq,
return 0;
}
-static void flipper_pic_unmap(struct irq_host *h, unsigned int irq)
-{
- irq_set_chip_data(irq, NULL);
- irq_set_chip(irq, NULL);
-}
-
static int flipper_pic_match(struct irq_host *h, struct device_node *np)
{
return 1;
@@ -121,7 +115,6 @@ static int flipper_pic_match(struct irq_host *h, struct device_node *np)
static struct irq_host_ops flipper_irq_host_ops = {
.map = flipper_pic_map,
- .unmap = flipper_pic_unmap,
.match = flipper_pic_match,
};
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 2bdddfc9d52..e4919170c6b 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -43,7 +43,7 @@
static void hlwd_pic_mask_and_ack(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void __iomem *io_base = irq_data_get_irq_chip_data(d);
u32 mask = 1 << irq;
@@ -53,7 +53,7 @@ static void hlwd_pic_mask_and_ack(struct irq_data *d)
static void hlwd_pic_ack(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void __iomem *io_base = irq_data_get_irq_chip_data(d);
out_be32(io_base + HW_BROADWAY_ICR, 1 << irq);
@@ -61,7 +61,7 @@ static void hlwd_pic_ack(struct irq_data *d)
static void hlwd_pic_mask(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void __iomem *io_base = irq_data_get_irq_chip_data(d);
clrbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
@@ -69,7 +69,7 @@ static void hlwd_pic_mask(struct irq_data *d)
static void hlwd_pic_unmask(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void __iomem *io_base = irq_data_get_irq_chip_data(d);
setbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
@@ -100,15 +100,8 @@ static int hlwd_pic_map(struct irq_host *h, unsigned int virq,
return 0;
}
-static void hlwd_pic_unmap(struct irq_host *h, unsigned int irq)
-{
- irq_set_chip_data(irq, NULL);
- irq_set_chip(irq, NULL);
-}
-
static struct irq_host_ops hlwd_irq_host_ops = {
.map = hlwd_pic_map,
- .unmap = hlwd_pic_unmap,
};
static unsigned int __hlwd_pic_get_irq(struct irq_host *h)
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index e5bc9f75d47..b57cda3a081 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -1,7 +1,9 @@
config PPC_ISERIES
bool "IBM Legacy iSeries"
depends on PPC64 && PPC_BOOK3S
- select PPC_INDIRECT_IO
+ select PPC_SMP_MUXED_IPI
+ select PPC_INDIRECT_PIO
+ select PPC_INDIRECT_MMIO
select PPC_PCI_CHOICE if EXPERT
menu "iSeries device drivers"
diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S
index 32a56c6dfa7..29c02f36b32 100644
--- a/arch/powerpc/platforms/iseries/exception.S
+++ b/arch/powerpc/platforms/iseries/exception.S
@@ -31,6 +31,7 @@
#include <asm/thread_info.h>
#include <asm/ptrace.h>
#include <asm/cputable.h>
+#include <asm/mmu.h>
#include "exception.h"
@@ -60,29 +61,31 @@ system_reset_iSeries:
/* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */
/* In the UP case we'll yield() later, and we will not access the paca anyway */
#ifdef CONFIG_SMP
-1:
+iSeries_secondary_wait_paca:
HMT_LOW
LOAD_REG_ADDR(r23, __secondary_hold_spinloop)
ld r23,0(r23)
- sync
- LOAD_REG_ADDR(r3,current_set)
- sldi r28,r24,3 /* get current_set[cpu#] */
- ldx r3,r3,r28
- addi r1,r3,THREAD_SIZE
- subi r1,r1,STACK_FRAME_OVERHEAD
- cmpwi 0,r23,0 /* Keep poking the Hypervisor until */
- bne 2f /* we're released */
- /* Let the Hypervisor know we are alive */
+ cmpdi 0,r23,0
+ bne 2f /* go on when the master is ready */
+
+ /* Keep poking the Hypervisor until we're released */
/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
lis r3,0x8002
rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */
li r0,-1 /* r0=-1 indicates a Hypervisor call */
sc /* Invoke the hypervisor via a system call */
- b 1b
-#endif
+ b iSeries_secondary_wait_paca
2:
+ HMT_MEDIUM
+ sync
+
+ LOAD_REG_ADDR(r3, nr_cpu_ids) /* get number of pacas allocated */
+ lwz r3,0(r3) /* nr_cpus= or NR_CPUS can limit */
+ cmpld 0,r24,r3 /* is our cpu number allocated? */
+ bge iSeries_secondary_yield /* no, yield forever */
+
/* Load our paca now that it's been allocated */
LOAD_REG_ADDR(r13, paca)
ld r13,0(r13)
@@ -93,10 +96,24 @@ system_reset_iSeries:
ori r23,r23,MSR_RI
mtmsrd r23 /* RI on */
- HMT_LOW
-#ifdef CONFIG_SMP
+iSeries_secondary_smp_loop:
lbz r23,PACAPROCSTART(r13) /* Test if this processor
* should start */
+ cmpwi 0,r23,0
+ bne 3f /* go on when we are told */
+
+ HMT_LOW
+ /* Let the Hypervisor know we are alive */
+ /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
+ lis r3,0x8002
+ rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */
+ li r0,-1 /* r0=-1 indicates a Hypervisor call */
+ sc /* Invoke the hypervisor via a system call */
+ mfspr r13,SPRN_SPRG_PACA /* Put r13 back ???? */
+ b iSeries_secondary_smp_loop /* wait for signal to start */
+
+3:
+ HMT_MEDIUM
sync
LOAD_REG_ADDR(r3,current_set)
sldi r28,r24,3 /* get current_set[cpu#] */
@@ -104,27 +121,22 @@ system_reset_iSeries:
addi r1,r3,THREAD_SIZE
subi r1,r1,STACK_FRAME_OVERHEAD
- cmpwi 0,r23,0
- beq iSeries_secondary_smp_loop /* Loop until told to go */
b __secondary_start /* Loop until told to go */
-iSeries_secondary_smp_loop:
- /* Let the Hypervisor know we are alive */
- /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
- lis r3,0x8002
- rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */
-#else /* CONFIG_SMP */
+#endif /* CONFIG_SMP */
+
+iSeries_secondary_yield:
/* Yield the processor. This is required for non-SMP kernels
which are running on multi-threaded machines. */
+ HMT_LOW
lis r3,0x8000
rldicr r3,r3,32,15 /* r3 = (r3 << 32) & 0xffff000000000000 */
addi r3,r3,18 /* r3 = 0x8000000000000012 which is "yield" */
li r4,0 /* "yield timed" */
li r5,-1 /* "yield forever" */
-#endif /* CONFIG_SMP */
li r0,-1 /* r0=-1 indicates a Hypervisor call */
sc /* Invoke the hypervisor via a system call */
mfspr r13,SPRN_SPRG_PACA /* Put r13 back ???? */
- b 2b /* If SMP not configured, secondaries
+ b iSeries_secondary_yield /* If SMP not configured, secondaries
* loop forever */
/*** ISeries-LPAR interrupt handlers ***/
@@ -157,7 +169,7 @@ BEGIN_FTR_SECTION
FTR_SECTION_ELSE
EXCEPTION_PROLOG_1(PACA_EXGEN)
EXCEPTION_PROLOG_ISERIES_1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB)
b data_access_common
.do_stab_bolted_iSeries:
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index 52a6889832c..b2103453eb0 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -42,7 +42,6 @@
#include "irq.h"
#include "pci.h"
#include "call_pci.h"
-#include "smp.h"
#ifdef CONFIG_PCI
@@ -171,7 +170,7 @@ static void iseries_enable_IRQ(struct irq_data *d)
{
u32 bus, dev_id, function, mask;
const u32 sub_bus = 0;
- unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
/* The IRQ has already been locked by the caller */
bus = REAL_IRQ_TO_BUS(rirq);
@@ -188,7 +187,7 @@ static unsigned int iseries_startup_IRQ(struct irq_data *d)
{
u32 bus, dev_id, function, mask;
const u32 sub_bus = 0;
- unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
bus = REAL_IRQ_TO_BUS(rirq);
function = REAL_IRQ_TO_FUNC(rirq);
@@ -234,7 +233,7 @@ static void iseries_shutdown_IRQ(struct irq_data *d)
{
u32 bus, dev_id, function, mask;
const u32 sub_bus = 0;
- unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
/* irq should be locked by the caller */
bus = REAL_IRQ_TO_BUS(rirq);
@@ -257,7 +256,7 @@ static void iseries_disable_IRQ(struct irq_data *d)
{
u32 bus, dev_id, function, mask;
const u32 sub_bus = 0;
- unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
/* The IRQ has already been locked by the caller */
bus = REAL_IRQ_TO_BUS(rirq);
@@ -271,7 +270,7 @@ static void iseries_disable_IRQ(struct irq_data *d)
static void iseries_end_IRQ(struct irq_data *d)
{
- unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
HvCallPci_eoi(REAL_IRQ_TO_BUS(rirq), REAL_IRQ_TO_SUBBUS(rirq),
(REAL_IRQ_TO_IDSEL(rirq) << 4) + REAL_IRQ_TO_FUNC(rirq));
@@ -316,7 +315,7 @@ unsigned int iSeries_get_irq(void)
#ifdef CONFIG_SMP
if (get_lppaca()->int_dword.fields.ipi_cnt) {
get_lppaca()->int_dword.fields.ipi_cnt = 0;
- iSeries_smp_message_recv();
+ smp_ipi_demux();
}
#endif /* CONFIG_SMP */
if (hvlpevent_is_pending())
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 2946ae10fbf..c25a0815c26 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -249,7 +249,7 @@ static unsigned long iSeries_process_mainstore_vpd(struct MemoryBlock *mb_array,
unsigned long i;
unsigned long mem_blocks = 0;
- if (cpu_has_feature(CPU_FTR_SLB))
+ if (mmu_has_feature(MMU_FTR_SLB))
mem_blocks = iSeries_process_Regatta_mainstore_vpd(mb_array,
max_entries);
else
@@ -634,7 +634,7 @@ static int __init iseries_probe(void)
hpte_init_iSeries();
/* iSeries does not support 16M pages */
- cur_cpu_spec->cpu_features &= ~CPU_FTR_16M_PAGE;
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_16M_PAGE;
return 1;
}
@@ -685,6 +685,11 @@ void * __init iSeries_early_setup(void)
powerpc_firmware_features |= FW_FEATURE_ISERIES;
powerpc_firmware_features |= FW_FEATURE_LPAR;
+#ifdef CONFIG_SMP
+ /* On iSeries we know we can never have more than 64 cpus */
+ nr_cpu_ids = max(nr_cpu_ids, 64);
+#endif
+
iSeries_fixup_klimit();
/*
diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c
index 6c6029914db..e3265adde5d 100644
--- a/arch/powerpc/platforms/iseries/smp.c
+++ b/arch/powerpc/platforms/iseries/smp.c
@@ -42,57 +42,23 @@
#include <asm/cputable.h>
#include <asm/system.h>
-#include "smp.h"
-
-static unsigned long iSeries_smp_message[NR_CPUS];
-
-void iSeries_smp_message_recv(void)
-{
- int cpu = smp_processor_id();
- int msg;
-
- if (num_online_cpus() < 2)
- return;
-
- for (msg = 0; msg < 4; msg++)
- if (test_and_clear_bit(msg, &iSeries_smp_message[cpu]))
- smp_message_recv(msg);
-}
-
-static inline void smp_iSeries_do_message(int cpu, int msg)
+static void smp_iSeries_cause_ipi(int cpu, unsigned long data)
{
- set_bit(msg, &iSeries_smp_message[cpu]);
HvCall_sendIPI(&(paca[cpu]));
}
-static void smp_iSeries_message_pass(int target, int msg)
-{
- int i;
-
- if (target < NR_CPUS)
- smp_iSeries_do_message(target, msg);
- else {
- for_each_online_cpu(i) {
- if ((target == MSG_ALL_BUT_SELF) &&
- (i == smp_processor_id()))
- continue;
- smp_iSeries_do_message(i, msg);
- }
- }
-}
-
static int smp_iSeries_probe(void)
{
return cpumask_weight(cpu_possible_mask);
}
-static void smp_iSeries_kick_cpu(int nr)
+static int smp_iSeries_kick_cpu(int nr)
{
BUG_ON((nr < 0) || (nr >= NR_CPUS));
/* Verify that our partition has a processor nr */
if (lppaca_of(nr).dyn_proc_status >= 2)
- return;
+ return -ENOENT;
/* The processor is currently spinning, waiting
* for the cpu_start field to become non-zero
@@ -100,6 +66,8 @@ static void smp_iSeries_kick_cpu(int nr)
* continue on to secondary_start in iSeries_head.S
*/
paca[nr].cpu_start = 1;
+
+ return 0;
}
static void __devinit smp_iSeries_setup_cpu(int nr)
@@ -107,7 +75,8 @@ static void __devinit smp_iSeries_setup_cpu(int nr)
}
static struct smp_ops_t iSeries_smp_ops = {
- .message_pass = smp_iSeries_message_pass,
+ .message_pass = smp_muxed_ipi_message_pass,
+ .cause_ipi = smp_iSeries_cause_ipi,
.probe = smp_iSeries_probe,
.kick_cpu = smp_iSeries_kick_cpu,
.setup_cpu = smp_iSeries_setup_cpu,
diff --git a/arch/powerpc/platforms/iseries/smp.h b/arch/powerpc/platforms/iseries/smp.h
deleted file mode 100644
index d501f7de01e..00000000000
--- a/arch/powerpc/platforms/iseries/smp.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _PLATFORMS_ISERIES_SMP_H
-#define _PLATFORMS_ISERIES_SMP_H
-
-extern void iSeries_smp_message_recv(void);
-
-#endif /* _PLATFORMS_ISERIES_SMP_H */
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index 1e1a0873e1d..1afd10f6785 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -18,4 +18,13 @@ config PPC_PMAC64
select PPC_970_NAP
default y
-
+config PPC_PMAC32_PSURGE
+ bool "Support for powersurge upgrade cards" if EXPERT
+ depends on SMP && PPC32 && PPC_PMAC
+ select PPC_SMP_MUXED_IPI
+ default y
+ help
+ The powersurge cpu boards can be used in the generation
+ of powermacs that have a socket for an upgradeable cpu card,
+ including the 7500, 8500, 9500, 9600. Support exists for
+ both dual and quad socket upgrade cards.
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 7c18a1607d1..9089b042119 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -84,7 +84,7 @@ static void __pmac_retrigger(unsigned int irq_nr)
static void pmac_mask_and_ack_irq(struct irq_data *d)
{
- unsigned int src = irq_map[d->irq].hwirq;
+ unsigned int src = irqd_to_hwirq(d);
unsigned long bit = 1UL << (src & 0x1f);
int i = src >> 5;
unsigned long flags;
@@ -106,7 +106,7 @@ static void pmac_mask_and_ack_irq(struct irq_data *d)
static void pmac_ack_irq(struct irq_data *d)
{
- unsigned int src = irq_map[d->irq].hwirq;
+ unsigned int src = irqd_to_hwirq(d);
unsigned long bit = 1UL << (src & 0x1f);
int i = src >> 5;
unsigned long flags;
@@ -152,7 +152,7 @@ static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
static unsigned int pmac_startup_irq(struct irq_data *d)
{
unsigned long flags;
- unsigned int src = irq_map[d->irq].hwirq;
+ unsigned int src = irqd_to_hwirq(d);
unsigned long bit = 1UL << (src & 0x1f);
int i = src >> 5;
@@ -169,7 +169,7 @@ static unsigned int pmac_startup_irq(struct irq_data *d)
static void pmac_mask_irq(struct irq_data *d)
{
unsigned long flags;
- unsigned int src = irq_map[d->irq].hwirq;
+ unsigned int src = irqd_to_hwirq(d);
raw_spin_lock_irqsave(&pmac_pic_lock, flags);
__clear_bit(src, ppc_cached_irq_mask);
@@ -180,7 +180,7 @@ static void pmac_mask_irq(struct irq_data *d)
static void pmac_unmask_irq(struct irq_data *d)
{
unsigned long flags;
- unsigned int src = irq_map[d->irq].hwirq;
+ unsigned int src = irqd_to_hwirq(d);
raw_spin_lock_irqsave(&pmac_pic_lock, flags);
__set_bit(src, ppc_cached_irq_mask);
@@ -193,7 +193,7 @@ static int pmac_retrigger(struct irq_data *d)
unsigned long flags;
raw_spin_lock_irqsave(&pmac_pic_lock, flags);
- __pmac_retrigger(irq_map[d->irq].hwirq);
+ __pmac_retrigger(irqd_to_hwirq(d));
raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
return 1;
}
@@ -239,15 +239,12 @@ static unsigned int pmac_pic_get_irq(void)
unsigned long bits = 0;
unsigned long flags;
-#ifdef CONFIG_SMP
- void psurge_smp_message_recv(void);
-
- /* IPI's are a hack on the powersurge -- Cort */
- if ( smp_processor_id() != 0 ) {
- psurge_smp_message_recv();
- return NO_IRQ_IGNORE; /* ignore, already handled */
+#ifdef CONFIG_PPC_PMAC32_PSURGE
+ /* IPI's are a hack on the powersurge -- Cort */
+ if (smp_processor_id() != 0) {
+ return psurge_secondary_virq;
}
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
raw_spin_lock_irqsave(&pmac_pic_lock, flags);
for (irq = max_real_irqs; (irq -= 32) >= 0; ) {
int i = irq >> 5;
diff --git a/arch/powerpc/platforms/powermac/pic.h b/arch/powerpc/platforms/powermac/pic.h
deleted file mode 100644
index d622a8345aa..00000000000
--- a/arch/powerpc/platforms/powermac/pic.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __PPC_PLATFORMS_PMAC_PIC_H
-#define __PPC_PLATFORMS_PMAC_PIC_H
-
-#include <linux/irq.h>
-
-extern struct irq_chip pmac_pic;
-
-extern void pmac_pic_init(void);
-extern int pmac_get_irq(void);
-
-#endif /* __PPC_PLATFORMS_PMAC_PIC_H */
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index 20468f49aec..8327cce2bdb 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -33,6 +33,7 @@ extern void pmac_setup_pci_dma(void);
extern void pmac_check_ht_link(void);
extern void pmac_setup_smp(void);
+extern int psurge_secondary_virq;
extern void low_cpu_die(void) __attribute__((noreturn));
extern int pmac_nvram_init(void);
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index bc5f0dc6ae1..db092d7c4c5 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -70,7 +70,7 @@ static void (*pmac_tb_freeze)(int freeze);
static u64 timebase;
static int tb_req;
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_PMAC32_PSURGE
/*
* Powersurge (old powermac SMP) support.
@@ -124,6 +124,10 @@ static volatile u32 __iomem *psurge_start;
/* what sort of powersurge board we have */
static int psurge_type = PSURGE_NONE;
+/* irq for secondary cpus to report */
+static struct irq_host *psurge_host;
+int psurge_secondary_virq;
+
/*
* Set and clear IPIs for powersurge.
*/
@@ -156,51 +160,52 @@ static inline void psurge_clr_ipi(int cpu)
/*
* On powersurge (old SMP powermac architecture) we don't have
* separate IPIs for separate messages like openpic does. Instead
- * we have a bitmap for each processor, where a 1 bit means that
- * the corresponding message is pending for that processor.
- * Ideally each cpu's entry would be in a different cache line.
+ * use the generic demux helpers
* -- paulus.
*/
-static unsigned long psurge_smp_message[NR_CPUS];
-
-void psurge_smp_message_recv(void)
+static irqreturn_t psurge_ipi_intr(int irq, void *d)
{
- int cpu = smp_processor_id();
- int msg;
+ psurge_clr_ipi(smp_processor_id());
+ smp_ipi_demux();
- /* clear interrupt */
- psurge_clr_ipi(cpu);
-
- if (num_online_cpus() < 2)
- return;
+ return IRQ_HANDLED;
+}
- /* make sure there is a message there */
- for (msg = 0; msg < 4; msg++)
- if (test_and_clear_bit(msg, &psurge_smp_message[cpu]))
- smp_message_recv(msg);
+static void smp_psurge_cause_ipi(int cpu, unsigned long data)
+{
+ psurge_set_ipi(cpu);
}
-irqreturn_t psurge_primary_intr(int irq, void *d)
+static int psurge_host_map(struct irq_host *h, unsigned int virq,
+ irq_hw_number_t hw)
{
- psurge_smp_message_recv();
- return IRQ_HANDLED;
+ irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_percpu_irq);
+
+ return 0;
}
-static void smp_psurge_message_pass(int target, int msg)
+struct irq_host_ops psurge_host_ops = {
+ .map = psurge_host_map,
+};
+
+static int psurge_secondary_ipi_init(void)
{
- int i;
+ int rc = -ENOMEM;
- if (num_online_cpus() < 2)
- return;
+ psurge_host = irq_alloc_host(NULL, IRQ_HOST_MAP_NOMAP, 0,
+ &psurge_host_ops, 0);
- for_each_online_cpu(i) {
- if (target == MSG_ALL
- || (target == MSG_ALL_BUT_SELF && i != smp_processor_id())
- || target == i) {
- set_bit(msg, &psurge_smp_message[i]);
- psurge_set_ipi(i);
- }
- }
+ if (psurge_host)
+ psurge_secondary_virq = irq_create_direct_mapping(psurge_host);
+
+ if (psurge_secondary_virq)
+ rc = request_irq(psurge_secondary_virq, psurge_ipi_intr,
+ IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
+
+ if (rc)
+ pr_err("Failed to setup secondary cpu IPI\n");
+
+ return rc;
}
/*
@@ -311,6 +316,9 @@ static int __init smp_psurge_probe(void)
ncpus = 2;
}
+ if (psurge_secondary_ipi_init())
+ return 1;
+
psurge_start = ioremap(PSURGE_START, 4);
psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
@@ -329,7 +337,7 @@ static int __init smp_psurge_probe(void)
return ncpus;
}
-static void __init smp_psurge_kick_cpu(int nr)
+static int __init smp_psurge_kick_cpu(int nr)
{
unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8;
unsigned long a, flags;
@@ -394,11 +402,13 @@ static void __init smp_psurge_kick_cpu(int nr)
psurge_set_ipi(1);
if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354);
+
+ return 0;
}
static struct irqaction psurge_irqaction = {
- .handler = psurge_primary_intr,
- .flags = IRQF_DISABLED,
+ .handler = psurge_ipi_intr,
+ .flags = IRQF_DISABLED|IRQF_PERCPU,
.name = "primary IPI",
};
@@ -437,14 +447,15 @@ void __init smp_psurge_give_timebase(void)
/* PowerSurge-style Macs */
struct smp_ops_t psurge_smp_ops = {
- .message_pass = smp_psurge_message_pass,
+ .message_pass = smp_muxed_ipi_message_pass,
+ .cause_ipi = smp_psurge_cause_ipi,
.probe = smp_psurge_probe,
.kick_cpu = smp_psurge_kick_cpu,
.setup_cpu = smp_psurge_setup_cpu,
.give_timebase = smp_psurge_give_timebase,
.take_timebase = smp_psurge_take_timebase,
};
-#endif /* CONFIG_PPC32 - actually powersurge support */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
/*
* Core 99 and later support
@@ -791,14 +802,14 @@ static int __init smp_core99_probe(void)
return ncpus;
}
-static void __devinit smp_core99_kick_cpu(int nr)
+static int __devinit smp_core99_kick_cpu(int nr)
{
unsigned int save_vector;
unsigned long target, flags;
unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100);
if (nr < 0 || nr > 3)
- return;
+ return -ENOENT;
if (ppc_md.progress)
ppc_md.progress("smp_core99_kick_cpu", 0x346);
@@ -830,6 +841,8 @@ static void __devinit smp_core99_kick_cpu(int nr)
local_irq_restore(flags);
if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
+
+ return 0;
}
static void __devinit smp_core99_setup_cpu(int cpu_nr)
@@ -1002,7 +1015,7 @@ void __init pmac_setup_smp(void)
of_node_put(np);
smp_ops = &core99_smp_ops;
}
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_PMAC32_PSURGE
else {
/* We have to set bits in cpu_possible_mask here since the
* secondary CPU(s) aren't in the device tree. Various
@@ -1015,7 +1028,7 @@ void __init pmac_setup_smp(void)
set_cpu_possible(cpu, true);
smp_ops = &psurge_smp_ops;
}
-#endif /* CONFIG_PPC32 */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
#ifdef CONFIG_HOTPLUG_CPU
ppc_md.cpu_die = pmac_cpu_die;
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index f2f6413b81d..600ed2c0ed5 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -197,7 +197,7 @@ static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet,
result = irq_set_chip_data(*virq, pd);
if (result) {
- pr_debug("%s:%d: set_irq_chip_data failed\n",
+ pr_debug("%s:%d: irq_set_chip_data failed\n",
__func__, __LINE__);
goto fail_set;
}
@@ -659,11 +659,6 @@ static void __maybe_unused _dump_mask(struct ps3_private *pd,
static void dump_bmp(struct ps3_private* pd) {};
#endif /* defined(DEBUG) */
-static void ps3_host_unmap(struct irq_host *h, unsigned int virq)
-{
- irq_set_chip_data(virq, NULL);
-}
-
static int ps3_host_map(struct irq_host *h, unsigned int virq,
irq_hw_number_t hwirq)
{
@@ -683,7 +678,6 @@ static int ps3_host_match(struct irq_host *h, struct device_node *np)
static struct irq_host_ops ps3_host_ops = {
.map = ps3_host_map,
- .unmap = ps3_host_unmap,
.match = ps3_host_match,
};
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index 51ffde40af2..4c44794faac 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -39,7 +39,7 @@
#define MSG_COUNT 4
static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs);
-static void do_message_pass(int target, int msg)
+static void ps3_smp_message_pass(int cpu, int msg)
{
int result;
unsigned int virq;
@@ -49,28 +49,12 @@ static void do_message_pass(int target, int msg)
return;
}
- virq = per_cpu(ps3_ipi_virqs, target)[msg];
+ virq = per_cpu(ps3_ipi_virqs, cpu)[msg];
result = ps3_send_event_locally(virq);
if (result)
DBG("%s:%d: ps3_send_event_locally(%d, %d) failed"
- " (%d)\n", __func__, __LINE__, target, msg, result);
-}
-
-static void ps3_smp_message_pass(int target, int msg)
-{
- int cpu;
-
- if (target < NR_CPUS)
- do_message_pass(target, msg);
- else if (target == MSG_ALL_BUT_SELF) {
- for_each_online_cpu(cpu)
- if (cpu != smp_processor_id())
- do_message_pass(cpu, msg);
- } else {
- for_each_online_cpu(cpu)
- do_message_pass(cpu, msg);
- }
+ " (%d)\n", __func__, __LINE__, cpu, msg, result);
}
static int ps3_smp_probe(void)
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index 39a472e9e80..375a9f92158 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -197,7 +197,7 @@ static void spu_unmap(struct spu *spu)
* The current HV requires the spu shadow regs to be mapped with the
* PTE page protection bits set as read-only (PP=3). This implementation
* uses the low level __ioremap() to bypass the page protection settings
- * inforced by ioremap_flags() to get the needed PTE bits set for the
+ * inforced by ioremap_prot() to get the needed PTE bits set for the
* shadow regs.
*/
@@ -214,7 +214,7 @@ static int __init setup_areas(struct spu *spu)
goto fail_ioremap;
}
- spu->local_store = (__force void *)ioremap_flags(spu->local_store_phys,
+ spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys,
LS_SIZE, _PAGE_NO_CACHE);
if (!spu->local_store) {
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 5b3da4b4ea7..71af4c5d6c0 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -3,7 +3,10 @@ config PPC_PSERIES
bool "IBM pSeries & new (POWER5-based) iSeries"
select MPIC
select PCI_MSI
- select XICS
+ select PPC_XICS
+ select PPC_ICP_NATIVE
+ select PPC_ICP_HV
+ select PPC_ICS_RTAS
select PPC_I8259
select PPC_RTAS
select PPC_RTAS_DAEMON
@@ -47,6 +50,24 @@ config SCANLOG
tristate "Scanlog dump interface"
depends on RTAS_PROC && PPC_PSERIES
+config IO_EVENT_IRQ
+ bool "IO Event Interrupt support"
+ depends on PPC_PSERIES
+ default y
+ help
+ Select this option, if you want to enable support for IO Event
+ interrupts. IO event interrupt is a mechanism provided by RTAS
+ to return information about hardware error and non-error events
+ which may need OS attention. RTAS returns events for multiple
+ event types and scopes. Device drivers can register their handlers
+ to receive events.
+
+ This option will only enable the IO event platform code. You
+ will still need to enable or compile the actual drivers
+ that use this infrastruture to handle IO event interrupts.
+
+ Say Y if you are unsure.
+
config LPARCFG
bool "LPAR Configuration Data"
depends on PPC_PSERIES || PPC_ISERIES
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index fc5237810ec..3556e402cbf 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -5,7 +5,6 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
setup.o iommu.o event_sources.o ras.o \
firmware.o power.o dlpar.o mobility.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_XICS) += xics.o
obj-$(CONFIG_SCANLOG) += scanlog.o
obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_KEXEC) += kexec.o
@@ -22,6 +21,7 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_DTL) += dtl.o
+obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
ifeq ($(CONFIG_PPC_PSERIES),y)
obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index c371bc06434..e9190073bb9 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -52,10 +52,10 @@ static u8 dtl_event_mask = 0x7;
/*
- * Size of per-cpu log buffers. Default is just under 16 pages worth.
+ * Size of per-cpu log buffers. Firmware requires that the buffer does
+ * not cross a 4k boundary.
*/
-static int dtl_buf_entries = (16 * 85);
-
+static int dtl_buf_entries = N_DISPATCH_LOG;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
struct dtl_ring {
@@ -151,7 +151,7 @@ static int dtl_start(struct dtl *dtl)
/* Register our dtl buffer with the hypervisor. The HV expects the
* buffer size to be passed in the second word of the buffer */
- ((u32 *)dtl->buf)[1] = dtl->buf_entries * sizeof(struct dtl_entry);
+ ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES;
hwcpu = get_hard_smp_processor_id(dtl->cpu);
addr = __pa(dtl->buf);
@@ -196,13 +196,15 @@ static int dtl_enable(struct dtl *dtl)
long int rc;
struct dtl_entry *buf = NULL;
+ if (!dtl_cache)
+ return -ENOMEM;
+
/* only allow one reader */
if (dtl->buf)
return -EBUSY;
n_entries = dtl_buf_entries;
- buf = kmalloc_node(n_entries * sizeof(struct dtl_entry),
- GFP_KERNEL, cpu_to_node(dtl->cpu));
+ buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
if (!buf) {
printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
__func__, dtl->cpu);
@@ -223,7 +225,7 @@ static int dtl_enable(struct dtl *dtl)
spin_unlock(&dtl->lock);
if (rc)
- kfree(buf);
+ kmem_cache_free(dtl_cache, buf);
return rc;
}
@@ -231,7 +233,7 @@ static void dtl_disable(struct dtl *dtl)
{
spin_lock(&dtl->lock);
dtl_stop(dtl);
- kfree(dtl->buf);
+ kmem_cache_free(dtl_cache, dtl->buf);
dtl->buf = NULL;
dtl->buf_entries = 0;
spin_unlock(&dtl->lock);
@@ -365,7 +367,7 @@ static int dtl_init(void)
event_mask_file = debugfs_create_x8("dtl_event_mask", 0600,
dtl_dir, &dtl_event_mask);
- buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0600,
+ buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400,
dtl_dir, &dtl_buf_entries);
if (!event_mask_file || !buf_entries_file) {
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 89649173d3a..46b55cf563e 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -93,6 +93,7 @@ static int ibm_slot_error_detail;
static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
static int ibm_configure_bridge;
+static int ibm_configure_pe;
int eeh_subsystem_enabled;
EXPORT_SYMBOL(eeh_subsystem_enabled);
@@ -261,6 +262,8 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
pci_regs_buf[0] = 0;
rtas_pci_enable(pdn, EEH_THAW_MMIO);
+ rtas_configure_bridge(pdn);
+ eeh_restore_bars(pdn);
loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
@@ -448,6 +451,39 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag)
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
}
+void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
+{
+ struct device_node *dn;
+
+ for_each_child_of_node(parent, dn) {
+ if (PCI_DN(dn)) {
+
+ struct pci_dev *dev = PCI_DN(dn)->pcidev;
+
+ if (dev && dev->driver)
+ *freset |= dev->needs_freset;
+
+ __eeh_set_pe_freset(dn, freset);
+ }
+ }
+}
+
+void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
+{
+ struct pci_dev *dev;
+ dn = find_device_pe(dn);
+
+ /* Back up one, since config addrs might be shared */
+ if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+ dn = dn->parent;
+
+ dev = PCI_DN(dn)->pcidev;
+ if (dev)
+ *freset |= dev->needs_freset;
+
+ __eeh_set_pe_freset(dn, freset);
+}
+
/**
* eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
* @dn device node
@@ -692,15 +728,24 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
if (pdn->eeh_pe_config_addr)
config_addr = pdn->eeh_pe_config_addr;
- rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
+ rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
config_addr,
BUID_HI(pdn->phb->buid),
BUID_LO(pdn->phb->buid),
state);
- if (rc)
- printk (KERN_WARNING "EEH: Unable to reset the failed slot,"
- " (%d) #RST=%d dn=%s\n",
- rc, state, pdn->node->full_name);
+
+ /* Fundamental-reset not supported on this PE, try hot-reset */
+ if (rc == -8 && state == 3) {
+ rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+ config_addr,
+ BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid), 1);
+ if (rc)
+ printk(KERN_WARNING
+ "EEH: Unable to reset the failed slot,"
+ " #RST=%d dn=%s\n",
+ rc, pdn->node->full_name);
+ }
}
/**
@@ -736,18 +781,21 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
/**
* rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
* @pdn: pci device node to be reset.
- *
- * Return 0 if success, else a non-zero value.
*/
static void __rtas_set_slot_reset(struct pci_dn *pdn)
{
- struct pci_dev *dev = pdn->pcidev;
+ unsigned int freset = 0;
- /* Determine type of EEH reset required by device,
- * default hot reset or fundamental reset
- */
- if (dev && dev->needs_freset)
+ /* Determine type of EEH reset required for
+ * Partitionable Endpoint, a hot-reset (1)
+ * or a fundamental reset (3).
+ * A fundamental reset required by any device under
+ * Partitionable Endpoint trumps hot-reset.
+ */
+ eeh_set_pe_freset(pdn->node, &freset);
+
+ if (freset)
rtas_pci_slot_reset(pdn, 3);
else
rtas_pci_slot_reset(pdn, 1);
@@ -895,13 +943,20 @@ rtas_configure_bridge(struct pci_dn *pdn)
{
int config_addr;
int rc;
+ int token;
/* Use PE configuration address, if present */
config_addr = pdn->eeh_config_addr;
if (pdn->eeh_pe_config_addr)
config_addr = pdn->eeh_pe_config_addr;
- rc = rtas_call(ibm_configure_bridge,3,1, NULL,
+ /* Use new configure-pe function, if supported */
+ if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
+ token = ibm_configure_pe;
+ else
+ token = ibm_configure_bridge;
+
+ rc = rtas_call(token, 3, 1, NULL,
config_addr,
BUID_HI(pdn->phb->buid),
BUID_LO(pdn->phb->buid));
@@ -1077,6 +1132,7 @@ void __init eeh_init(void)
ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
+ ibm_configure_pe = rtas_token("ibm,configure-pe");
if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
return;
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index b8d70f5d9aa..1b6cb10589e 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -328,7 +328,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
struct pci_bus *frozen_bus;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
- const char *location, *pci_str, *drv_str;
+ const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
frozen_dn = find_device_pe(event->dn);
if (!frozen_dn) {
@@ -364,13 +364,8 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
frozen_pdn = PCI_DN(frozen_dn);
frozen_pdn->eeh_freeze_count++;
- if (frozen_pdn->pcidev) {
- pci_str = pci_name (frozen_pdn->pcidev);
- drv_str = pcid_name (frozen_pdn->pcidev);
- } else {
- pci_str = eeh_pci_name(event->dev);
- drv_str = pcid_name (event->dev);
- }
+ pci_str = eeh_pci_name(event->dev);
+ drv_str = pcid_name(event->dev);
if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
goto excess_failures;
@@ -378,8 +373,17 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
printk(KERN_WARNING
"EEH: This PCI device has failed %d times in the last hour:\n",
frozen_pdn->eeh_freeze_count);
+
+ if (frozen_pdn->pcidev) {
+ bus_pci_str = pci_name(frozen_pdn->pcidev);
+ bus_drv_str = pcid_name(frozen_pdn->pcidev);
+ printk(KERN_WARNING
+ "EEH: Bus location=%s driver=%s pci addr=%s\n",
+ location, bus_drv_str, bus_pci_str);
+ }
+
printk(KERN_WARNING
- "EEH: location=%s driver=%s pci addr=%s\n",
+ "EEH: Device location=%s driver=%s pci addr=%s\n",
location, drv_str, pci_str);
/* Walk the various device drivers attached to this slot through
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index ef8c45489e2..46f13a3c5d0 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -19,6 +19,7 @@
*/
#include <linux/kernel.h>
+#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <asm/system.h>
@@ -28,7 +29,7 @@
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/pSeries_reconfig.h>
-#include "xics.h"
+#include <asm/xics.h>
#include "plpar_wrappers.h"
#include "offline_states.h"
@@ -280,7 +281,7 @@ static int pseries_add_processor(struct device_node *np)
}
for_each_cpu(cpu, tmp) {
- BUG_ON(cpumask_test_cpu(cpu, cpu_present_mask));
+ BUG_ON(cpu_present(cpu));
set_cpu_present(cpu, true);
set_hard_smp_processor_id(cpu, *intserv++);
}
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
new file mode 100644
index 00000000000..c829e6067d5
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2010 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/irq.h>
+#include <asm/io_event_irq.h>
+
+#include "pseries.h"
+
+/*
+ * IO event interrupt is a mechanism provided by RTAS to return
+ * information about hardware error and non-error events. Device
+ * drivers can register their event handlers to receive events.
+ * Device drivers are expected to use atomic_notifier_chain_register()
+ * and atomic_notifier_chain_unregister() to register and unregister
+ * their event handlers. Since multiple IO event types and scopes
+ * share an IO event interrupt, the event handlers are called one
+ * by one until the IO event is claimed by one of the handlers.
+ * The event handlers are expected to return NOTIFY_OK if the
+ * event is handled by the event handler or NOTIFY_DONE if the
+ * event does not belong to the handler.
+ *
+ * Usage:
+ *
+ * Notifier function:
+ * #include <asm/io_event_irq.h>
+ * int event_handler(struct notifier_block *nb, unsigned long val, void *data) {
+ * p = (struct pseries_io_event_sect_data *) data;
+ * if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE;
+ * :
+ * :
+ * return NOTIFY_OK;
+ * }
+ * struct notifier_block event_nb = {
+ * .notifier_call = event_handler,
+ * }
+ *
+ * Registration:
+ * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb);
+ *
+ * Unregistration:
+ * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb);
+ */
+
+ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list);
+EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list);
+
+static int ioei_check_exception_token;
+
+/* pSeries event log format */
+
+/* Two bytes ASCII section IDs */
+#define PSERIES_ELOG_SECT_ID_PRIV_HDR (('P' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_HDR (('U' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_PRIMARY_SRC (('P' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_EXTENDED_UH (('E' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FAILING_MTMS (('M' << 8) | 'T')
+#define PSERIES_ELOG_SECT_ID_SECONDARY_SRC (('S' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_DUMP_LOCATOR (('D' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FW_ERROR (('S' << 8) | 'W')
+#define PSERIES_ELOG_SECT_ID_IMPACT_PART_ID (('L' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_LOGIC_RESOURCE_ID (('L' << 8) | 'R')
+#define PSERIES_ELOG_SECT_ID_HMC_ID (('H' << 8) | 'M')
+#define PSERIES_ELOG_SECT_ID_EPOW (('E' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_IO_EVENT (('I' << 8) | 'E')
+#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO (('M' << 8) | 'I')
+#define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D')
+
+/* Vendor specific Platform Event Log Format, Version 6, section header */
+struct pseries_elog_section {
+ uint16_t id; /* 0x00 2-byte ASCII section ID */
+ uint16_t length; /* 0x02 Section length in bytes */
+ uint8_t version; /* 0x04 Section version */
+ uint8_t subtype; /* 0x05 Section subtype */
+ uint16_t creator_component; /* 0x06 Creator component ID */
+ uint8_t data[]; /* 0x08 Start of section data */
+};
+
+static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned;
+
+/**
+ * Find data portion of a specific section in RTAS extended event log.
+ * @elog: RTAS error/event log.
+ * @sect_id: secsion ID.
+ *
+ * Return:
+ * pointer to the section data of the specified section
+ * NULL if not found
+ */
+static struct pseries_elog_section *find_xelog_section(struct rtas_error_log *elog,
+ uint16_t sect_id)
+{
+ struct rtas_ext_event_log_v6 *xelog =
+ (struct rtas_ext_event_log_v6 *) elog->buffer;
+ struct pseries_elog_section *sect;
+ unsigned char *p, *log_end;
+
+ /* Check that we understand the format */
+ if (elog->extended_log_length < sizeof(struct rtas_ext_event_log_v6) ||
+ xelog->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
+ xelog->company_id != RTAS_V6EXT_COMPANY_ID_IBM)
+ return NULL;
+
+ log_end = elog->buffer + elog->extended_log_length;
+ p = xelog->vendor_log;
+ while (p < log_end) {
+ sect = (struct pseries_elog_section *)p;
+ if (sect->id == sect_id)
+ return sect;
+ p += sect->length;
+ }
+ return NULL;
+}
+
+/**
+ * Find the data portion of an IO Event section from event log.
+ * @elog: RTAS error/event log.
+ *
+ * Return:
+ * pointer to a valid IO event section data. NULL if not found.
+ */
+static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
+{
+ struct pseries_elog_section *sect;
+
+ /* We should only ever get called for io-event interrupts, but if
+ * we do get called for another type then something went wrong so
+ * make some noise about it.
+ * RTAS_TYPE_IO only exists in extended event log version 6 or later.
+ * No need to check event log version.
+ */
+ if (unlikely(elog->type != RTAS_TYPE_IO)) {
+ printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d",
+ elog->type);
+ return NULL;
+ }
+
+ sect = find_xelog_section(elog, PSERIES_ELOG_SECT_ID_IO_EVENT);
+ if (unlikely(!sect)) {
+ printk_once(KERN_WARNING "io_event_irq: RTAS extended event "
+ "log does not contain an IO Event section. "
+ "Could be a bug in system firmware!\n");
+ return NULL;
+ }
+ return (struct pseries_io_event *) &sect->data;
+}
+
+/*
+ * PAPR:
+ * - check-exception returns the first found error or event and clear that
+ * error or event so it is reported once.
+ * - Each interrupt returns one event. If a plateform chooses to report
+ * multiple events through a single interrupt, it must ensure that the
+ * interrupt remains asserted until check-exception has been used to
+ * process all out-standing events for that interrupt.
+ *
+ * Implementation notes:
+ * - Events must be processed in the order they are returned. Hence,
+ * sequential in nature.
+ * - The owner of an event is determined by combinations of scope,
+ * event type, and sub-type. There is no easy way to pre-sort clients
+ * by scope or event type alone. For example, Torrent ISR route change
+ * event is reported with scope 0x00 (Not Applicatable) rather than
+ * 0x3B (Torrent-hub). It is better to let the clients to identify
+ * who owns the the event.
+ */
+
+static irqreturn_t ioei_interrupt(int irq, void *dev_id)
+{
+ struct pseries_io_event *event;
+ int rtas_rc;
+
+ for (;;) {
+ rtas_rc = rtas_call(ioei_check_exception_token, 6, 1, NULL,
+ RTAS_VECTOR_EXTERNAL_INTERRUPT,
+ virq_to_hw(irq),
+ RTAS_IO_EVENTS, 1 /* Time Critical */,
+ __pa(ioei_rtas_buf),
+ RTAS_DATA_BUF_SIZE);
+ if (rtas_rc != 0)
+ break;
+
+ event = ioei_find_event((struct rtas_error_log *)ioei_rtas_buf);
+ if (!event)
+ continue;
+
+ atomic_notifier_call_chain(&pseries_ioei_notifier_list,
+ 0, event);
+ }
+ return IRQ_HANDLED;
+}
+
+static int __init ioei_init(void)
+{
+ struct device_node *np;
+
+ ioei_check_exception_token = rtas_token("check-exception");
+ if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE) {
+ pr_warning("IO Event IRQ not supported on this system !\n");
+ return -ENODEV;
+ }
+ np = of_find_node_by_path("/event-sources/ibm,io-events");
+ if (np) {
+ request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT");
+ of_node_put(np);
+ } else {
+ pr_err("io_event_irq: No ibm,io-events on system! "
+ "IO Event interrupt disabled.\n");
+ return -ENODEV;
+ }
+ return 0;
+}
+machine_subsys_initcall(pseries, ioei_init);
+
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 6d5412a18b2..01faab9456c 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -659,15 +659,18 @@ static void remove_ddw(struct device_node *np)
{
struct dynamic_dma_window_prop *dwp;
struct property *win64;
- const u32 *ddr_avail;
+ const u32 *ddw_avail;
u64 liobn;
int len, ret;
- ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len);
+ ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len);
win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
- if (!win64 || !ddr_avail || len < 3 * sizeof(u32))
+ if (!win64)
return;
+ if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp))
+ goto delprop;
+
dwp = win64->value;
liobn = (u64)be32_to_cpu(dwp->liobn);
@@ -681,28 +684,29 @@ static void remove_ddw(struct device_node *np)
pr_debug("%s successfully cleared tces in window.\n",
np->full_name);
- ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn);
+ ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
if (ret)
pr_warning("%s: failed to remove direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np->full_name, ret, ddr_avail[2], liobn);
+ np->full_name, ret, ddw_avail[2], liobn);
else
pr_debug("%s: successfully removed direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np->full_name, ret, ddr_avail[2], liobn);
-}
+ np->full_name, ret, ddw_avail[2], liobn);
+delprop:
+ ret = prom_remove_property(np, win64);
+ if (ret)
+ pr_warning("%s: failed to remove direct window property: %d\n",
+ np->full_name, ret);
+}
-static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
+static u64 find_existing_ddw(struct device_node *pdn)
{
- struct device_node *dn;
- struct pci_dn *pcidn;
struct direct_window *window;
const struct dynamic_dma_window_prop *direct64;
u64 dma_addr = 0;
- dn = pci_device_to_OF_node(dev);
- pcidn = PCI_DN(dn);
spin_lock(&direct_window_list_lock);
/* check if we already created a window and dupe that config if so */
list_for_each_entry(window, &direct_window_list, list) {
@@ -717,36 +721,40 @@ static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *
return dma_addr;
}
-static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn)
+static int find_existing_ddw_windows(void)
{
- struct device_node *dn;
- struct pci_dn *pcidn;
int len;
+ struct device_node *pdn;
struct direct_window *window;
const struct dynamic_dma_window_prop *direct64;
- u64 dma_addr = 0;
- dn = pci_device_to_OF_node(dev);
- pcidn = PCI_DN(dn);
- direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
- if (direct64) {
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ return 0;
+
+ for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
+ direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
+ if (!direct64)
+ continue;
+
window = kzalloc(sizeof(*window), GFP_KERNEL);
- if (!window) {
+ if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
+ kfree(window);
remove_ddw(pdn);
- } else {
- window->device = pdn;
- window->prop = direct64;
- spin_lock(&direct_window_list_lock);
- list_add(&window->list, &direct_window_list);
- spin_unlock(&direct_window_list_lock);
- dma_addr = direct64->dma_base;
+ continue;
}
+
+ window->device = pdn;
+ window->prop = direct64;
+ spin_lock(&direct_window_list_lock);
+ list_add(&window->list, &direct_window_list);
+ spin_unlock(&direct_window_list_lock);
}
- return dma_addr;
+ return 0;
}
+machine_arch_initcall(pseries, find_existing_ddw_windows);
-static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
struct ddw_query_response *query)
{
struct device_node *dn;
@@ -767,15 +775,15 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
if (pcidn->eeh_pe_config_addr)
cfg_addr = pcidn->eeh_pe_config_addr;
buid = pcidn->phb->buid;
- ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query,
+ ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
cfg_addr, BUID_HI(buid), BUID_LO(buid));
dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
- " returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid),
+ " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
BUID_LO(buid), ret);
return ret;
}
-static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
struct ddw_create_response *create, int page_shift,
int window_shift)
{
@@ -800,12 +808,12 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
do {
/* extra outputs are LIOBN and dma-addr (hi, lo) */
- ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr,
+ ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr,
BUID_HI(buid), BUID_LO(buid), page_shift, window_shift);
} while (rtas_busy_delay(ret));
dev_info(&dev->dev,
"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
- "(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1],
+ "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
@@ -831,18 +839,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
int page_shift;
u64 dma_addr, max_addr;
struct device_node *dn;
- const u32 *uninitialized_var(ddr_avail);
+ const u32 *uninitialized_var(ddw_avail);
struct direct_window *window;
- struct property *uninitialized_var(win64);
+ struct property *win64;
struct dynamic_dma_window_prop *ddwprop;
mutex_lock(&direct_window_init_mutex);
- dma_addr = dupe_ddw_if_already_created(dev, pdn);
- if (dma_addr != 0)
- goto out_unlock;
-
- dma_addr = dupe_ddw_if_kexec(dev, pdn);
+ dma_addr = find_existing_ddw(pdn);
if (dma_addr != 0)
goto out_unlock;
@@ -854,8 +858,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* for the given node in that order.
* the property is actually in the parent, not the PE
*/
- ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
- if (!ddr_avail || len < 3 * sizeof(u32))
+ ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
+ if (!ddw_avail || len < 3 * sizeof(u32))
goto out_unlock;
/*
@@ -865,7 +869,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* of page sizes: supported and supported for migrate-dma.
*/
dn = pci_device_to_OF_node(dev);
- ret = query_ddw(dev, ddr_avail, &query);
+ ret = query_ddw(dev, ddw_avail, &query);
if (ret != 0)
goto out_unlock;
@@ -907,13 +911,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
}
win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
+ win64->length = sizeof(*ddwprop);
if (!win64->name || !win64->value) {
dev_info(&dev->dev,
"couldn't allocate property name and value\n");
goto out_free_prop;
}
- ret = create_ddw(dev, ddr_avail, &create, page_shift, len);
+ ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
if (ret != 0)
goto out_free_prop;
@@ -1021,13 +1026,16 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
const void *dma_window = NULL;
u64 dma_offset;
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+ if (!dev->dma_mask)
return -EIO;
+ if (!dev_is_pci(dev))
+ goto check_mask;
+
+ pdev = to_pci_dev(dev);
+
/* only attempt to use a new window if 64-bit DMA is requested */
if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
- pdev = to_pci_dev(dev);
-
dn = pci_device_to_OF_node(pdev);
dev_dbg(dev, "node is %s\n", dn->full_name);
@@ -1054,12 +1062,17 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
}
}
- /* fall-through to iommu ops */
- if (!ddw_enabled) {
- dev_info(dev, "Using 32-bit DMA via iommu\n");
+ /* fall back on iommu ops, restore table pointer with ops */
+ if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
+ dev_info(dev, "Restoring 32-bit DMA via iommu\n");
set_dma_ops(dev, &dma_iommu_ops);
+ pci_dma_dev_setup_pSeriesLP(pdev);
}
+check_mask:
+ if (!dma_supported(dev, dma_mask))
+ return -EIO;
+
*dev->dma_mask = dma_mask;
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 77d38a5e2ff..54cf3a4aa16 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -7,15 +7,18 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
#include <asm/machdep.h>
#include <asm/page.h>
#include <asm/firmware.h>
#include <asm/kexec.h>
#include <asm/mpic.h>
+#include <asm/xics.h>
#include <asm/smp.h>
#include "pseries.h"
-#include "xics.h"
#include "plpar_wrappers.h"
static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index ca5d5898d32..39e6e0a7b2f 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -329,6 +329,8 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
/* Make pHyp happy */
if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU))
hpte_r &= ~_PAGE_COHERENT;
+ if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
+ flags |= H_COALESCE_CAND;
lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
if (unlikely(lpar_rc == H_PTEG_FULL)) {
@@ -573,7 +575,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
unsigned long i, pix, rc;
unsigned long flags = 0;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
unsigned long param[9];
unsigned long va;
unsigned long hash, index, shift, hidx, slot;
@@ -771,3 +773,47 @@ out:
local_irq_restore(flags);
}
#endif
+
+/**
+ * h_get_mpp
+ * H_GET_MPP hcall returns info in 7 parms
+ */
+int h_get_mpp(struct hvcall_mpp_data *mpp_data)
+{
+ int rc;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+ rc = plpar_hcall9(H_GET_MPP, retbuf);
+
+ mpp_data->entitled_mem = retbuf[0];
+ mpp_data->mapped_mem = retbuf[1];
+
+ mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+ mpp_data->pool_num = retbuf[2] & 0xffff;
+
+ mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
+ mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
+ mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
+
+ mpp_data->pool_size = retbuf[4];
+ mpp_data->loan_request = retbuf[5];
+ mpp_data->backing_mem = retbuf[6];
+
+ return rc;
+}
+EXPORT_SYMBOL(h_get_mpp);
+
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
+{
+ int rc;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };
+
+ rc = plpar_hcall9(H_GET_MPP_X, retbuf);
+
+ mpp_x_data->coalesced_bytes = retbuf[0];
+ mpp_x_data->pool_coalesced_bytes = retbuf[1];
+ mpp_x_data->pool_purr_cycles = retbuf[2];
+ mpp_x_data->pool_spurr_cycles = retbuf[3];
+
+ return rc;
+}
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index d9801117124..4bf21207d7d 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -270,31 +270,4 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
lbuf[1]);
}
-static inline long plpar_eoi(unsigned long xirr)
-{
- return plpar_hcall_norets(H_EOI, xirr);
-}
-
-static inline long plpar_cppr(unsigned long cppr)
-{
- return plpar_hcall_norets(H_CPPR, cppr);
-}
-
-static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
-{
- return plpar_hcall_norets(H_IPI, servernum, mfrr);
-}
-
-static inline long plpar_xirr(unsigned long *xirr_ret, unsigned char cppr)
-{
- long rc;
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- rc = plpar_hcall(H_XIRR, retbuf, cppr);
-
- *xirr_ret = retbuf[0];
-
- return rc;
-}
-
#endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c55d7ad9c64..086d2ae4e06 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -122,7 +122,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
RTAS_VECTOR_EXTERNAL_INTERRUPT,
- irq_map[irq].hwirq,
+ virq_to_hw(irq),
RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
critical, __pa(&ras_log_buf),
rtas_get_error_log_max());
@@ -157,7 +157,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
RTAS_VECTOR_EXTERNAL_INTERRUPT,
- irq_map[irq].hwirq,
+ virq_to_hw(irq),
RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
__pa(&ras_log_buf),
rtas_get_error_log_max());
@@ -227,7 +227,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
struct rtas_error_log *h, *errhdr = NULL;
if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
- printk(KERN_ERR "FWNMI: corrupt r3\n");
+ printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
return NULL;
}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 6c42cfde841..593acceeff9 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -53,9 +53,9 @@
#include <asm/irq.h>
#include <asm/time.h>
#include <asm/nvram.h>
-#include "xics.h"
#include <asm/pmc.h>
#include <asm/mpic.h>
+#include <asm/xics.h>
#include <asm/ppc-pci.h>
#include <asm/i8259.h>
#include <asm/udbg.h>
@@ -205,6 +205,9 @@ static void __init pseries_mpic_init_IRQ(void)
mpic_assign_isu(mpic, n, isuaddr);
}
+ /* Setup top-level get_irq */
+ ppc_md.get_irq = mpic_get_irq;
+
/* All ISUs are setup, complete initialization */
mpic_init(mpic);
@@ -214,7 +217,7 @@ static void __init pseries_mpic_init_IRQ(void)
static void __init pseries_xics_init_IRQ(void)
{
- xics_init_IRQ();
+ xics_init();
pseries_setup_i8259_cascade();
}
@@ -238,7 +241,6 @@ static void __init pseries_discover_pic(void)
if (strstr(typep, "open-pic")) {
pSeries_mpic_node = of_node_get(np);
ppc_md.init_IRQ = pseries_mpic_init_IRQ;
- ppc_md.get_irq = mpic_get_irq;
setup_kexec_cpu_down_mpic();
smp_init_pseries_mpic();
return;
@@ -276,6 +278,8 @@ static struct notifier_block pci_dn_reconfig_nb = {
.notifier_call = pci_dn_reconfig_notifier,
};
+struct kmem_cache *dtl_cache;
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
* Allocate space for the dispatch trace log for all possible cpus
@@ -287,18 +291,12 @@ static int alloc_dispatch_logs(void)
int cpu, ret;
struct paca_struct *pp;
struct dtl_entry *dtl;
- struct kmem_cache *dtl_cache;
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return 0;
- dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
- DISPATCH_LOG_BYTES, 0, NULL);
- if (!dtl_cache) {
- pr_warn("Failed to create dispatch trace log buffer cache\n");
- pr_warn("Stolen time statistics will be unreliable\n");
+ if (!dtl_cache)
return 0;
- }
for_each_possible_cpu(cpu) {
pp = &paca[cpu];
@@ -332,10 +330,27 @@ static int alloc_dispatch_logs(void)
return 0;
}
-
-early_initcall(alloc_dispatch_logs);
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+static inline int alloc_dispatch_logs(void)
+{
+ return 0;
+}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+static int alloc_dispatch_log_kmem_cache(void)
+{
+ dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
+ DISPATCH_LOG_BYTES, 0, NULL);
+ if (!dtl_cache) {
+ pr_warn("Failed to create dispatch trace log buffer cache\n");
+ pr_warn("Stolen time statistics will be unreliable\n");
+ return 0;
+ }
+
+ return alloc_dispatch_logs();
+}
+early_initcall(alloc_dispatch_log_kmem_cache);
+
static void __init pSeries_setup_arch(void)
{
/* Discover PIC type and setup ppc_md accordingly */
@@ -403,6 +418,16 @@ static int pseries_set_xdabr(unsigned long dabr)
#define CMO_CHARACTERISTICS_TOKEN 44
#define CMO_MAXLENGTH 1026
+void pSeries_coalesce_init(void)
+{
+ struct hvcall_mpp_x_data mpp_x_data;
+
+ if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
+ powerpc_firmware_features |= FW_FEATURE_XCMO;
+ else
+ powerpc_firmware_features &= ~FW_FEATURE_XCMO;
+}
+
/**
* fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
* handle that here. (Stolen from parse_system_parameter_string)
@@ -472,6 +497,7 @@ void pSeries_cmo_feature_init(void)
pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
CMO_SecPSP);
powerpc_firmware_features |= FW_FEATURE_CMO;
+ pSeries_coalesce_init();
} else
pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
CMO_SecPSP);
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index a509c5292a6..fbffd7e47ab 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -44,10 +44,11 @@
#include <asm/mpic.h>
#include <asm/vdso_datapage.h>
#include <asm/cputhreads.h>
+#include <asm/mpic.h>
+#include <asm/xics.h>
#include "plpar_wrappers.h"
#include "pseries.h"
-#include "xics.h"
#include "offline_states.h"
@@ -136,7 +137,6 @@ out:
return 1;
}
-#ifdef CONFIG_XICS
static void __devinit smp_xics_setup_cpu(int cpu)
{
if (cpu != boot_cpuid)
@@ -151,14 +151,13 @@ static void __devinit smp_xics_setup_cpu(int cpu)
set_default_offline_state(cpu);
#endif
}
-#endif /* CONFIG_XICS */
-static void __devinit smp_pSeries_kick_cpu(int nr)
+static int __devinit smp_pSeries_kick_cpu(int nr)
{
BUG_ON(nr < 0 || nr >= NR_CPUS);
if (!smp_startup_cpu(nr))
- return;
+ return -ENOENT;
/*
* The processor is currently spinning, waiting for the
@@ -180,6 +179,8 @@ static void __devinit smp_pSeries_kick_cpu(int nr)
"Ret= %ld\n", nr, rc);
}
#endif
+
+ return 0;
}
static int smp_pSeries_cpu_bootable(unsigned int nr)
@@ -197,23 +198,22 @@ static int smp_pSeries_cpu_bootable(unsigned int nr)
return 1;
}
-#ifdef CONFIG_MPIC
+
static struct smp_ops_t pSeries_mpic_smp_ops = {
.message_pass = smp_mpic_message_pass,
.probe = smp_mpic_probe,
.kick_cpu = smp_pSeries_kick_cpu,
.setup_cpu = smp_mpic_setup_cpu,
};
-#endif
-#ifdef CONFIG_XICS
+
static struct smp_ops_t pSeries_xics_smp_ops = {
- .message_pass = smp_xics_message_pass,
- .probe = smp_xics_probe,
+ .message_pass = smp_muxed_ipi_message_pass,
+ .cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */
+ .probe = xics_smp_probe,
.kick_cpu = smp_pSeries_kick_cpu,
.setup_cpu = smp_xics_setup_cpu,
.cpu_bootable = smp_pSeries_cpu_bootable,
};
-#endif
/* This is called very early */
static void __init smp_init_pseries(void)
@@ -245,14 +245,12 @@ static void __init smp_init_pseries(void)
pr_debug(" <- smp_init_pSeries()\n");
}
-#ifdef CONFIG_MPIC
void __init smp_init_pseries_mpic(void)
{
smp_ops = &pSeries_mpic_smp_ops;
smp_init_pseries();
}
-#endif
void __init smp_init_pseries_xics(void)
{
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
deleted file mode 100644
index d6901334d66..00000000000
--- a/arch/powerpc/platforms/pseries/xics.c
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- * arch/powerpc/platforms/pseries/xics.c
- *
- * Copyright 2000 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/types.h>
-#include <linux/threads.h>
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/radix-tree.h>
-#include <linux/cpu.h>
-#include <linux/msi.h>
-#include <linux/of.h>
-#include <linux/percpu.h>
-
-#include <asm/firmware.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/smp.h>
-#include <asm/rtas.h>
-#include <asm/hvcall.h>
-#include <asm/machdep.h>
-
-#include "xics.h"
-#include "plpar_wrappers.h"
-
-static struct irq_host *xics_host;
-
-#define XICS_IPI 2
-#define XICS_IRQ_SPURIOUS 0
-
-/* Want a priority other than 0. Various HW issues require this. */
-#define DEFAULT_PRIORITY 5
-
-/*
- * Mark IPIs as higher priority so we can take them inside interrupts that
- * arent marked IRQF_DISABLED
- */
-#define IPI_PRIORITY 4
-
-/* The least favored priority */
-#define LOWEST_PRIORITY 0xFF
-
-/* The number of priorities defined above */
-#define MAX_NUM_PRIORITIES 3
-
-static unsigned int default_server = 0xFF;
-static unsigned int default_distrib_server = 0;
-static unsigned int interrupt_server_size = 8;
-
-/* RTAS service tokens */
-static int ibm_get_xive;
-static int ibm_set_xive;
-static int ibm_int_on;
-static int ibm_int_off;
-
-struct xics_cppr {
- unsigned char stack[MAX_NUM_PRIORITIES];
- int index;
-};
-
-static DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
-
-/* Direct hardware low level accessors */
-
-/* The part of the interrupt presentation layer that we care about */
-struct xics_ipl {
- union {
- u32 word;
- u8 bytes[4];
- } xirr_poll;
- union {
- u32 word;
- u8 bytes[4];
- } xirr;
- u32 dummy;
- union {
- u32 word;
- u8 bytes[4];
- } qirr;
-};
-
-static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS];
-
-static inline unsigned int direct_xirr_info_get(void)
-{
- int cpu = smp_processor_id();
-
- return in_be32(&xics_per_cpu[cpu]->xirr.word);
-}
-
-static inline void direct_xirr_info_set(unsigned int value)
-{
- int cpu = smp_processor_id();
-
- out_be32(&xics_per_cpu[cpu]->xirr.word, value);
-}
-
-static inline void direct_cppr_info(u8 value)
-{
- int cpu = smp_processor_id();
-
- out_8(&xics_per_cpu[cpu]->xirr.bytes[0], value);
-}
-
-static inline void direct_qirr_info(int n_cpu, u8 value)
-{
- out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value);
-}
-
-
-/* LPAR low level accessors */
-
-static inline unsigned int lpar_xirr_info_get(unsigned char cppr)
-{
- unsigned long lpar_rc;
- unsigned long return_value;
-
- lpar_rc = plpar_xirr(&return_value, cppr);
- if (lpar_rc != H_SUCCESS)
- panic(" bad return code xirr - rc = %lx\n", lpar_rc);
- return (unsigned int)return_value;
-}
-
-static inline void lpar_xirr_info_set(unsigned int value)
-{
- unsigned long lpar_rc;
-
- lpar_rc = plpar_eoi(value);
- if (lpar_rc != H_SUCCESS)
- panic("bad return code EOI - rc = %ld, value=%x\n", lpar_rc,
- value);
-}
-
-static inline void lpar_cppr_info(u8 value)
-{
- unsigned long lpar_rc;
-
- lpar_rc = plpar_cppr(value);
- if (lpar_rc != H_SUCCESS)
- panic("bad return code cppr - rc = %lx\n", lpar_rc);
-}
-
-static inline void lpar_qirr_info(int n_cpu , u8 value)
-{
- unsigned long lpar_rc;
-
- lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value);
- if (lpar_rc != H_SUCCESS)
- panic("bad return code qirr - rc = %lx\n", lpar_rc);
-}
-
-
-/* Interface to generic irq subsystem */
-
-#ifdef CONFIG_SMP
-/*
- * For the moment we only implement delivery to all cpus or one cpu.
- *
- * If the requested affinity is cpu_all_mask, we set global affinity.
- * If not we set it to the first cpu in the mask, even if multiple cpus
- * are set. This is so things like irqbalance (which set core and package
- * wide affinities) do the right thing.
- */
-static int get_irq_server(unsigned int virq, const struct cpumask *cpumask,
- unsigned int strict_check)
-{
-
- if (!distribute_irqs)
- return default_server;
-
- if (!cpumask_subset(cpu_possible_mask, cpumask)) {
- int server = cpumask_first_and(cpu_online_mask, cpumask);
-
- if (server < nr_cpu_ids)
- return get_hard_smp_processor_id(server);
-
- if (strict_check)
- return -1;
- }
-
- /*
- * Workaround issue with some versions of JS20 firmware that
- * deliver interrupts to cpus which haven't been started. This
- * happens when using the maxcpus= boot option.
- */
- if (cpumask_equal(cpu_online_mask, cpu_present_mask))
- return default_distrib_server;
-
- return default_server;
-}
-#else
-#define get_irq_server(virq, cpumask, strict_check) (default_server)
-#endif
-
-static void xics_unmask_irq(struct irq_data *d)
-{
- unsigned int hwirq;
- int call_status;
- int server;
-
- pr_devel("xics: unmask virq %d\n", d->irq);
-
- hwirq = (unsigned int)irq_map[d->irq].hwirq;
- pr_devel(" -> map to hwirq 0x%x\n", hwirq);
- if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
- return;
-
- server = get_irq_server(d->irq, d->affinity, 0);
-
- call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq, server,
- DEFAULT_PRIORITY);
- if (call_status != 0) {
- printk(KERN_ERR
- "%s: ibm_set_xive irq %u server %x returned %d\n",
- __func__, hwirq, server, call_status);
- return;
- }
-
- /* Now unmask the interrupt (often a no-op) */
- call_status = rtas_call(ibm_int_on, 1, 1, NULL, hwirq);
- if (call_status != 0) {
- printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
- __func__, hwirq, call_status);
- return;
- }
-}
-
-static unsigned int xics_startup(struct irq_data *d)
-{
- /*
- * The generic MSI code returns with the interrupt disabled on the
- * card, using the MSI mask bits. Firmware doesn't appear to unmask
- * at that level, so we do it here by hand.
- */
- if (d->msi_desc)
- unmask_msi_irq(d);
-
- /* unmask it */
- xics_unmask_irq(d);
- return 0;
-}
-
-static void xics_mask_real_irq(unsigned int hwirq)
-{
- int call_status;
-
- if (hwirq == XICS_IPI)
- return;
-
- call_status = rtas_call(ibm_int_off, 1, 1, NULL, hwirq);
- if (call_status != 0) {
- printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
- __func__, hwirq, call_status);
- return;
- }
-
- /* Have to set XIVE to 0xff to be able to remove a slot */
- call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq,
- default_server, 0xff);
- if (call_status != 0) {
- printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
- __func__, hwirq, call_status);
- return;
- }
-}
-
-static void xics_mask_irq(struct irq_data *d)
-{
- unsigned int hwirq;
-
- pr_devel("xics: mask virq %d\n", d->irq);
-
- hwirq = (unsigned int)irq_map[d->irq].hwirq;
- if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
- return;
- xics_mask_real_irq(hwirq);
-}
-
-static void xics_mask_unknown_vec(unsigned int vec)
-{
- printk(KERN_ERR "Interrupt %u (real) is invalid, disabling it.\n", vec);
- xics_mask_real_irq(vec);
-}
-
-static inline unsigned int xics_xirr_vector(unsigned int xirr)
-{
- /*
- * The top byte is the old cppr, to be restored on EOI.
- * The remaining 24 bits are the vector.
- */
- return xirr & 0x00ffffff;
-}
-
-static void push_cppr(unsigned int vec)
-{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
- if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
- return;
-
- if (vec == XICS_IPI)
- os_cppr->stack[++os_cppr->index] = IPI_PRIORITY;
- else
- os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY;
-}
-
-static unsigned int xics_get_irq_direct(void)
-{
- unsigned int xirr = direct_xirr_info_get();
- unsigned int vec = xics_xirr_vector(xirr);
- unsigned int irq;
-
- if (vec == XICS_IRQ_SPURIOUS)
- return NO_IRQ;
-
- irq = irq_radix_revmap_lookup(xics_host, vec);
- if (likely(irq != NO_IRQ)) {
- push_cppr(vec);
- return irq;
- }
-
- /* We don't have a linux mapping, so have rtas mask it. */
- xics_mask_unknown_vec(vec);
-
- /* We might learn about it later, so EOI it */
- direct_xirr_info_set(xirr);
- return NO_IRQ;
-}
-
-static unsigned int xics_get_irq_lpar(void)
-{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
- unsigned int xirr = lpar_xirr_info_get(os_cppr->stack[os_cppr->index]);
- unsigned int vec = xics_xirr_vector(xirr);
- unsigned int irq;
-
- if (vec == XICS_IRQ_SPURIOUS)
- return NO_IRQ;
-
- irq = irq_radix_revmap_lookup(xics_host, vec);
- if (likely(irq != NO_IRQ)) {
- push_cppr(vec);
- return irq;
- }
-
- /* We don't have a linux mapping, so have RTAS mask it. */
- xics_mask_unknown_vec(vec);
-
- /* We might learn about it later, so EOI it */
- lpar_xirr_info_set(xirr);
- return NO_IRQ;
-}
-
-static unsigned char pop_cppr(void)
-{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
- if (WARN_ON(os_cppr->index < 1))
- return LOWEST_PRIORITY;
-
- return os_cppr->stack[--os_cppr->index];
-}
-
-static void xics_eoi_direct(struct irq_data *d)
-{
- unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
-
- iosync();
- direct_xirr_info_set((pop_cppr() << 24) | hwirq);
-}
-
-static void xics_eoi_lpar(struct irq_data *d)
-{
- unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
-
- iosync();
- lpar_xirr_info_set((pop_cppr() << 24) | hwirq);
-}
-
-static int
-xics_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force)
-{
- unsigned int hwirq;
- int status;
- int xics_status[2];
- int irq_server;
-
- hwirq = (unsigned int)irq_map[d->irq].hwirq;
- if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
- return -1;
-
- status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
-
- if (status) {
- printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
- __func__, hwirq, status);
- return -1;
- }
-
- irq_server = get_irq_server(d->irq, cpumask, 1);
- if (irq_server == -1) {
- char cpulist[128];
- cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
- printk(KERN_WARNING
- "%s: No online cpus in the mask %s for irq %d\n",
- __func__, cpulist, d->irq);
- return -1;
- }
-
- status = rtas_call(ibm_set_xive, 3, 1, NULL,
- hwirq, irq_server, xics_status[1]);
-
- if (status) {
- printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
- __func__, hwirq, status);
- return -1;
- }
-
- return 0;
-}
-
-static struct irq_chip xics_pic_direct = {
- .name = "XICS",
- .irq_startup = xics_startup,
- .irq_mask = xics_mask_irq,
- .irq_unmask = xics_unmask_irq,
- .irq_eoi = xics_eoi_direct,
- .irq_set_affinity = xics_set_affinity
-};
-
-static struct irq_chip xics_pic_lpar = {
- .name = "XICS",
- .irq_startup = xics_startup,
- .irq_mask = xics_mask_irq,
- .irq_unmask = xics_unmask_irq,
- .irq_eoi = xics_eoi_lpar,
- .irq_set_affinity = xics_set_affinity
-};
-
-
-/* Interface to arch irq controller subsystem layer */
-
-/* Points to the irq_chip we're actually using */
-static struct irq_chip *xics_irq_chip;
-
-static int xics_host_match(struct irq_host *h, struct device_node *node)
-{
- /* IBM machines have interrupt parents of various funky types for things
- * like vdevices, events, etc... The trick we use here is to match
- * everything here except the legacy 8259 which is compatible "chrp,iic"
- */
- return !of_device_is_compatible(node, "chrp,iic");
-}
-
-static int xics_host_map(struct irq_host *h, unsigned int virq,
- irq_hw_number_t hw)
-{
- pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
-
- /* Insert the interrupt mapping into the radix tree for fast lookup */
- irq_radix_revmap_insert(xics_host, virq, hw);
-
- irq_set_status_flags(virq, IRQ_LEVEL);
- irq_set_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq);
- return 0;
-}
-
-static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
- const u32 *intspec, unsigned int intsize,
- irq_hw_number_t *out_hwirq, unsigned int *out_flags)
-
-{
- /* Current xics implementation translates everything
- * to level. It is not technically right for MSIs but this
- * is irrelevant at this point. We might get smarter in the future
- */
- *out_hwirq = intspec[0];
- *out_flags = IRQ_TYPE_LEVEL_LOW;
-
- return 0;
-}
-
-static struct irq_host_ops xics_host_ops = {
- .match = xics_host_match,
- .map = xics_host_map,
- .xlate = xics_host_xlate,
-};
-
-static void __init xics_init_host(void)
-{
- if (firmware_has_feature(FW_FEATURE_LPAR))
- xics_irq_chip = &xics_pic_lpar;
- else
- xics_irq_chip = &xics_pic_direct;
-
- xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops,
- XICS_IRQ_SPURIOUS);
- BUG_ON(xics_host == NULL);
- irq_set_default_host(xics_host);
-}
-
-
-/* Inter-processor interrupt support */
-
-#ifdef CONFIG_SMP
-/*
- * XICS only has a single IPI, so encode the messages per CPU
- */
-static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
-
-static inline void smp_xics_do_message(int cpu, int msg)
-{
- unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
- set_bit(msg, tgt);
- mb();
- if (firmware_has_feature(FW_FEATURE_LPAR))
- lpar_qirr_info(cpu, IPI_PRIORITY);
- else
- direct_qirr_info(cpu, IPI_PRIORITY);
-}
-
-void smp_xics_message_pass(int target, int msg)
-{
- unsigned int i;
-
- if (target < NR_CPUS) {
- smp_xics_do_message(target, msg);
- } else {
- for_each_online_cpu(i) {
- if (target == MSG_ALL_BUT_SELF
- && i == smp_processor_id())
- continue;
- smp_xics_do_message(i, msg);
- }
- }
-}
-
-static irqreturn_t xics_ipi_dispatch(int cpu)
-{
- unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
- mb(); /* order mmio clearing qirr */
- while (*tgt) {
- if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) {
- smp_message_recv(PPC_MSG_CALL_FUNCTION);
- }
- if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) {
- smp_message_recv(PPC_MSG_RESCHEDULE);
- }
- if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) {
- smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE);
- }
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
- if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) {
- smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
- }
-#endif
- }
- return IRQ_HANDLED;
-}
-
-static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id)
-{
- int cpu = smp_processor_id();
-
- direct_qirr_info(cpu, 0xff);
-
- return xics_ipi_dispatch(cpu);
-}
-
-static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id)
-{
- int cpu = smp_processor_id();
-
- lpar_qirr_info(cpu, 0xff);
-
- return xics_ipi_dispatch(cpu);
-}
-
-static void xics_request_ipi(void)
-{
- unsigned int ipi;
- int rc;
-
- ipi = irq_create_mapping(xics_host, XICS_IPI);
- BUG_ON(ipi == NO_IRQ);
-
- /*
- * IPIs are marked IRQF_DISABLED as they must run with irqs
- * disabled
- */
- irq_set_handler(ipi, handle_percpu_irq);
- if (firmware_has_feature(FW_FEATURE_LPAR))
- rc = request_irq(ipi, xics_ipi_action_lpar,
- IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
- else
- rc = request_irq(ipi, xics_ipi_action_direct,
- IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
- BUG_ON(rc);
-}
-
-int __init smp_xics_probe(void)
-{
- xics_request_ipi();
-
- return cpumask_weight(cpu_possible_mask);
-}
-
-#endif /* CONFIG_SMP */
-
-
-/* Initialization */
-
-static void xics_update_irq_servers(void)
-{
- int i, j;
- struct device_node *np;
- u32 ilen;
- const u32 *ireg;
- u32 hcpuid;
-
- /* Find the server numbers for the boot cpu. */
- np = of_get_cpu_node(boot_cpuid, NULL);
- BUG_ON(!np);
-
- ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
- if (!ireg) {
- of_node_put(np);
- return;
- }
-
- i = ilen / sizeof(int);
- hcpuid = get_hard_smp_processor_id(boot_cpuid);
-
- /* Global interrupt distribution server is specified in the last
- * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last
- * entry fom this property for current boot cpu id and use it as
- * default distribution server
- */
- for (j = 0; j < i; j += 2) {
- if (ireg[j] == hcpuid) {
- default_server = hcpuid;
- default_distrib_server = ireg[j+1];
- }
- }
-
- of_node_put(np);
-}
-
-static void __init xics_map_one_cpu(int hw_id, unsigned long addr,
- unsigned long size)
-{
- int i;
-
- /* This may look gross but it's good enough for now, we don't quite
- * have a hard -> linux processor id matching.
- */
- for_each_possible_cpu(i) {
- if (!cpu_present(i))
- continue;
- if (hw_id == get_hard_smp_processor_id(i)) {
- xics_per_cpu[i] = ioremap(addr, size);
- return;
- }
- }
-}
-
-static void __init xics_init_one_node(struct device_node *np,
- unsigned int *indx)
-{
- unsigned int ilen;
- const u32 *ireg;
-
- /* This code does the theorically broken assumption that the interrupt
- * server numbers are the same as the hard CPU numbers.
- * This happens to be the case so far but we are playing with fire...
- * should be fixed one of these days. -BenH.
- */
- ireg = of_get_property(np, "ibm,interrupt-server-ranges", NULL);
-
- /* Do that ever happen ? we'll know soon enough... but even good'old
- * f80 does have that property ..
- */
- WARN_ON(ireg == NULL);
- if (ireg) {
- /*
- * set node starting index for this node
- */
- *indx = *ireg;
- }
- ireg = of_get_property(np, "reg", &ilen);
- if (!ireg)
- panic("xics_init_IRQ: can't find interrupt reg property");
-
- while (ilen >= (4 * sizeof(u32))) {
- unsigned long addr, size;
-
- /* XXX Use proper OF parsing code here !!! */
- addr = (unsigned long)*ireg++ << 32;
- ilen -= sizeof(u32);
- addr |= *ireg++;
- ilen -= sizeof(u32);
- size = (unsigned long)*ireg++ << 32;
- ilen -= sizeof(u32);
- size |= *ireg++;
- ilen -= sizeof(u32);
- xics_map_one_cpu(*indx, addr, size);
- (*indx)++;
- }
-}
-
-void __init xics_init_IRQ(void)
-{
- struct device_node *np;
- u32 indx = 0;
- int found = 0;
- const u32 *isize;
-
- ppc64_boot_msg(0x20, "XICS Init");
-
- ibm_get_xive = rtas_token("ibm,get-xive");
- ibm_set_xive = rtas_token("ibm,set-xive");
- ibm_int_on = rtas_token("ibm,int-on");
- ibm_int_off = rtas_token("ibm,int-off");
-
- for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") {
- found = 1;
- if (firmware_has_feature(FW_FEATURE_LPAR)) {
- of_node_put(np);
- break;
- }
- xics_init_one_node(np, &indx);
- }
- if (found == 0)
- return;
-
- /* get the bit size of server numbers */
- found = 0;
-
- for_each_compatible_node(np, NULL, "ibm,ppc-xics") {
- isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
-
- if (!isize)
- continue;
-
- if (!found) {
- interrupt_server_size = *isize;
- found = 1;
- } else if (*isize != interrupt_server_size) {
- printk(KERN_WARNING "XICS: "
- "mismatched ibm,interrupt-server#-size\n");
- interrupt_server_size = max(*isize,
- interrupt_server_size);
- }
- }
-
- xics_update_irq_servers();
- xics_init_host();
-
- if (firmware_has_feature(FW_FEATURE_LPAR))
- ppc_md.get_irq = xics_get_irq_lpar;
- else
- ppc_md.get_irq = xics_get_irq_direct;
-
- xics_setup_cpu();
-
- ppc64_boot_msg(0x21, "XICS Done");
-}
-
-/* Cpu startup, shutdown, and hotplug */
-
-static void xics_set_cpu_priority(unsigned char cppr)
-{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
- /*
- * we only really want to set the priority when there's
- * just one cppr value on the stack
- */
- WARN_ON(os_cppr->index != 0);
-
- os_cppr->stack[0] = cppr;
-
- if (firmware_has_feature(FW_FEATURE_LPAR))
- lpar_cppr_info(cppr);
- else
- direct_cppr_info(cppr);
- iosync();
-}
-
-/* Have the calling processor join or leave the specified global queue */
-static void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
-{
- int index;
- int status;
-
- if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
- return;
-
- index = (1UL << interrupt_server_size) - 1 - gserver;
-
- status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
-
- WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
- GLOBAL_INTERRUPT_QUEUE, index, join, status);
-}
-
-void xics_setup_cpu(void)
-{
- xics_set_cpu_priority(LOWEST_PRIORITY);
-
- xics_set_cpu_giq(default_distrib_server, 1);
-}
-
-void xics_teardown_cpu(void)
-{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
- int cpu = smp_processor_id();
-
- /*
- * we have to reset the cppr index to 0 because we're
- * not going to return from the IPI
- */
- os_cppr->index = 0;
- xics_set_cpu_priority(0);
-
- /* Clear any pending IPI request */
- if (firmware_has_feature(FW_FEATURE_LPAR))
- lpar_qirr_info(cpu, 0xff);
- else
- direct_qirr_info(cpu, 0xff);
-}
-
-void xics_kexec_teardown_cpu(int secondary)
-{
- xics_teardown_cpu();
-
- /*
- * we take the ipi irq but and never return so we
- * need to EOI the IPI, but want to leave our priority 0
- *
- * should we check all the other interrupts too?
- * should we be flagging idle loop instead?
- * or creating some task to be scheduled?
- */
-
- if (firmware_has_feature(FW_FEATURE_LPAR))
- lpar_xirr_info_set((0x00 << 24) | XICS_IPI);
- else
- direct_xirr_info_set((0x00 << 24) | XICS_IPI);
-
- /*
- * Some machines need to have at least one cpu in the GIQ,
- * so leave the master cpu in the group.
- */
- if (secondary)
- xics_set_cpu_giq(default_distrib_server, 0);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Interrupts are disabled. */
-void xics_migrate_irqs_away(void)
-{
- int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
- int virq;
-
- /* If we used to be the default server, move to the new "boot_cpuid" */
- if (hw_cpu == default_server)
- xics_update_irq_servers();
-
- /* Reject any interrupt that was queued to us... */
- xics_set_cpu_priority(0);
-
- /* Remove ourselves from the global interrupt queue */
- xics_set_cpu_giq(default_distrib_server, 0);
-
- /* Allow IPIs again... */
- xics_set_cpu_priority(DEFAULT_PRIORITY);
-
- for_each_irq(virq) {
- struct irq_desc *desc;
- struct irq_chip *chip;
- unsigned int hwirq;
- int xics_status[2];
- int status;
- unsigned long flags;
-
- /* We can't set affinity on ISA interrupts */
- if (virq < NUM_ISA_INTERRUPTS)
- continue;
- if (irq_map[virq].host != xics_host)
- continue;
- hwirq = (unsigned int)irq_map[virq].hwirq;
- /* We need to get IPIs still. */
- if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
- continue;
-
- desc = irq_to_desc(virq);
-
- /* We only need to migrate enabled IRQS */
- if (desc == NULL || desc->action == NULL)
- continue;
-
- chip = irq_desc_get_chip(desc);
- if (chip == NULL || chip->irq_set_affinity == NULL)
- continue;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
-
- status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
- if (status) {
- printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
- __func__, hwirq, status);
- goto unlock;
- }
-
- /*
- * We only support delivery to all cpus or to one cpu.
- * The irq has to be migrated only in the single cpu
- * case.
- */
- if (xics_status[0] != hw_cpu)
- goto unlock;
-
- /* This is expected during cpu offline. */
- if (cpu_online(cpu))
- printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n",
- virq, cpu);
-
- /* Reset affinity to all cpus */
- cpumask_setall(desc->irq_data.affinity);
- chip->irq_set_affinity(&desc->irq_data, cpu_all_mask, true);
-unlock:
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- }
-}
-#endif
diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h
deleted file mode 100644
index d1d5a83039a..00000000000
--- a/arch/powerpc/platforms/pseries/xics.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * arch/powerpc/platforms/pseries/xics.h
- *
- * Copyright 2000 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _POWERPC_KERNEL_XICS_H
-#define _POWERPC_KERNEL_XICS_H
-
-extern void xics_init_IRQ(void);
-extern void xics_setup_cpu(void);
-extern void xics_teardown_cpu(void);
-extern void xics_kexec_teardown_cpu(int secondary);
-extern void xics_migrate_irqs_away(void);
-extern int smp_xics_probe(void);
-extern void smp_xics_message_pass(int target, int msg);
-
-#endif /* _POWERPC_KERNEL_XICS_H */
diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig
new file mode 100644
index 00000000000..c3c48eb62cc
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/Kconfig
@@ -0,0 +1,28 @@
+config PPC_WSP
+ bool
+ default n
+
+menu "WSP platform selection"
+ depends on PPC_BOOK3E_64
+
+config PPC_PSR2
+ bool "PSR-2 platform"
+ select PPC_A2
+ select GENERIC_TBSYNC
+ select PPC_SCOM
+ select EPAPR_BOOT
+ select PPC_WSP
+ select PPC_XICS
+ select PPC_ICP_NATIVE
+ default y
+
+endmenu
+
+config PPC_A2_DD2
+ bool "Support for DD2 based A2/WSP systems"
+ depends on PPC_A2
+
+config WORKAROUND_ERRATUM_463
+ depends on PPC_A2_DD2
+ bool "Workaround erratum 463"
+ default y
diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile
new file mode 100644
index 00000000000..095be73d6cd
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/Makefile
@@ -0,0 +1,6 @@
+ccflags-y += -mno-minimal-toc
+
+obj-y += setup.o ics.o
+obj-$(CONFIG_PPC_PSR2) += psr2.o opb_pic.o
+obj-$(CONFIG_PPC_WSP) += scom_wsp.o
+obj-$(CONFIG_SMP) += smp.o scom_smp.o
diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c
new file mode 100644
index 00000000000..e53bd9e7b12
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/ics.c
@@ -0,0 +1,712 @@
+/*
+ * Copyright 2008-2011 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/xics.h>
+
+#include "wsp.h"
+#include "ics.h"
+
+
+/* WSP ICS */
+
+struct wsp_ics {
+ struct ics ics;
+ struct device_node *dn;
+ void __iomem *regs;
+ spinlock_t lock;
+ unsigned long *bitmap;
+ u32 chip_id;
+ u32 lsi_base;
+ u32 lsi_count;
+ u64 hwirq_start;
+ u64 count;
+#ifdef CONFIG_SMP
+ int *hwirq_cpu_map;
+#endif
+};
+
+#define to_wsp_ics(ics) container_of(ics, struct wsp_ics, ics)
+
+#define INT_SRC_LAYER_BUID_REG(base) ((base) + 0x00)
+#define IODA_TBL_ADDR_REG(base) ((base) + 0x18)
+#define IODA_TBL_DATA_REG(base) ((base) + 0x20)
+#define XIVE_UPDATE_REG(base) ((base) + 0x28)
+#define ICS_INT_CAPS_REG(base) ((base) + 0x30)
+
+#define TBL_AUTO_INCREMENT ((1UL << 63) | (1UL << 15))
+#define TBL_SELECT_XIST (1UL << 48)
+#define TBL_SELECT_XIVT (1UL << 49)
+
+#define IODA_IRQ(irq) ((irq) & (0x7FFULL)) /* HRM 5.1.3.4 */
+
+#define XIST_REQUIRED 0x8
+#define XIST_REJECTED 0x4
+#define XIST_PRESENTED 0x2
+#define XIST_PENDING 0x1
+
+#define XIVE_SERVER_SHIFT 42
+#define XIVE_SERVER_MASK 0xFFFFULL
+#define XIVE_PRIORITY_MASK 0xFFULL
+#define XIVE_PRIORITY_SHIFT 32
+#define XIVE_WRITE_ENABLE (1ULL << 63)
+
+/*
+ * The docs refer to a 6 bit field called ChipID, which consists of a
+ * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero
+ * so we ignore it, and every where we use "chip id" in this code we
+ * mean the NodeID.
+ */
+#define WSP_ICS_CHIP_SHIFT 17
+
+
+static struct wsp_ics *ics_list;
+static int num_ics;
+
+/* ICS Source controller accessors */
+
+static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq)
+{
+ unsigned long flags;
+ u64 xive;
+
+ spin_lock_irqsave(&ics->lock, flags);
+ out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq));
+ xive = in_be64(IODA_TBL_DATA_REG(ics->regs));
+ spin_unlock_irqrestore(&ics->lock, flags);
+
+ return xive;
+}
+
+static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive)
+{
+ xive &= ~XIVE_ADDR_MASK;
+ xive |= (irq & XIVE_ADDR_MASK);
+ xive |= XIVE_WRITE_ENABLE;
+
+ out_be64(XIVE_UPDATE_REG(ics->regs), xive);
+}
+
+static u64 xive_set_server(u64 xive, unsigned int server)
+{
+ u64 mask = ~(XIVE_SERVER_MASK << XIVE_SERVER_SHIFT);
+
+ xive &= mask;
+ xive |= (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT;
+
+ return xive;
+}
+
+static u64 xive_set_priority(u64 xive, unsigned int priority)
+{
+ u64 mask = ~(XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT);
+
+ xive &= mask;
+ xive |= (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT;
+
+ return xive;
+}
+
+
+#ifdef CONFIG_SMP
+/* Find logical CPUs within mask on a given chip and store result in ret */
+void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret)
+{
+ int cpu, chip;
+ struct device_node *cpu_dn, *dn;
+ const u32 *prop;
+
+ cpumask_clear(ret);
+ for_each_cpu(cpu, mask) {
+ cpu_dn = of_get_cpu_node(cpu, NULL);
+ if (!cpu_dn)
+ continue;
+
+ prop = of_get_property(cpu_dn, "at-node", NULL);
+ if (!prop) {
+ of_node_put(cpu_dn);
+ continue;
+ }
+
+ dn = of_find_node_by_phandle(*prop);
+ of_node_put(cpu_dn);
+
+ chip = wsp_get_chip_id(dn);
+ if (chip == chip_id)
+ cpumask_set_cpu(cpu, ret);
+
+ of_node_put(dn);
+ }
+}
+
+/* Store a suitable CPU to handle a hwirq in the ics->hwirq_cpu_map cache */
+static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
+ const cpumask_t *affinity)
+{
+ cpumask_var_t avail, newmask;
+ int ret = -ENOMEM, cpu, cpu_rover = 0, target;
+ int index = hwirq - ics->hwirq_start;
+ unsigned int nodeid;
+
+ BUG_ON(index < 0 || index >= ics->count);
+
+ if (!ics->hwirq_cpu_map)
+ return -ENOMEM;
+
+ if (!distribute_irqs) {
+ ics->hwirq_cpu_map[hwirq - ics->hwirq_start] = xics_default_server;
+ return 0;
+ }
+
+ /* Allocate needed CPU masks */
+ if (!alloc_cpumask_var(&avail, GFP_KERNEL))
+ goto ret;
+ if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+ goto freeavail;
+
+ /* Find PBus attached to the source of this IRQ */
+ nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */
+
+ /* Find CPUs that could handle this IRQ */
+ if (affinity)
+ cpumask_and(avail, cpu_online_mask, affinity);
+ else
+ cpumask_copy(avail, cpu_online_mask);
+
+ /* Narrow selection down to logical CPUs on the same chip */
+ cpus_on_chip(nodeid, avail, newmask);
+
+ /* Ensure we haven't narrowed it down to 0 */
+ if (unlikely(cpumask_empty(newmask))) {
+ if (unlikely(cpumask_empty(avail))) {
+ ret = -1;
+ goto out;
+ }
+ cpumask_copy(newmask, avail);
+ }
+
+ /* Choose a CPU out of those we narrowed it down to in round robin */
+ target = hwirq % cpumask_weight(newmask);
+ for_each_cpu(cpu, newmask) {
+ if (cpu_rover++ >= target) {
+ ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /* Shouldn't happen */
+ WARN_ON(1);
+
+out:
+ free_cpumask_var(newmask);
+freeavail:
+ free_cpumask_var(avail);
+ret:
+ if (ret < 0) {
+ ics->hwirq_cpu_map[index] = cpumask_first(cpu_online_mask);
+ pr_warning("Error, falling hwirq 0x%x routing back to CPU %i\n",
+ hwirq, ics->hwirq_cpu_map[index]);
+ }
+ return ret;
+}
+
+static void alloc_irq_map(struct wsp_ics *ics)
+{
+ int i;
+
+ ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL);
+ if (!ics->hwirq_cpu_map) {
+ pr_warning("Allocate hwirq_cpu_map failed, "
+ "IRQ balancing disabled\n");
+ return;
+ }
+
+ for (i=0; i < ics->count; i++)
+ ics->hwirq_cpu_map[i] = xics_default_server;
+}
+
+static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
+{
+ int index = hwirq - ics->hwirq_start;
+
+ BUG_ON(index < 0 || index >= ics->count);
+
+ if (!ics->hwirq_cpu_map)
+ return xics_default_server;
+
+ return ics->hwirq_cpu_map[index];
+}
+#else /* !CONFIG_SMP */
+static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
+ const cpumask_t *affinity)
+{
+ return 0;
+}
+
+static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
+{
+ return xics_default_server;
+}
+
+static void alloc_irq_map(struct wsp_ics *ics) { }
+#endif
+
+static void wsp_chip_unmask_irq(struct irq_data *d)
+{
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+ struct wsp_ics *ics;
+ int server;
+ u64 xive;
+
+ if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+ return;
+
+ ics = d->chip_data;
+ if (WARN_ON(!ics))
+ return;
+
+ server = get_irq_server(ics, hw_irq);
+
+ xive = wsp_ics_get_xive(ics, hw_irq);
+ xive = xive_set_server(xive, server);
+ xive = xive_set_priority(xive, DEFAULT_PRIORITY);
+ wsp_ics_set_xive(ics, hw_irq, xive);
+}
+
+static unsigned int wsp_chip_startup(struct irq_data *d)
+{
+ /* unmask it */
+ wsp_chip_unmask_irq(d);
+ return 0;
+}
+
+static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics)
+{
+ u64 xive;
+
+ if (hw_irq == XICS_IPI)
+ return;
+
+ if (WARN_ON(!ics))
+ return;
+ xive = wsp_ics_get_xive(ics, hw_irq);
+ xive = xive_set_server(xive, xics_default_server);
+ xive = xive_set_priority(xive, LOWEST_PRIORITY);
+ wsp_ics_set_xive(ics, hw_irq, xive);
+}
+
+static void wsp_chip_mask_irq(struct irq_data *d)
+{
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+ struct wsp_ics *ics = d->chip_data;
+
+ if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+ return;
+
+ wsp_mask_real_irq(hw_irq, ics);
+}
+
+static int wsp_chip_set_affinity(struct irq_data *d,
+ const struct cpumask *cpumask, bool force)
+{
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+ struct wsp_ics *ics;
+ int ret;
+ u64 xive;
+
+ if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+ return -1;
+
+ ics = d->chip_data;
+ if (WARN_ON(!ics))
+ return -1;
+ xive = wsp_ics_get_xive(ics, hw_irq);
+
+ /*
+ * For the moment only implement delivery to all cpus or one cpu.
+ * Get current irq_server for the given irq
+ */
+ ret = cache_hwirq_map(ics, d->irq, cpumask);
+ if (ret == -1) {
+ char cpulist[128];
+ cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
+ pr_warning("%s: No online cpus in the mask %s for irq %d\n",
+ __func__, cpulist, d->irq);
+ return -1;
+ } else if (ret == -ENOMEM) {
+ pr_warning("%s: Out of memory\n", __func__);
+ return -1;
+ }
+
+ xive = xive_set_server(xive, get_irq_server(ics, hw_irq));
+ wsp_ics_set_xive(ics, hw_irq, xive);
+
+ return 0;
+}
+
+static struct irq_chip wsp_irq_chip = {
+ .name = "WSP ICS",
+ .irq_startup = wsp_chip_startup,
+ .irq_mask = wsp_chip_mask_irq,
+ .irq_unmask = wsp_chip_unmask_irq,
+ .irq_set_affinity = wsp_chip_set_affinity
+};
+
+static int wsp_ics_host_match(struct ics *ics, struct device_node *dn)
+{
+ /* All ICSs in the system implement a global irq number space,
+ * so match against them all. */
+ return of_device_is_compatible(dn, "ibm,ppc-xics");
+}
+
+static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq)
+{
+ if (hwirq >= wsp_ics->hwirq_start &&
+ hwirq < wsp_ics->hwirq_start + wsp_ics->count)
+ return 1;
+
+ return 0;
+}
+
+static int wsp_ics_map(struct ics *ics, unsigned int virq)
+{
+ struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+ unsigned int hw_irq = virq_to_hw(virq);
+ unsigned long flags;
+
+ if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+ return -ENOENT;
+
+ irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq);
+
+ irq_set_chip_data(virq, wsp_ics);
+
+ spin_lock_irqsave(&wsp_ics->lock, flags);
+ bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0);
+ spin_unlock_irqrestore(&wsp_ics->lock, flags);
+
+ return 0;
+}
+
+static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq)
+{
+ struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+
+ if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+ return;
+
+ pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq);
+ wsp_mask_real_irq(hw_irq, wsp_ics);
+}
+
+static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq)
+{
+ struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+
+ if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+ return -ENOENT;
+
+ return get_irq_server(wsp_ics, hw_irq);
+}
+
+/* HW Number allocation API */
+
+static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn)
+{
+ struct device_node *iparent;
+ int i;
+
+ iparent = of_irq_find_parent(dn);
+ if (!iparent) {
+ pr_err("wsp_ics: Failed to find interrupt parent!\n");
+ return NULL;
+ }
+
+ for(i = 0; i < num_ics; i++) {
+ if(ics_list[i].dn == iparent)
+ break;
+ }
+
+ if (i >= num_ics) {
+ pr_err("wsp_ics: Unable to find parent bitmap!\n");
+ return NULL;
+ }
+
+ return &ics_list[i];
+}
+
+int wsp_ics_alloc_irq(struct device_node *dn, int num)
+{
+ struct wsp_ics *ics;
+ int order, offset;
+
+ ics = wsp_ics_find_dn_ics(dn);
+ if (!ics)
+ return -ENODEV;
+
+ /* Fast, but overly strict if num isn't a power of two */
+ order = get_count_order(num);
+
+ spin_lock_irq(&ics->lock);
+ offset = bitmap_find_free_region(ics->bitmap, ics->count, order);
+ spin_unlock_irq(&ics->lock);
+
+ if (offset < 0)
+ return offset;
+
+ return offset + ics->hwirq_start;
+}
+
+void wsp_ics_free_irq(struct device_node *dn, unsigned int irq)
+{
+ struct wsp_ics *ics;
+
+ ics = wsp_ics_find_dn_ics(dn);
+ if (WARN_ON(!ics))
+ return;
+
+ spin_lock_irq(&ics->lock);
+ bitmap_release_region(ics->bitmap, irq, 0);
+ spin_unlock_irq(&ics->lock);
+}
+
+/* Initialisation */
+
+static int __init wsp_ics_bitmap_setup(struct wsp_ics *ics,
+ struct device_node *dn)
+{
+ int len, i, j, size;
+ u32 start, count;
+ const u32 *p;
+
+ size = BITS_TO_LONGS(ics->count) * sizeof(long);
+ ics->bitmap = kzalloc(size, GFP_KERNEL);
+ if (!ics->bitmap) {
+ pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n");
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&ics->lock);
+
+ p = of_get_property(dn, "available-ranges", &len);
+ if (!p || !len) {
+ /* FIXME this should be a WARN() once mambo is updated */
+ pr_err("wsp_ics: No available-ranges defined for %s\n",
+ dn->full_name);
+ return 0;
+ }
+
+ if (len % (2 * sizeof(u32)) != 0) {
+ /* FIXME this should be a WARN() once mambo is updated */
+ pr_err("wsp_ics: Invalid available-ranges for %s\n",
+ dn->full_name);
+ return 0;
+ }
+
+ bitmap_fill(ics->bitmap, ics->count);
+
+ for (i = 0; i < len / sizeof(u32); i += 2) {
+ start = of_read_number(p + i, 1);
+ count = of_read_number(p + i + 1, 1);
+
+ pr_devel("%s: start: %d count: %d\n", __func__, start, count);
+
+ if ((start + count) > (ics->hwirq_start + ics->count) ||
+ start < ics->hwirq_start) {
+ pr_err("wsp_ics: Invalid range! -> %d to %d\n",
+ start, start + count);
+ break;
+ }
+
+ for (j = 0; j < count; j++)
+ bitmap_release_region(ics->bitmap,
+ (start + j) - ics->hwirq_start, 0);
+ }
+
+ /* Ensure LSIs are not available for allocation */
+ bitmap_allocate_region(ics->bitmap, ics->lsi_base,
+ get_count_order(ics->lsi_count));
+
+ return 0;
+}
+
+static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn)
+{
+ u32 lsi_buid, msi_buid, msi_base, msi_count;
+ void __iomem *regs;
+ const u32 *p;
+ int rc, len, i;
+ u64 caps, buid;
+
+ p = of_get_property(dn, "interrupt-ranges", &len);
+ if (!p || len < (2 * sizeof(u32))) {
+ pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n",
+ dn->full_name);
+ return -ENOENT;
+ }
+
+ if (len > (2 * sizeof(u32))) {
+ pr_err("wsp_ics: Multiple ics ranges not supported.\n");
+ return -EINVAL;
+ }
+
+ regs = of_iomap(dn, 0);
+ if (!regs) {
+ pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name);
+ return -ENXIO;
+ }
+
+ ics->hwirq_start = of_read_number(p, 1);
+ ics->count = of_read_number(p + 1, 1);
+ ics->regs = regs;
+
+ ics->chip_id = wsp_get_chip_id(dn);
+ if (WARN_ON(ics->chip_id < 0))
+ ics->chip_id = 0;
+
+ /* Get some informations about the critter */
+ caps = in_be64(ICS_INT_CAPS_REG(ics->regs));
+ buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs));
+ ics->lsi_count = caps >> 56;
+ msi_count = (caps >> 44) & 0x7ff;
+
+ /* Note: LSI BUID is 9 bits, but really only 3 are BUID and the
+ * rest is mixed in the interrupt number. We store the whole
+ * thing though
+ */
+ lsi_buid = (buid >> 48) & 0x1ff;
+ ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5;
+ msi_buid = (buid >> 37) & 0x7;
+ msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11;
+
+ pr_info("wsp_ics: Found %s\n", dn->full_name);
+ pr_info("wsp_ics: irq range : 0x%06llx..0x%06llx\n",
+ ics->hwirq_start, ics->hwirq_start + ics->count - 1);
+ pr_info("wsp_ics: %4d LSIs : 0x%06x..0x%06x\n",
+ ics->lsi_count, ics->lsi_base,
+ ics->lsi_base + ics->lsi_count - 1);
+ pr_info("wsp_ics: %4d MSIs : 0x%06x..0x%06x\n",
+ msi_count, msi_base,
+ msi_base + msi_count - 1);
+
+ /* Let's check the HW config is sane */
+ if (ics->lsi_base < ics->hwirq_start ||
+ (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count))
+ pr_warning("wsp_ics: WARNING ! LSIs out of interrupt-ranges !\n");
+ if (msi_base < ics->hwirq_start ||
+ (msi_base + msi_count) > (ics->hwirq_start + ics->count))
+ pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n");
+
+ /* We don't check for overlap between LSI and MSI, which will happen
+ * if we use the same BUID, I'm not sure yet how legit that is.
+ */
+
+ rc = wsp_ics_bitmap_setup(ics, dn);
+ if (rc) {
+ iounmap(regs);
+ return rc;
+ }
+
+ ics->dn = of_node_get(dn);
+ alloc_irq_map(ics);
+
+ for(i = 0; i < ics->count; i++)
+ wsp_mask_real_irq(ics->hwirq_start + i, ics);
+
+ ics->ics.map = wsp_ics_map;
+ ics->ics.mask_unknown = wsp_ics_mask_unknown;
+ ics->ics.get_server = wsp_ics_get_server;
+ ics->ics.host_match = wsp_ics_host_match;
+
+ xics_register_ics(&ics->ics);
+
+ return 0;
+}
+
+static void __init wsp_ics_set_default_server(void)
+{
+ struct device_node *np;
+ u32 hwid;
+
+ /* Find the server number for the boot cpu. */
+ np = of_get_cpu_node(boot_cpuid, NULL);
+ BUG_ON(!np);
+
+ hwid = get_hard_smp_processor_id(boot_cpuid);
+
+ pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name);
+ xics_default_server = hwid;
+
+ of_node_put(np);
+}
+
+static int __init wsp_ics_init(void)
+{
+ struct device_node *dn;
+ struct wsp_ics *ics;
+ int rc, found;
+
+ wsp_ics_set_default_server();
+
+ found = 0;
+ for_each_compatible_node(dn, NULL, "ibm,ppc-xics")
+ found++;
+
+ if (found == 0) {
+ pr_err("wsp_ics: No ICS's found!\n");
+ return -ENODEV;
+ }
+
+ ics_list = kmalloc(sizeof(*ics) * found, GFP_KERNEL);
+ if (!ics_list) {
+ pr_err("wsp_ics: No memory for structs.\n");
+ return -ENOMEM;
+ }
+
+ num_ics = 0;
+ ics = ics_list;
+ for_each_compatible_node(dn, NULL, "ibm,wsp-xics") {
+ rc = wsp_ics_setup(ics, dn);
+ if (rc == 0) {
+ ics++;
+ num_ics++;
+ }
+ }
+
+ if (found != num_ics) {
+ pr_err("wsp_ics: Failed setting up %d ICS's\n",
+ found - num_ics);
+ return -1;
+ }
+
+ return 0;
+}
+
+void __init wsp_init_irq(void)
+{
+ wsp_ics_init();
+ xics_init();
+
+ /* We need to patch our irq chip's EOI to point to the right ICP */
+ wsp_irq_chip.irq_eoi = icp_ops->eoi;
+}
diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h
new file mode 100644
index 00000000000..e34d5310264
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/ics.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2009 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ICS_H
+#define __ICS_H
+
+#define XIVE_ADDR_MASK 0x7FFULL
+
+extern void wsp_init_irq(void);
+
+extern int wsp_ics_alloc_irq(struct device_node *dn, int num);
+extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq);
+
+#endif /* __ICS_H */
diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c
new file mode 100644
index 00000000000..be05631a3c1
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/opb_pic.c
@@ -0,0 +1,332 @@
+/*
+ * IBM Onboard Peripheral Bus Interrupt Controller
+ *
+ * Copyright 2010 Jack Miller, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+
+#include <asm/reg_a2.h>
+#include <asm/irq.h>
+
+#define OPB_NR_IRQS 32
+
+#define OPB_MLSASIER 0x04 /* MLS Accumulated Status IER */
+#define OPB_MLSIR 0x50 /* MLS Interrupt Register */
+#define OPB_MLSIER 0x54 /* MLS Interrupt Enable Register */
+#define OPB_MLSIPR 0x58 /* MLS Interrupt Polarity Register */
+#define OPB_MLSIIR 0x5c /* MLS Interrupt Inputs Register */
+
+static int opb_index = 0;
+
+struct opb_pic {
+ struct irq_host *host;
+ void *regs;
+ int index;
+ spinlock_t lock;
+};
+
+static u32 opb_in(struct opb_pic *opb, int offset)
+{
+ return in_be32(opb->regs + offset);
+}
+
+static void opb_out(struct opb_pic *opb, int offset, u32 val)
+{
+ out_be32(opb->regs + offset, val);
+}
+
+static void opb_unmask_irq(struct irq_data *d)
+{
+ struct opb_pic *opb;
+ unsigned long flags;
+ u32 ier, bitset;
+
+ opb = d->chip_data;
+ bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+ spin_lock_irqsave(&opb->lock, flags);
+
+ ier = opb_in(opb, OPB_MLSIER);
+ opb_out(opb, OPB_MLSIER, ier | bitset);
+ ier = opb_in(opb, OPB_MLSIER);
+
+ spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_mask_irq(struct irq_data *d)
+{
+ struct opb_pic *opb;
+ unsigned long flags;
+ u32 ier, mask;
+
+ opb = d->chip_data;
+ mask = ~(1 << (31 - irqd_to_hwirq(d)));
+
+ spin_lock_irqsave(&opb->lock, flags);
+
+ ier = opb_in(opb, OPB_MLSIER);
+ opb_out(opb, OPB_MLSIER, ier & mask);
+ ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
+
+ spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_ack_irq(struct irq_data *d)
+{
+ struct opb_pic *opb;
+ unsigned long flags;
+ u32 bitset;
+
+ opb = d->chip_data;
+ bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+ spin_lock_irqsave(&opb->lock, flags);
+
+ opb_out(opb, OPB_MLSIR, bitset);
+ opb_in(opb, OPB_MLSIR); // Flush posted writes
+
+ spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_mask_ack_irq(struct irq_data *d)
+{
+ struct opb_pic *opb;
+ unsigned long flags;
+ u32 bitset;
+ u32 ier, ir;
+
+ opb = d->chip_data;
+ bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+ spin_lock_irqsave(&opb->lock, flags);
+
+ ier = opb_in(opb, OPB_MLSIER);
+ opb_out(opb, OPB_MLSIER, ier & ~bitset);
+ ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
+
+ opb_out(opb, OPB_MLSIR, bitset);
+ ir = opb_in(opb, OPB_MLSIR); // Flush posted writes
+
+ spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static int opb_set_irq_type(struct irq_data *d, unsigned int flow)
+{
+ struct opb_pic *opb;
+ unsigned long flags;
+ int invert, ipr, mask, bit;
+
+ opb = d->chip_data;
+
+ /* The only information we're interested in in the type is whether it's
+ * a high or low trigger. For high triggered interrupts, the polarity
+ * set for it in the MLS Interrupt Polarity Register is 0, for low
+ * interrupts it's 1 so that the proper input in the MLS Interrupt Input
+ * Register is interrupted as asserting the interrupt. */
+
+ switch (flow) {
+ case IRQ_TYPE_NONE:
+ opb_mask_irq(d);
+ return 0;
+
+ case IRQ_TYPE_LEVEL_HIGH:
+ invert = 0;
+ break;
+
+ case IRQ_TYPE_LEVEL_LOW:
+ invert = 1;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ bit = (1 << (31 - irqd_to_hwirq(d)));
+ mask = ~bit;
+
+ spin_lock_irqsave(&opb->lock, flags);
+
+ ipr = opb_in(opb, OPB_MLSIPR);
+ ipr = (ipr & mask) | (invert ? bit : 0);
+ opb_out(opb, OPB_MLSIPR, ipr);
+ ipr = opb_in(opb, OPB_MLSIPR); // Flush posted writes
+
+ spin_unlock_irqrestore(&opb->lock, flags);
+
+ /* Record the type in the interrupt descriptor */
+ irqd_set_trigger_type(d, flow);
+
+ return 0;
+}
+
+static struct irq_chip opb_irq_chip = {
+ .name = "OPB",
+ .irq_mask = opb_mask_irq,
+ .irq_unmask = opb_unmask_irq,
+ .irq_mask_ack = opb_mask_ack_irq,
+ .irq_ack = opb_ack_irq,
+ .irq_set_type = opb_set_irq_type
+};
+
+static int opb_host_map(struct irq_host *host, unsigned int virq,
+ irq_hw_number_t hwirq)
+{
+ struct opb_pic *opb;
+
+ opb = host->host_data;
+
+ /* Most of the important stuff is handled by the generic host code, like
+ * the lookup, so just attach some info to the virtual irq */
+
+ irq_set_chip_data(virq, opb);
+ irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq);
+ irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+ return 0;
+}
+
+static int opb_host_xlate(struct irq_host *host, struct device_node *dn,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+ /* Interrupt size must == 2 */
+ BUG_ON(intsize != 2);
+ *out_hwirq = intspec[0];
+ *out_type = intspec[1];
+ return 0;
+}
+
+static struct irq_host_ops opb_host_ops = {
+ .map = opb_host_map,
+ .xlate = opb_host_xlate,
+};
+
+irqreturn_t opb_irq_handler(int irq, void *private)
+{
+ struct opb_pic *opb;
+ u32 ir, src, subvirq;
+
+ opb = (struct opb_pic *) private;
+
+ /* Read the OPB MLS Interrupt Register for
+ * asserted interrupts */
+ ir = opb_in(opb, OPB_MLSIR);
+ if (!ir)
+ return IRQ_NONE;
+
+ do {
+ /* Get 1 - 32 source, *NOT* bit */
+ src = 32 - ffs(ir);
+
+ /* Translate from the OPB's conception of interrupt number to
+ * Linux's virtual IRQ */
+
+ subvirq = irq_linear_revmap(opb->host, src);
+
+ generic_handle_irq(subvirq);
+ } while ((ir = opb_in(opb, OPB_MLSIR)));
+
+ return IRQ_HANDLED;
+}
+
+struct opb_pic *opb_pic_init_one(struct device_node *dn)
+{
+ struct opb_pic *opb;
+ struct resource res;
+
+ if (of_address_to_resource(dn, 0, &res)) {
+ printk(KERN_ERR "opb: Couldn't translate resource\n");
+ return NULL;
+ }
+
+ opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL);
+ if (!opb) {
+ printk(KERN_ERR "opb: Failed to allocate opb struct!\n");
+ return NULL;
+ }
+
+ /* Get access to the OPB MMIO registers */
+ opb->regs = ioremap(res.start + 0x10000, 0x1000);
+ if (!opb->regs) {
+ printk(KERN_ERR "opb: Failed to allocate register space!\n");
+ goto free_opb;
+ }
+
+ /* Allocate an irq host so that Linux knows that despite only
+ * having one interrupt to issue, we're the controller for multiple
+ * hardware IRQs, so later we can lookup their virtual IRQs. */
+
+ opb->host = irq_alloc_host(dn, IRQ_HOST_MAP_LINEAR,
+ OPB_NR_IRQS, &opb_host_ops, -1);
+
+ if (!opb->host) {
+ printk(KERN_ERR "opb: Failed to allocate IRQ host!\n");
+ goto free_regs;
+ }
+
+ opb->index = opb_index++;
+ spin_lock_init(&opb->lock);
+ opb->host->host_data = opb;
+
+ /* Disable all interrupts by default */
+ opb_out(opb, OPB_MLSASIER, 0);
+ opb_out(opb, OPB_MLSIER, 0);
+
+ /* ACK any interrupts left by FW */
+ opb_out(opb, OPB_MLSIR, 0xFFFFFFFF);
+
+ return opb;
+
+free_regs:
+ iounmap(opb->regs);
+free_opb:
+ kfree(opb);
+ return NULL;
+}
+
+void __init opb_pic_init(void)
+{
+ struct device_node *dn;
+ struct opb_pic *opb;
+ int virq;
+ int rc;
+
+ /* Call init_one for each OPB device */
+ for_each_compatible_node(dn, NULL, "ibm,opb") {
+
+ /* Fill in an OPB struct */
+ opb = opb_pic_init_one(dn);
+ if (!opb) {
+ printk(KERN_WARNING "opb: Failed to init node, skipped!\n");
+ continue;
+ }
+
+ /* Map / get opb's hardware virtual irq */
+ virq = irq_of_parse_and_map(dn, 0);
+ if (virq <= 0) {
+ printk("opb: irq_op_parse_and_map failed!\n");
+ continue;
+ }
+
+ /* Attach opb interrupt handler to new virtual IRQ */
+ rc = request_irq(virq, opb_irq_handler, 0, "OPB LS Cascade", opb);
+ if (rc) {
+ printk("opb: request_irq failed: %d\n", rc);
+ continue;
+ }
+
+ printk("OPB%d init with %d IRQs at %p\n", opb->index,
+ OPB_NR_IRQS, opb->regs);
+ }
+}
diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c
new file mode 100644
index 00000000000..40f28916ff6
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/psr2.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2008-2011, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+
+#include <asm/machdep.h>
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+
+#include "ics.h"
+#include "wsp.h"
+
+
+static void psr2_spin(void)
+{
+ hard_irq_disable();
+ for (;;) ;
+}
+
+static void psr2_restart(char *cmd)
+{
+ psr2_spin();
+}
+
+static int psr2_probe_devices(void)
+{
+ struct device_node *np;
+
+ /* Our RTC is a ds1500. It seems to be programatically compatible
+ * with the ds1511 for which we have a driver so let's use that
+ */
+ np = of_find_compatible_node(NULL, NULL, "dallas,ds1500");
+ if (np != NULL) {
+ struct resource res;
+ if (of_address_to_resource(np, 0, &res) == 0)
+ platform_device_register_simple("ds1511", 0, &res, 1);
+ }
+ return 0;
+}
+machine_arch_initcall(psr2_md, psr2_probe_devices);
+
+static void __init psr2_setup_arch(void)
+{
+ /* init to some ~sane value until calibrate_delay() runs */
+ loops_per_jiffy = 50000000;
+
+ scom_init_wsp();
+
+ /* Setup SMP callback */
+#ifdef CONFIG_SMP
+ a2_setup_smp();
+#endif
+}
+
+static int __init psr2_probe(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ if (!of_flat_dt_is_compatible(root, "ibm,psr2"))
+ return 0;
+
+ return 1;
+}
+
+static void __init psr2_init_irq(void)
+{
+ wsp_init_irq();
+ opb_pic_init();
+}
+
+define_machine(psr2_md) {
+ .name = "PSR2 A2",
+ .probe = psr2_probe,
+ .setup_arch = psr2_setup_arch,
+ .restart = psr2_restart,
+ .power_off = psr2_spin,
+ .halt = psr2_spin,
+ .calibrate_decr = generic_calibrate_decr,
+ .init_IRQ = psr2_init_irq,
+ .progress = udbg_progress,
+ .power_save = book3e_idle,
+};
diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c
new file mode 100644
index 00000000000..141e7803209
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/scom_smp.c
@@ -0,0 +1,427 @@
+/*
+ * SCOM support for A2 platforms
+ *
+ * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson,
+ * Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/cputhreads.h>
+#include <asm/reg_a2.h>
+#include <asm/scom.h>
+#include <asm/udbg.h>
+
+#include "wsp.h"
+
+#define SCOM_RAMC 0x2a /* Ram Command */
+#define SCOM_RAMC_TGT1_EXT 0x80000000
+#define SCOM_RAMC_SRC1_EXT 0x40000000
+#define SCOM_RAMC_SRC2_EXT 0x20000000
+#define SCOM_RAMC_SRC3_EXT 0x10000000
+#define SCOM_RAMC_ENABLE 0x00080000
+#define SCOM_RAMC_THREADSEL 0x00060000
+#define SCOM_RAMC_EXECUTE 0x00010000
+#define SCOM_RAMC_MSR_OVERRIDE 0x00008000
+#define SCOM_RAMC_MSR_PR 0x00004000
+#define SCOM_RAMC_MSR_GS 0x00002000
+#define SCOM_RAMC_FORCE 0x00001000
+#define SCOM_RAMC_FLUSH 0x00000800
+#define SCOM_RAMC_INTERRUPT 0x00000004
+#define SCOM_RAMC_ERROR 0x00000002
+#define SCOM_RAMC_DONE 0x00000001
+#define SCOM_RAMI 0x29 /* Ram Instruction */
+#define SCOM_RAMIC 0x28 /* Ram Instruction and Command */
+#define SCOM_RAMIC_INSN 0xffffffff00000000
+#define SCOM_RAMD 0x2d /* Ram Data */
+#define SCOM_RAMDH 0x2e /* Ram Data High */
+#define SCOM_RAMDL 0x2f /* Ram Data Low */
+#define SCOM_PCCR0 0x33 /* PC Configuration Register 0 */
+#define SCOM_PCCR0_ENABLE_DEBUG 0x80000000
+#define SCOM_PCCR0_ENABLE_RAM 0x40000000
+#define SCOM_THRCTL 0x30 /* Thread Control and Status */
+#define SCOM_THRCTL_T0_STOP 0x80000000
+#define SCOM_THRCTL_T1_STOP 0x40000000
+#define SCOM_THRCTL_T2_STOP 0x20000000
+#define SCOM_THRCTL_T3_STOP 0x10000000
+#define SCOM_THRCTL_T0_STEP 0x08000000
+#define SCOM_THRCTL_T1_STEP 0x04000000
+#define SCOM_THRCTL_T2_STEP 0x02000000
+#define SCOM_THRCTL_T3_STEP 0x01000000
+#define SCOM_THRCTL_T0_RUN 0x00800000
+#define SCOM_THRCTL_T1_RUN 0x00400000
+#define SCOM_THRCTL_T2_RUN 0x00200000
+#define SCOM_THRCTL_T3_RUN 0x00100000
+#define SCOM_THRCTL_T0_PM 0x00080000
+#define SCOM_THRCTL_T1_PM 0x00040000
+#define SCOM_THRCTL_T2_PM 0x00020000
+#define SCOM_THRCTL_T3_PM 0x00010000
+#define SCOM_THRCTL_T0_UDE 0x00008000
+#define SCOM_THRCTL_T1_UDE 0x00004000
+#define SCOM_THRCTL_T2_UDE 0x00002000
+#define SCOM_THRCTL_T3_UDE 0x00001000
+#define SCOM_THRCTL_ASYNC_DIS 0x00000800
+#define SCOM_THRCTL_TB_DIS 0x00000400
+#define SCOM_THRCTL_DEC_DIS 0x00000200
+#define SCOM_THRCTL_AND 0x31 /* Thread Control and Status */
+#define SCOM_THRCTL_OR 0x32 /* Thread Control and Status */
+
+
+static DEFINE_PER_CPU(scom_map_t, scom_ptrs);
+
+static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread)
+{
+ scom_map_t scom = per_cpu(scom_ptrs, cpu);
+ int tcpu;
+
+ if (scom_map_ok(scom)) {
+ *first_thread = 0;
+ return scom;
+ }
+
+ *first_thread = 1;
+
+ scom = scom_map_device(np, 0);
+
+ for (tcpu = cpu_first_thread_sibling(cpu);
+ tcpu <= cpu_last_thread_sibling(cpu); tcpu++)
+ per_cpu(scom_ptrs, tcpu) = scom;
+
+ /* Hack: for the boot core, this will actually get called on
+ * the second thread up, not the first so our test above will
+ * set first_thread incorrectly. */
+ if (cpu_first_thread_sibling(cpu) == 0)
+ *first_thread = 0;
+
+ return scom;
+}
+
+static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask)
+{
+ u64 cmd, mask, val;
+ int n = 0;
+
+ cmd = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28)
+ | ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE;
+ mask = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR;
+
+ scom_write(scom, SCOM_RAMIC, cmd);
+
+ while (!((val = scom_read(scom, SCOM_RAMC)) & mask)) {
+ pr_devel("Waiting on RAMC = 0x%llx\n", val);
+ if (++n == 3) {
+ pr_err("RAMC timeout on instruction 0x%08x, thread %d\n",
+ insn, thread);
+ return -1;
+ }
+ }
+
+ if (val & SCOM_RAMC_INTERRUPT) {
+ pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n",
+ insn, thread);
+ return -SCOM_RAMC_INTERRUPT;
+ }
+
+ if (val & SCOM_RAMC_ERROR) {
+ pr_err("RAMC error on instruction 0x%08x, thread %d\n",
+ insn, thread);
+ return -SCOM_RAMC_ERROR;
+ }
+
+ return 0;
+}
+
+static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt,
+ u64 *out_gpr)
+{
+ int rc;
+
+ /* or rN, rN, rN */
+ u32 insn = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11);
+ rc = a2_scom_ram(scom, thread, insn, alt ? 0xf : 0x0);
+ if (rc)
+ return rc;
+
+ *out_gpr = scom_read(scom, SCOM_RAMD);
+
+ return 0;
+}
+
+static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr)
+{
+ int rc, sprhi, sprlo;
+ u32 insn;
+
+ sprhi = spr >> 5;
+ sprlo = spr & 0x1f;
+ insn = 0x7c2002a6 | (sprlo << 16) | (sprhi << 11); /* mfspr r1,spr */
+
+ if (spr == 0x0ff0)
+ insn = 0x7c2000a6; /* mfmsr r1 */
+
+ rc = a2_scom_ram(scom, thread, insn, 0xf);
+ if (rc)
+ return rc;
+ return a2_scom_getgpr(scom, thread, 1, 1, out_spr);
+}
+
+static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr,
+ int alt, u64 val)
+{
+ u32 lis = 0x3c000000 | (gpr << 21);
+ u32 li = 0x38000000 | (gpr << 21);
+ u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16);
+ u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16);
+ u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16);
+ u32 highest = val >> 48;
+ u32 higher = (val >> 32) & 0xffff;
+ u32 high = (val >> 16) & 0xffff;
+ u32 low = val & 0xffff;
+ int lext = alt ? 0x8 : 0x0;
+ int oext = alt ? 0xf : 0x0;
+ int rc = 0;
+
+ if (highest)
+ rc |= a2_scom_ram(scom, thread, lis | highest, lext);
+
+ if (higher) {
+ if (highest)
+ rc |= a2_scom_ram(scom, thread, oris | higher, oext);
+ else
+ rc |= a2_scom_ram(scom, thread, li | higher, lext);
+ }
+
+ if (highest || higher)
+ rc |= a2_scom_ram(scom, thread, rldicr32, oext);
+
+ if (high) {
+ if (highest || higher)
+ rc |= a2_scom_ram(scom, thread, oris | high, oext);
+ else
+ rc |= a2_scom_ram(scom, thread, lis | high, lext);
+ }
+
+ if (highest || higher || high)
+ rc |= a2_scom_ram(scom, thread, ori | low, oext);
+ else
+ rc |= a2_scom_ram(scom, thread, li | low, lext);
+
+ return rc;
+}
+
+static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val)
+{
+ int sprhi = spr >> 5;
+ int sprlo = spr & 0x1f;
+ /* mtspr spr, r1 */
+ u32 insn = 0x7c2003a6 | (sprlo << 16) | (sprhi << 11);
+
+ if (spr == 0x0ff0)
+ insn = 0x7c200124; /* mtmsr r1 */
+
+ if (a2_scom_setgpr(scom, thread, 1, 1, val))
+ return -1;
+
+ return a2_scom_ram(scom, thread, insn, 0xf);
+}
+
+static int a2_scom_initial_tlb(scom_map_t scom, int thread)
+{
+ extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[];
+ extern u32 a2_tlbinit_after_iprot_flush[];
+ extern u32 a2_tlbinit_after_linear_map[];
+ u32 assoc, entries, i;
+ u64 epn, tlbcfg;
+ u32 *p;
+ int rc;
+
+ /* Invalidate all entries (including iprot) */
+
+ rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg);
+ if (rc)
+ goto scom_fail;
+ entries = tlbcfg & TLBnCFG_N_ENTRY;
+ assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24;
+ epn = 0;
+
+ /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
+ a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531);
+ /* Set MMUCR3 to write all thids bit to the TLB */
+ a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f);
+
+ /* Set MAS1 for 1G page size, and MAS2 to our initial EPN */
+ a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB));
+ a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
+ for (i = 0; i < entries; i++) {
+
+ a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc));
+
+ /* tlbwe */
+ rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0);
+ if (rc)
+ goto scom_fail;
+
+ /* Next entry is new address? */
+ if((i + 1) % assoc == 0) {
+ epn += (1 << 30);
+ a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
+ }
+ }
+
+ /* Setup args for linear mapping */
+ rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0));
+ if (rc)
+ goto scom_fail;
+
+ /* Linear mapping */
+ for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) {
+ rc = a2_scom_ram(scom, thread, *p, 0);
+ if (rc)
+ goto scom_fail;
+ }
+
+ /*
+ * For the boot thread, between the linear mapping and the debug
+ * mappings there is a loop to flush iprot mappings. Ramming doesn't do
+ * branches, but the secondary threads don't need to be nearly as smart
+ * (i.e. we don't need to worry about invalidating the mapping we're
+ * standing on).
+ */
+
+ /* Debug mappings. Expects r11 = MAS0 from linear map (set above) */
+ for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) {
+ rc = a2_scom_ram(scom, thread, *p, 0);
+ if (rc)
+ goto scom_fail;
+ }
+
+scom_fail:
+ if (rc)
+ pr_err("Setting up initial TLB failed, err %d\n", rc);
+
+ if (rc == -SCOM_RAMC_INTERRUPT) {
+ /* Interrupt, dump some status */
+ int rc[10];
+ u64 iar, srr0, srr1, esr, mas0, mas1, mas2, mas7_3, mas8, ccr2;
+ rc[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar);
+ rc[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0);
+ rc[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1);
+ rc[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr);
+ rc[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0);
+ rc[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1);
+ rc[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2);
+ rc[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3);
+ rc[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8);
+ rc[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2);
+ pr_err(" -> retreived IAR =0x%llx (err %d)\n", iar, rc[0]);
+ pr_err(" retreived SRR0=0x%llx (err %d)\n", srr0, rc[1]);
+ pr_err(" retreived SRR1=0x%llx (err %d)\n", srr1, rc[2]);
+ pr_err(" retreived ESR =0x%llx (err %d)\n", esr, rc[3]);
+ pr_err(" retreived MAS0=0x%llx (err %d)\n", mas0, rc[4]);
+ pr_err(" retreived MAS1=0x%llx (err %d)\n", mas1, rc[5]);
+ pr_err(" retreived MAS2=0x%llx (err %d)\n", mas2, rc[6]);
+ pr_err(" retreived MS73=0x%llx (err %d)\n", mas7_3, rc[7]);
+ pr_err(" retreived MAS8=0x%llx (err %d)\n", mas8, rc[8]);
+ pr_err(" retreived CCR2=0x%llx (err %d)\n", ccr2, rc[9]);
+ }
+
+ return rc;
+}
+
+int __devinit a2_scom_startup_cpu(unsigned int lcpu, int thr_idx,
+ struct device_node *np)
+{
+ u64 init_iar, init_msr, init_ccr2;
+ unsigned long start_here;
+ int rc, core_setup;
+ scom_map_t scom;
+ u64 pccr0;
+
+ scom = get_scom(lcpu, np, &core_setup);
+ if (!scom) {
+ printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu);
+ return -1;
+ }
+
+ pr_devel("Bringing up CPU%d using SCOM...\n", lcpu);
+
+ pccr0 = scom_read(scom, SCOM_PCCR0);
+ scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG |
+ SCOM_PCCR0_ENABLE_RAM);
+
+ /* Stop the thead with THRCTL. If we are setting up the TLB we stop all
+ * threads. We also disable asynchronous interrupts while RAMing.
+ */
+ if (core_setup)
+ scom_write(scom, SCOM_THRCTL_OR,
+ SCOM_THRCTL_T0_STOP |
+ SCOM_THRCTL_T1_STOP |
+ SCOM_THRCTL_T2_STOP |
+ SCOM_THRCTL_T3_STOP |
+ SCOM_THRCTL_ASYNC_DIS);
+ else
+ scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx);
+
+ /* Flush its pipeline just in case */
+ scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) |
+ SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE);
+
+ a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar);
+ a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr);
+ a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2);
+
+ /* Set MSR to MSR_CM (0x0ff0 is magic value for MSR_CM) */
+ rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM);
+ if (rc) {
+ pr_err("Failed to set MSR ! err %d\n", rc);
+ return rc;
+ }
+
+ /* RAM in an sync/isync for the sake of it */
+ a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0);
+ a2_scom_ram(scom, thr_idx, 0x4c00012c, 0);
+
+ if (core_setup) {
+ pr_devel("CPU%d is first thread in core, initializing TLB...\n",
+ lcpu);
+ rc = a2_scom_initial_tlb(scom, thr_idx);
+ if (rc)
+ goto fail;
+ }
+
+ start_here = *(unsigned long *)(core_setup ? generic_secondary_smp_init
+ : generic_secondary_thread_init);
+ pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here);
+
+ rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here);
+ rc |= a2_scom_setgpr(scom, thr_idx, 3, 0,
+ get_hard_smp_processor_id(lcpu));
+ /*
+ * Tell book3e_secondary_core_init not to set up the TLB, we've
+ * already done that.
+ */
+ rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1);
+
+ rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx);
+
+ scom_write(scom, SCOM_RAMC, 0);
+ scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx));
+ scom_write(scom, SCOM_PCCR0, pccr0);
+fail:
+ pr_devel(" SCOM initialization %s\n", rc ? "failed" : "succeeded");
+ if (rc) {
+ pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n",
+ init_iar, init_msr, init_ccr2);
+ }
+
+ return rc;
+}
diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c
new file mode 100644
index 00000000000..4052e2259f3
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/scom_wsp.c
@@ -0,0 +1,77 @@
+/*
+ * SCOM backend for WSP
+ *
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/cputhreads.h>
+#include <asm/reg_a2.h>
+#include <asm/scom.h>
+#include <asm/udbg.h>
+
+#include "wsp.h"
+
+
+static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count)
+{
+ struct resource r;
+ u64 xscom_addr;
+
+ if (!of_get_property(dev, "scom-controller", NULL)) {
+ pr_err("%s: device %s is not a SCOM controller\n",
+ __func__, dev->full_name);
+ return SCOM_MAP_INVALID;
+ }
+
+ if (of_address_to_resource(dev, 0, &r)) {
+ pr_debug("Failed to find SCOM controller address\n");
+ return 0;
+ }
+
+ /* Transform the SCOM address into an XSCOM offset */
+ xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3);
+
+ return (scom_map_t)ioremap(r.start + xscom_addr, count << 3);
+}
+
+static void wsp_scom_unmap(scom_map_t map)
+{
+ iounmap((void *)map);
+}
+
+static u64 wsp_scom_read(scom_map_t map, u32 reg)
+{
+ u64 __iomem *addr = (u64 __iomem *)map;
+
+ return in_be64(addr + reg);
+}
+
+static void wsp_scom_write(scom_map_t map, u32 reg, u64 value)
+{
+ u64 __iomem *addr = (u64 __iomem *)map;
+
+ return out_be64(addr + reg, value);
+}
+
+static const struct scom_controller wsp_scom_controller = {
+ .map = wsp_scom_map,
+ .unmap = wsp_scom_unmap,
+ .read = wsp_scom_read,
+ .write = wsp_scom_write
+};
+
+void scom_init_wsp(void)
+{
+ scom_init(&wsp_scom_controller);
+}
diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c
new file mode 100644
index 00000000000..11ac2f05e01
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/setup.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2010 Michael Ellerman, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+
+#include "wsp.h"
+
+/*
+ * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property.
+ * Won't work for nodes that are not a descendant of a wsp node.
+ */
+int wsp_get_chip_id(struct device_node *dn)
+{
+ const u32 *p;
+ int rc;
+
+ /* Start looking at the specified node, not its parent */
+ dn = of_node_get(dn);
+ while (dn && !(p = of_get_property(dn, "ibm,wsp-chip-id", NULL)))
+ dn = of_get_next_parent(dn);
+
+ if (!dn)
+ return -1;
+
+ rc = *p;
+ of_node_put(dn);
+
+ return rc;
+}
diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c
new file mode 100644
index 00000000000..9d20fa9d371
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/smp.c
@@ -0,0 +1,88 @@
+/*
+ * SMP Support for A2 platforms
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+
+#include <asm/dbell.h>
+#include <asm/machdep.h>
+#include <asm/xics.h>
+
+#include "ics.h"
+#include "wsp.h"
+
+static void __devinit smp_a2_setup_cpu(int cpu)
+{
+ doorbell_setup_this_cpu();
+
+ if (cpu != boot_cpuid)
+ xics_setup_cpu();
+}
+
+int __devinit smp_a2_kick_cpu(int nr)
+{
+ const char *enable_method;
+ struct device_node *np;
+ int thr_idx;
+
+ if (nr < 0 || nr >= NR_CPUS)
+ return -ENOENT;
+
+ np = of_get_cpu_node(nr, &thr_idx);
+ if (!np)
+ return -ENODEV;
+
+ enable_method = of_get_property(np, "enable-method", NULL);
+ pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method);
+
+ if (!enable_method) {
+ printk(KERN_ERR "CPU%d has no enable-method\n", nr);
+ return -ENOENT;
+ } else if (strcmp(enable_method, "ibm,a2-scom") == 0) {
+ if (a2_scom_startup_cpu(nr, thr_idx, np))
+ return -1;
+ } else {
+ printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n",
+ nr, enable_method);
+ return -EINVAL;
+ }
+
+ /*
+ * The processor is currently spinning, waiting for the
+ * cpu_start field to become non-zero After we set cpu_start,
+ * the processor will continue on to secondary_start
+ */
+ paca[nr].cpu_start = 1;
+
+ return 0;
+}
+
+static int __init smp_a2_probe(void)
+{
+ return cpus_weight(cpu_possible_map);
+}
+
+static struct smp_ops_t a2_smp_ops = {
+ .message_pass = smp_muxed_ipi_message_pass,
+ .cause_ipi = doorbell_cause_ipi,
+ .probe = smp_a2_probe,
+ .kick_cpu = smp_a2_kick_cpu,
+ .setup_cpu = smp_a2_setup_cpu,
+};
+
+void __init a2_setup_smp(void)
+{
+ smp_ops = &a2_smp_ops;
+}
diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h
new file mode 100644
index 00000000000..7c3e087fd2f
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/wsp.h
@@ -0,0 +1,17 @@
+#ifndef __WSP_H
+#define __WSP_H
+
+#include <asm/wsp.h>
+
+extern void wsp_setup_pci(void);
+extern void scom_init_wsp(void);
+
+extern void a2_setup_smp(void);
+extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx,
+ struct device_node *np);
+int smp_a2_cpu_bootable(unsigned int nr);
+int __devinit smp_a2_kick_cpu(int nr);
+
+void opb_pic_init(void);
+
+#endif /* __WSP_H */
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index 396582835cb..d775fd148d1 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -12,3 +12,13 @@ config PPC_MSI_BITMAP
depends on PCI_MSI
default y if MPIC
default y if FSL_PCI
+
+source "arch/powerpc/sysdev/xics/Kconfig"
+
+config PPC_SCOM
+ bool
+
+config SCOM_DEBUGFS
+ bool "Expose SCOM controllers via debugfs"
+ depends on PPC_SCOM
+ default n
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 1e0c933ef77..6076e0074a8 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -57,3 +57,9 @@ obj-$(CONFIG_PPC_MPC52xx) += mpc5xxx_clocks.o
ifeq ($(CONFIG_SUSPEND),y)
obj-$(CONFIG_6xx) += 6xx-suspend.o
endif
+
+obj-$(CONFIG_PPC_SCOM) += scom.o
+
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-$(CONFIG_PPC_XICS) += xics/
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 1636dd89670..bd0d54060b9 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -216,7 +216,7 @@ static int axon_ram_probe(struct platform_device *device)
AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
bank->ph_addr = resource.start;
- bank->io_addr = (unsigned long) ioremap_flags(
+ bank->io_addr = (unsigned long) ioremap_prot(
bank->ph_addr, bank->size, _PAGE_NO_CACHE);
if (bank->io_addr == 0) {
dev_err(&device->dev, "ioremap() failed\n");
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index e0bc944eb23..350787c83e2 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -58,21 +58,21 @@ static struct irq_host *cpm_pic_host;
static void cpm_mask_irq(struct irq_data *d)
{
- unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
clrbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
}
static void cpm_unmask_irq(struct irq_data *d)
{
- unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
setbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
}
static void cpm_end_irq(struct irq_data *d)
{
- unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
out_be32(&cpic_reg->cpic_cisr, (1 << cpm_vec));
}
@@ -157,7 +157,7 @@ unsigned int cpm_pic_init(void)
goto end;
/* Initialize the CPM interrupt controller. */
- hwirq = (unsigned int)irq_map[sirq].hwirq;
+ hwirq = (unsigned int)virq_to_hw(sirq);
out_be32(&cpic_reg->cpic_cicr,
(CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) |
((hwirq/2) << 13) | CICR_HP_MASK);
diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c
index 5495c1be472..bcab50e2a9e 100644
--- a/arch/powerpc/sysdev/cpm2_pic.c
+++ b/arch/powerpc/sysdev/cpm2_pic.c
@@ -81,7 +81,7 @@ static const u_char irq_to_siubit[] = {
static void cpm2_mask_irq(struct irq_data *d)
{
int bit, word;
- unsigned int irq_nr = virq_to_hw(d->irq);
+ unsigned int irq_nr = irqd_to_hwirq(d);
bit = irq_to_siubit[irq_nr];
word = irq_to_siureg[irq_nr];
@@ -93,7 +93,7 @@ static void cpm2_mask_irq(struct irq_data *d)
static void cpm2_unmask_irq(struct irq_data *d)
{
int bit, word;
- unsigned int irq_nr = virq_to_hw(d->irq);
+ unsigned int irq_nr = irqd_to_hwirq(d);
bit = irq_to_siubit[irq_nr];
word = irq_to_siureg[irq_nr];
@@ -105,7 +105,7 @@ static void cpm2_unmask_irq(struct irq_data *d)
static void cpm2_ack(struct irq_data *d)
{
int bit, word;
- unsigned int irq_nr = virq_to_hw(d->irq);
+ unsigned int irq_nr = irqd_to_hwirq(d);
bit = irq_to_siubit[irq_nr];
word = irq_to_siureg[irq_nr];
@@ -116,7 +116,7 @@ static void cpm2_ack(struct irq_data *d)
static void cpm2_end_irq(struct irq_data *d)
{
int bit, word;
- unsigned int irq_nr = virq_to_hw(d->irq);
+ unsigned int irq_nr = irqd_to_hwirq(d);
bit = irq_to_siubit[irq_nr];
word = irq_to_siureg[irq_nr];
@@ -133,7 +133,7 @@ static void cpm2_end_irq(struct irq_data *d)
static int cpm2_set_irq_type(struct irq_data *d, unsigned int flow_type)
{
- unsigned int src = virq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned int vold, vnew, edibit;
/* Port C interrupts are either IRQ_TYPE_EDGE_FALLING or
diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
index 54fb1922fe3..11641589917 100644
--- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
+++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
@@ -106,10 +106,10 @@ int __init instantiate_cache_sram(struct platform_device *dev,
goto out_free;
}
- cache_sram->base_virt = ioremap_flags(cache_sram->base_phys,
+ cache_sram->base_virt = ioremap_prot(cache_sram->base_phys,
cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL);
if (!cache_sram->base_virt) {
- dev_err(&dev->dev, "%s: ioremap_flags failed\n",
+ dev_err(&dev->dev, "%s: ioremap_prot failed\n",
dev->dev.of_node->full_name);
ret = -ENOMEM;
goto out_release;
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 01cd2f08951..92e78333c47 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -110,7 +110,7 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev)
list_for_each_entry(entry, &pdev->msi_list, list) {
if (entry->irq == NO_IRQ)
continue;
- msi_data = irq_get_handler_data(entry->irq);
+ msi_data = irq_get_chip_data(entry->irq);
irq_set_msi_desc(entry->irq, NULL);
msi_bitmap_free_hwirqs(&msi_data->bitmap,
virq_to_hw(entry->irq), 1);
@@ -168,7 +168,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
rc = -ENOSPC;
goto out_free;
}
- irq_set_handler_data(virq, msi_data);
+ /* chip_data is msi_data via host->hostdata in host->map() */
irq_set_msi_desc(virq, entry);
fsl_compose_msi_msg(pdev, hwirq, &msg, msi_data);
@@ -193,7 +193,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
u32 have_shift = 0;
struct fsl_msi_cascade_data *cascade_data;
- cascade_data = (struct fsl_msi_cascade_data *)irq_get_handler_data(irq);
+ cascade_data = irq_get_handler_data(irq);
msi_data = cascade_data->msi_data;
raw_spin_lock(&desc->lock);
@@ -253,7 +253,7 @@ unlock:
static int fsl_of_msi_remove(struct platform_device *ofdev)
{
- struct fsl_msi *msi = ofdev->dev.platform_data;
+ struct fsl_msi *msi = platform_get_drvdata(ofdev);
int virq, i;
struct fsl_msi_cascade_data *cascade_data;
@@ -330,7 +330,7 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev)
dev_err(&dev->dev, "No memory for MSI structure\n");
return -ENOMEM;
}
- dev->dev.platform_data = msi;
+ platform_set_drvdata(dev, msi);
msi->irqhost = irq_alloc_host(dev->dev.of_node, IRQ_HOST_MAP_LINEAR,
NR_MSI_IRQS, &fsl_msi_host_ops, 0);
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 142770cb84b..d18bb27e4df 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -185,18 +185,6 @@ static int i8259_host_map(struct irq_host *h, unsigned int virq,
return 0;
}
-static void i8259_host_unmap(struct irq_host *h, unsigned int virq)
-{
- /* Make sure irq is masked in hardware */
- i8259_mask_irq(irq_get_irq_data(virq));
-
- /* remove chip and handler */
- irq_set_chip_and_handler(virq, NULL, NULL);
-
- /* Make sure it's completed */
- synchronize_irq(virq);
-}
-
static int i8259_host_xlate(struct irq_host *h, struct device_node *ct,
const u32 *intspec, unsigned int intsize,
irq_hw_number_t *out_hwirq, unsigned int *out_flags)
@@ -220,7 +208,6 @@ static int i8259_host_xlate(struct irq_host *h, struct device_node *ct,
static struct irq_host_ops i8259_host_ops = {
.match = i8259_host_match,
.map = i8259_host_map,
- .unmap = i8259_host_unmap,
.xlate = i8259_host_xlate,
};
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index 596554a8725..7367d17364c 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -521,12 +521,10 @@ static inline struct ipic * ipic_from_irq(unsigned int virq)
return primary_ipic;
}
-#define ipic_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq)
-
static void ipic_unmask_irq(struct irq_data *d)
{
struct ipic *ipic = ipic_from_irq(d->irq);
- unsigned int src = ipic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned long flags;
u32 temp;
@@ -542,7 +540,7 @@ static void ipic_unmask_irq(struct irq_data *d)
static void ipic_mask_irq(struct irq_data *d)
{
struct ipic *ipic = ipic_from_irq(d->irq);
- unsigned int src = ipic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned long flags;
u32 temp;
@@ -562,7 +560,7 @@ static void ipic_mask_irq(struct irq_data *d)
static void ipic_ack_irq(struct irq_data *d)
{
struct ipic *ipic = ipic_from_irq(d->irq);
- unsigned int src = ipic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned long flags;
u32 temp;
@@ -581,7 +579,7 @@ static void ipic_ack_irq(struct irq_data *d)
static void ipic_mask_irq_and_ack(struct irq_data *d)
{
struct ipic *ipic = ipic_from_irq(d->irq);
- unsigned int src = ipic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned long flags;
u32 temp;
@@ -604,7 +602,7 @@ static void ipic_mask_irq_and_ack(struct irq_data *d)
static int ipic_set_irq_type(struct irq_data *d, unsigned int flow_type)
{
struct ipic *ipic = ipic_from_irq(d->irq);
- unsigned int src = ipic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned int vold, vnew, edibit;
if (flow_type == IRQ_TYPE_NONE)
@@ -793,7 +791,7 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags)
int ipic_set_priority(unsigned int virq, unsigned int priority)
{
struct ipic *ipic = ipic_from_irq(virq);
- unsigned int src = ipic_irq_to_hw(virq);
+ unsigned int src = virq_to_hw(virq);
u32 temp;
if (priority > 7)
@@ -821,7 +819,7 @@ int ipic_set_priority(unsigned int virq, unsigned int priority)
void ipic_set_highest_priority(unsigned int virq)
{
struct ipic *ipic = ipic_from_irq(virq);
- unsigned int src = ipic_irq_to_hw(virq);
+ unsigned int src = virq_to_hw(virq);
u32 temp;
temp = ipic_read(ipic->regs, IPIC_SICFR);
diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c
index 20732420906..ddc877a3a23 100644
--- a/arch/powerpc/sysdev/mmio_nvram.c
+++ b/arch/powerpc/sysdev/mmio_nvram.c
@@ -115,6 +115,8 @@ int __init mmio_nvram_init(void)
int ret;
nvram_node = of_find_node_by_type(NULL, "nvram");
+ if (!nvram_node)
+ nvram_node = of_find_compatible_node(NULL, NULL, "nvram");
if (!nvram_node) {
printk(KERN_WARNING "nvram: no node found in device-tree\n");
return -ENODEV;
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c
index a88800ff4d0..20924f2246f 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/sysdev/mpc8xx_pic.c
@@ -28,7 +28,7 @@ int cpm_get_irq(struct pt_regs *regs);
static void mpc8xx_unmask_irq(struct irq_data *d)
{
int bit, word;
- unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
bit = irq_nr & 0x1f;
word = irq_nr >> 5;
@@ -40,7 +40,7 @@ static void mpc8xx_unmask_irq(struct irq_data *d)
static void mpc8xx_mask_irq(struct irq_data *d)
{
int bit, word;
- unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
bit = irq_nr & 0x1f;
word = irq_nr >> 5;
@@ -52,7 +52,7 @@ static void mpc8xx_mask_irq(struct irq_data *d)
static void mpc8xx_ack(struct irq_data *d)
{
int bit;
- unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
bit = irq_nr & 0x1f;
out_be32(&siu_reg->sc_sipend, 1 << (31-bit));
@@ -61,7 +61,7 @@ static void mpc8xx_ack(struct irq_data *d)
static void mpc8xx_end_irq(struct irq_data *d)
{
int bit, word;
- unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+ unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
bit = irq_nr & 0x1f;
word = irq_nr >> 5;
@@ -73,7 +73,7 @@ static void mpc8xx_end_irq(struct irq_data *d)
static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type)
{
if (flow_type & IRQ_TYPE_EDGE_FALLING) {
- irq_hw_number_t hw = (unsigned int)irq_map[d->irq].hwirq;
+ irq_hw_number_t hw = (unsigned int)irqd_to_hwirq(d);
unsigned int siel = in_be32(&siu_reg->sc_siel);
/* only external IRQ senses are programmable */
diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c
index 0892a2841c2..fb4963abdf5 100644
--- a/arch/powerpc/sysdev/mpc8xxx_gpio.c
+++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c
@@ -163,7 +163,7 @@ static void mpc8xxx_irq_unmask(struct irq_data *d)
spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
- setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+ setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
}
@@ -176,7 +176,7 @@ static void mpc8xxx_irq_mask(struct irq_data *d)
spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
- clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+ clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
}
@@ -186,7 +186,7 @@ static void mpc8xxx_irq_ack(struct irq_data *d)
struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d);
struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc;
- out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+ out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
}
static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type)
@@ -199,14 +199,14 @@ static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type)
case IRQ_TYPE_EDGE_FALLING:
spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
setbits32(mm->regs + GPIO_ICR,
- mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+ mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
break;
case IRQ_TYPE_EDGE_BOTH:
spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
clrbits32(mm->regs + GPIO_ICR,
- mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+ mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
break;
@@ -221,7 +221,7 @@ static int mpc512x_irq_set_type(struct irq_data *d, unsigned int flow_type)
{
struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d);
struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc;
- unsigned long gpio = virq_to_hw(d->irq);
+ unsigned long gpio = irqd_to_hwirq(d);
void __iomem *reg;
unsigned int shift;
unsigned long flags;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 7e5dc8f4984..3a8de5bb628 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -6,6 +6,7 @@
* with various broken implementations of this HW.
*
* Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of this archive
@@ -219,6 +220,28 @@ static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 valu
_mpic_write(mpic->reg_type, &mpic->gregs, offset, value);
}
+static inline u32 _mpic_tm_read(struct mpic *mpic, unsigned int tm)
+{
+ unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) +
+ ((tm & 3) * MPIC_INFO(TIMER_STRIDE));
+
+ if (tm >= 4)
+ offset += 0x1000 / 4;
+
+ return _mpic_read(mpic->reg_type, &mpic->tmregs, offset);
+}
+
+static inline void _mpic_tm_write(struct mpic *mpic, unsigned int tm, u32 value)
+{
+ unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) +
+ ((tm & 3) * MPIC_INFO(TIMER_STRIDE));
+
+ if (tm >= 4)
+ offset += 0x1000 / 4;
+
+ _mpic_write(mpic->reg_type, &mpic->tmregs, offset, value);
+}
+
static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg)
{
unsigned int cpu = mpic_processor_id(mpic);
@@ -269,6 +292,8 @@ static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no,
#define mpic_write(b,r,v) _mpic_write(mpic->reg_type,&(b),(r),(v))
#define mpic_ipi_read(i) _mpic_ipi_read(mpic,(i))
#define mpic_ipi_write(i,v) _mpic_ipi_write(mpic,(i),(v))
+#define mpic_tm_read(i) _mpic_tm_read(mpic,(i))
+#define mpic_tm_write(i,v) _mpic_tm_write(mpic,(i),(v))
#define mpic_cpu_read(i) _mpic_cpu_read(mpic,(i))
#define mpic_cpu_write(i,v) _mpic_cpu_write(mpic,(i),(v))
#define mpic_irq_read(s,r) _mpic_irq_read(mpic,(s),(r))
@@ -608,8 +633,6 @@ static int irq_choose_cpu(const struct cpumask *mask)
}
#endif
-#define mpic_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq)
-
/* Find an mpic associated with a given linux interrupt */
static struct mpic *mpic_find(unsigned int irq)
{
@@ -622,11 +645,18 @@ static struct mpic *mpic_find(unsigned int irq)
/* Determine if the linux irq is an IPI */
static unsigned int mpic_is_ipi(struct mpic *mpic, unsigned int irq)
{
- unsigned int src = mpic_irq_to_hw(irq);
+ unsigned int src = virq_to_hw(irq);
return (src >= mpic->ipi_vecs[0] && src <= mpic->ipi_vecs[3]);
}
+/* Determine if the linux irq is a timer */
+static unsigned int mpic_is_tm(struct mpic *mpic, unsigned int irq)
+{
+ unsigned int src = virq_to_hw(irq);
+
+ return (src >= mpic->timer_vecs[0] && src <= mpic->timer_vecs[7]);
+}
/* Convert a cpu mask from logical to physical cpu numbers. */
static inline u32 mpic_physmask(u32 cpumask)
@@ -634,7 +664,7 @@ static inline u32 mpic_physmask(u32 cpumask)
int i;
u32 mask = 0;
- for (i = 0; i < NR_CPUS; ++i, cpumask >>= 1)
+ for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1)
mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
return mask;
}
@@ -675,7 +705,7 @@ void mpic_unmask_irq(struct irq_data *d)
{
unsigned int loops = 100000;
struct mpic *mpic = mpic_from_irq_data(d);
- unsigned int src = mpic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, d->irq, src);
@@ -696,7 +726,7 @@ void mpic_mask_irq(struct irq_data *d)
{
unsigned int loops = 100000;
struct mpic *mpic = mpic_from_irq_data(d);
- unsigned int src = mpic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
DBG("%s: disable_irq: %d (src %d)\n", mpic->name, d->irq, src);
@@ -734,7 +764,7 @@ void mpic_end_irq(struct irq_data *d)
static void mpic_unmask_ht_irq(struct irq_data *d)
{
struct mpic *mpic = mpic_from_irq_data(d);
- unsigned int src = mpic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
mpic_unmask_irq(d);
@@ -745,7 +775,7 @@ static void mpic_unmask_ht_irq(struct irq_data *d)
static unsigned int mpic_startup_ht_irq(struct irq_data *d)
{
struct mpic *mpic = mpic_from_irq_data(d);
- unsigned int src = mpic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
mpic_unmask_irq(d);
mpic_startup_ht_interrupt(mpic, src, irqd_is_level_type(d));
@@ -756,7 +786,7 @@ static unsigned int mpic_startup_ht_irq(struct irq_data *d)
static void mpic_shutdown_ht_irq(struct irq_data *d)
{
struct mpic *mpic = mpic_from_irq_data(d);
- unsigned int src = mpic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
mpic_shutdown_ht_interrupt(mpic, src);
mpic_mask_irq(d);
@@ -765,7 +795,7 @@ static void mpic_shutdown_ht_irq(struct irq_data *d)
static void mpic_end_ht_irq(struct irq_data *d)
{
struct mpic *mpic = mpic_from_irq_data(d);
- unsigned int src = mpic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
#ifdef DEBUG_IRQ
DBG("%s: end_irq: %d\n", mpic->name, d->irq);
@@ -786,7 +816,7 @@ static void mpic_end_ht_irq(struct irq_data *d)
static void mpic_unmask_ipi(struct irq_data *d)
{
struct mpic *mpic = mpic_from_ipi(d);
- unsigned int src = mpic_irq_to_hw(d->irq) - mpic->ipi_vecs[0];
+ unsigned int src = virq_to_hw(d->irq) - mpic->ipi_vecs[0];
DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, d->irq, src);
mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK);
@@ -813,27 +843,42 @@ static void mpic_end_ipi(struct irq_data *d)
#endif /* CONFIG_SMP */
+static void mpic_unmask_tm(struct irq_data *d)
+{
+ struct mpic *mpic = mpic_from_irq_data(d);
+ unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0];
+
+ DBG("%s: enable_tm: %d (tm %d)\n", mpic->name, irq, src);
+ mpic_tm_write(src, mpic_tm_read(src) & ~MPIC_VECPRI_MASK);
+ mpic_tm_read(src);
+}
+
+static void mpic_mask_tm(struct irq_data *d)
+{
+ struct mpic *mpic = mpic_from_irq_data(d);
+ unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0];
+
+ mpic_tm_write(src, mpic_tm_read(src) | MPIC_VECPRI_MASK);
+ mpic_tm_read(src);
+}
+
int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
bool force)
{
struct mpic *mpic = mpic_from_irq_data(d);
- unsigned int src = mpic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
if (mpic->flags & MPIC_SINGLE_DEST_CPU) {
int cpuid = irq_choose_cpu(cpumask);
mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1 << cpuid);
} else {
- cpumask_var_t tmp;
-
- alloc_cpumask_var(&tmp, GFP_KERNEL);
+ u32 mask = cpumask_bits(cpumask)[0];
- cpumask_and(tmp, cpumask, cpu_online_mask);
+ mask &= cpumask_bits(cpu_online_mask)[0];
mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
- mpic_physmask(cpumask_bits(tmp)[0]));
-
- free_cpumask_var(tmp);
+ mpic_physmask(mask));
}
return 0;
@@ -863,7 +908,7 @@ static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
{
struct mpic *mpic = mpic_from_irq_data(d);
- unsigned int src = mpic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned int vecpri, vold, vnew;
DBG("mpic: set_irq_type(mpic:@%p,virq:%d,src:0x%x,type:0x%x)\n",
@@ -899,7 +944,7 @@ int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
void mpic_set_vector(unsigned int virq, unsigned int vector)
{
struct mpic *mpic = mpic_from_irq(virq);
- unsigned int src = mpic_irq_to_hw(virq);
+ unsigned int src = virq_to_hw(virq);
unsigned int vecpri;
DBG("mpic: set_vector(mpic:@%p,virq:%d,src:%d,vector:0x%x)\n",
@@ -917,7 +962,7 @@ void mpic_set_vector(unsigned int virq, unsigned int vector)
void mpic_set_destination(unsigned int virq, unsigned int cpuid)
{
struct mpic *mpic = mpic_from_irq(virq);
- unsigned int src = mpic_irq_to_hw(virq);
+ unsigned int src = virq_to_hw(virq);
DBG("mpic: set_destination(mpic:@%p,virq:%d,src:%d,cpuid:0x%x)\n",
mpic, virq, src, cpuid);
@@ -943,6 +988,12 @@ static struct irq_chip mpic_ipi_chip = {
};
#endif /* CONFIG_SMP */
+static struct irq_chip mpic_tm_chip = {
+ .irq_mask = mpic_mask_tm,
+ .irq_unmask = mpic_unmask_tm,
+ .irq_eoi = mpic_end_irq,
+};
+
#ifdef CONFIG_MPIC_U3_HT_IRQS
static struct irq_chip mpic_irq_ht_chip = {
.irq_startup = mpic_startup_ht_irq,
@@ -986,6 +1037,16 @@ static int mpic_host_map(struct irq_host *h, unsigned int virq,
}
#endif /* CONFIG_SMP */
+ if (hw >= mpic->timer_vecs[0] && hw <= mpic->timer_vecs[7]) {
+ WARN_ON(!(mpic->flags & MPIC_PRIMARY));
+
+ DBG("mpic: mapping as timer\n");
+ irq_set_chip_data(virq, mpic);
+ irq_set_chip_and_handler(virq, &mpic->hc_tm,
+ handle_fasteoi_irq);
+ return 0;
+ }
+
if (hw >= mpic->irq_count)
return -EINVAL;
@@ -1026,6 +1087,7 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct,
irq_hw_number_t *out_hwirq, unsigned int *out_flags)
{
+ struct mpic *mpic = h->host_data;
static unsigned char map_mpic_senses[4] = {
IRQ_TYPE_EDGE_RISING,
IRQ_TYPE_LEVEL_LOW,
@@ -1034,7 +1096,38 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct,
};
*out_hwirq = intspec[0];
- if (intsize > 1) {
+ if (intsize >= 4 && (mpic->flags & MPIC_FSL)) {
+ /*
+ * Freescale MPIC with extended intspec:
+ * First two cells are as usual. Third specifies
+ * an "interrupt type". Fourth is type-specific data.
+ *
+ * See Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
+ */
+ switch (intspec[2]) {
+ case 0:
+ case 1: /* no EISR/EIMR support for now, treat as shared IRQ */
+ break;
+ case 2:
+ if (intspec[0] >= ARRAY_SIZE(mpic->ipi_vecs))
+ return -EINVAL;
+
+ *out_hwirq = mpic->ipi_vecs[intspec[0]];
+ break;
+ case 3:
+ if (intspec[0] >= ARRAY_SIZE(mpic->timer_vecs))
+ return -EINVAL;
+
+ *out_hwirq = mpic->timer_vecs[intspec[0]];
+ break;
+ default:
+ pr_debug("%s: unknown irq type %u\n",
+ __func__, intspec[2]);
+ return -EINVAL;
+ }
+
+ *out_flags = map_mpic_senses[intspec[1] & 3];
+ } else if (intsize > 1) {
u32 mask = 0x3;
/* Apple invented a new race of encoding on machines with
@@ -1110,6 +1203,9 @@ struct mpic * __init mpic_alloc(struct device_node *node,
mpic->hc_ipi.name = name;
#endif /* CONFIG_SMP */
+ mpic->hc_tm = mpic_tm_chip;
+ mpic->hc_tm.name = name;
+
mpic->flags = flags;
mpic->isu_size = isu_size;
mpic->irq_count = irq_count;
@@ -1120,10 +1216,14 @@ struct mpic * __init mpic_alloc(struct device_node *node,
else
intvec_top = 255;
- mpic->timer_vecs[0] = intvec_top - 8;
- mpic->timer_vecs[1] = intvec_top - 7;
- mpic->timer_vecs[2] = intvec_top - 6;
- mpic->timer_vecs[3] = intvec_top - 5;
+ mpic->timer_vecs[0] = intvec_top - 12;
+ mpic->timer_vecs[1] = intvec_top - 11;
+ mpic->timer_vecs[2] = intvec_top - 10;
+ mpic->timer_vecs[3] = intvec_top - 9;
+ mpic->timer_vecs[4] = intvec_top - 8;
+ mpic->timer_vecs[5] = intvec_top - 7;
+ mpic->timer_vecs[6] = intvec_top - 6;
+ mpic->timer_vecs[7] = intvec_top - 5;
mpic->ipi_vecs[0] = intvec_top - 4;
mpic->ipi_vecs[1] = intvec_top - 3;
mpic->ipi_vecs[2] = intvec_top - 2;
@@ -1133,6 +1233,8 @@ struct mpic * __init mpic_alloc(struct device_node *node,
/* Check for "big-endian" in device-tree */
if (node && of_get_property(node, "big-endian", NULL) != NULL)
mpic->flags |= MPIC_BIG_ENDIAN;
+ if (node && of_device_is_compatible(node, "fsl,mpic"))
+ mpic->flags |= MPIC_FSL;
/* Look for protected sources */
if (node) {
@@ -1324,15 +1426,17 @@ void __init mpic_init(struct mpic *mpic)
/* Set current processor priority to max */
mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf);
- /* Initialize timers: just disable them all */
+ /* Initialize timers to our reserved vectors and mask them for now */
for (i = 0; i < 4; i++) {
mpic_write(mpic->tmregs,
i * MPIC_INFO(TIMER_STRIDE) +
- MPIC_INFO(TIMER_DESTINATION), 0);
+ MPIC_INFO(TIMER_DESTINATION),
+ 1 << hard_smp_processor_id());
mpic_write(mpic->tmregs,
i * MPIC_INFO(TIMER_STRIDE) +
MPIC_INFO(TIMER_VECTOR_PRI),
MPIC_VECPRI_MASK |
+ (9 << MPIC_VECPRI_PRIORITY_SHIFT) |
(mpic->timer_vecs[0] + i));
}
@@ -1428,7 +1532,7 @@ void __init mpic_set_serial_int(struct mpic *mpic, int enable)
void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
{
struct mpic *mpic = mpic_find(irq);
- unsigned int src = mpic_irq_to_hw(irq);
+ unsigned int src = virq_to_hw(irq);
unsigned long flags;
u32 reg;
@@ -1441,6 +1545,11 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
~MPIC_VECPRI_PRIORITY_MASK;
mpic_ipi_write(src - mpic->ipi_vecs[0],
reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+ } else if (mpic_is_tm(mpic, irq)) {
+ reg = mpic_tm_read(src - mpic->timer_vecs[0]) &
+ ~MPIC_VECPRI_PRIORITY_MASK;
+ mpic_tm_write(src - mpic->timer_vecs[0],
+ reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
} else {
reg = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI))
& ~MPIC_VECPRI_PRIORITY_MASK;
@@ -1620,46 +1729,28 @@ void mpic_request_ipis(void)
}
}
-static void mpic_send_ipi(unsigned int ipi_no, const struct cpumask *cpu_mask)
+void smp_mpic_message_pass(int cpu, int msg)
{
struct mpic *mpic = mpic_primary;
+ u32 physmask;
BUG_ON(mpic == NULL);
-#ifdef DEBUG_IPI
- DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no);
-#endif
-
- mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) +
- ipi_no * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE),
- mpic_physmask(cpumask_bits(cpu_mask)[0]));
-}
-
-void smp_mpic_message_pass(int target, int msg)
-{
- cpumask_var_t tmp;
-
/* make sure we're sending something that translates to an IPI */
if ((unsigned int)msg > 3) {
printk("SMP %d: smp_message_pass: unknown msg %d\n",
smp_processor_id(), msg);
return;
}
- switch (target) {
- case MSG_ALL:
- mpic_send_ipi(msg, cpu_online_mask);
- break;
- case MSG_ALL_BUT_SELF:
- alloc_cpumask_var(&tmp, GFP_NOWAIT);
- cpumask_andnot(tmp, cpu_online_mask,
- cpumask_of(smp_processor_id()));
- mpic_send_ipi(msg, tmp);
- free_cpumask_var(tmp);
- break;
- default:
- mpic_send_ipi(msg, cpumask_of(target));
- break;
- }
+
+#ifdef DEBUG_IPI
+ DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, msg);
+#endif
+
+ physmask = 1 << get_hard_smp_processor_id(cpu);
+
+ mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) +
+ msg * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE), physmask);
}
int __init smp_mpic_probe(void)
diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c
index e9c633c7c08..14d130268e7 100644
--- a/arch/powerpc/sysdev/mv64x60_pic.c
+++ b/arch/powerpc/sysdev/mv64x60_pic.c
@@ -78,7 +78,7 @@ static struct irq_host *mv64x60_irq_host;
static void mv64x60_mask_low(struct irq_data *d)
{
- int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+ int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
unsigned long flags;
spin_lock_irqsave(&mv64x60_lock, flags);
@@ -91,7 +91,7 @@ static void mv64x60_mask_low(struct irq_data *d)
static void mv64x60_unmask_low(struct irq_data *d)
{
- int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+ int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
unsigned long flags;
spin_lock_irqsave(&mv64x60_lock, flags);
@@ -115,7 +115,7 @@ static struct irq_chip mv64x60_chip_low = {
static void mv64x60_mask_high(struct irq_data *d)
{
- int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+ int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
unsigned long flags;
spin_lock_irqsave(&mv64x60_lock, flags);
@@ -128,7 +128,7 @@ static void mv64x60_mask_high(struct irq_data *d)
static void mv64x60_unmask_high(struct irq_data *d)
{
- int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+ int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
unsigned long flags;
spin_lock_irqsave(&mv64x60_lock, flags);
@@ -152,7 +152,7 @@ static struct irq_chip mv64x60_chip_high = {
static void mv64x60_mask_gpp(struct irq_data *d)
{
- int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+ int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
unsigned long flags;
spin_lock_irqsave(&mv64x60_lock, flags);
@@ -165,7 +165,7 @@ static void mv64x60_mask_gpp(struct irq_data *d)
static void mv64x60_mask_ack_gpp(struct irq_data *d)
{
- int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+ int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
unsigned long flags;
spin_lock_irqsave(&mv64x60_lock, flags);
@@ -180,7 +180,7 @@ static void mv64x60_mask_ack_gpp(struct irq_data *d)
static void mv64x60_unmask_gpp(struct irq_data *d)
{
- int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+ int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
unsigned long flags;
spin_lock_irqsave(&mv64x60_lock, flags);
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c
index 832d6924ad1..b2acda07220 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c
@@ -197,12 +197,10 @@ static inline struct qe_ic *qe_ic_from_irq_data(struct irq_data *d)
return irq_data_get_irq_chip_data(d);
}
-#define virq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq)
-
static void qe_ic_unmask_irq(struct irq_data *d)
{
struct qe_ic *qe_ic = qe_ic_from_irq_data(d);
- unsigned int src = virq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned long flags;
u32 temp;
@@ -218,7 +216,7 @@ static void qe_ic_unmask_irq(struct irq_data *d)
static void qe_ic_mask_irq(struct irq_data *d)
{
struct qe_ic *qe_ic = qe_ic_from_irq_data(d);
- unsigned int src = virq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned long flags;
u32 temp;
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
new file mode 100644
index 00000000000..b2593ce30c9
--- /dev/null
+++ b/arch/powerpc/sysdev/scom.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ * <benh@kernel.crashing.org>
+ * and David Gibson, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <asm/prom.h>
+#include <asm/scom.h>
+
+const struct scom_controller *scom_controller;
+EXPORT_SYMBOL_GPL(scom_controller);
+
+struct device_node *scom_find_parent(struct device_node *node)
+{
+ struct device_node *par, *tmp;
+ const u32 *p;
+
+ for (par = of_node_get(node); par;) {
+ if (of_get_property(par, "scom-controller", NULL))
+ break;
+ p = of_get_property(par, "scom-parent", NULL);
+ tmp = par;
+ if (p == NULL)
+ par = of_get_parent(par);
+ else
+ par = of_find_node_by_phandle(*p);
+ of_node_put(tmp);
+ }
+ return par;
+}
+EXPORT_SYMBOL_GPL(scom_find_parent);
+
+scom_map_t scom_map_device(struct device_node *dev, int index)
+{
+ struct device_node *parent;
+ unsigned int cells, size;
+ const u32 *prop;
+ u64 reg, cnt;
+ scom_map_t ret;
+
+ parent = scom_find_parent(dev);
+
+ if (parent == NULL)
+ return 0;
+
+ prop = of_get_property(parent, "#scom-cells", NULL);
+ cells = prop ? *prop : 1;
+
+ prop = of_get_property(dev, "scom-reg", &size);
+ if (!prop)
+ return 0;
+ size >>= 2;
+
+ if (index >= (size / (2*cells)))
+ return 0;
+
+ reg = of_read_number(&prop[index * cells * 2], cells);
+ cnt = of_read_number(&prop[index * cells * 2 + cells], cells);
+
+ ret = scom_map(parent, reg, cnt);
+ of_node_put(parent);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(scom_map_device);
+
+#ifdef CONFIG_SCOM_DEBUGFS
+struct scom_debug_entry {
+ struct device_node *dn;
+ unsigned long addr;
+ scom_map_t map;
+ spinlock_t lock;
+ char name[8];
+ struct debugfs_blob_wrapper blob;
+};
+
+static int scom_addr_set(void *data, u64 val)
+{
+ struct scom_debug_entry *ent = data;
+
+ ent->addr = 0;
+ scom_unmap(ent->map);
+
+ ent->map = scom_map(ent->dn, val, 1);
+ if (scom_map_ok(ent->map))
+ ent->addr = val;
+ else
+ return -EFAULT;
+
+ return 0;
+}
+
+static int scom_addr_get(void *data, u64 *val)
+{
+ struct scom_debug_entry *ent = data;
+ *val = ent->addr;
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(scom_addr_fops, scom_addr_get, scom_addr_set,
+ "0x%llx\n");
+
+static int scom_val_set(void *data, u64 val)
+{
+ struct scom_debug_entry *ent = data;
+
+ if (!scom_map_ok(ent->map))
+ return -EFAULT;
+
+ scom_write(ent->map, 0, val);
+
+ return 0;
+}
+
+static int scom_val_get(void *data, u64 *val)
+{
+ struct scom_debug_entry *ent = data;
+
+ if (!scom_map_ok(ent->map))
+ return -EFAULT;
+
+ *val = scom_read(ent->map, 0);
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(scom_val_fops, scom_val_get, scom_val_set,
+ "0x%llx\n");
+
+static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
+ int i)
+{
+ struct scom_debug_entry *ent;
+ struct dentry *dir;
+
+ ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ return -ENOMEM;
+
+ ent->dn = of_node_get(dn);
+ ent->map = SCOM_MAP_INVALID;
+ spin_lock_init(&ent->lock);
+ snprintf(ent->name, 8, "scom%d", i);
+ ent->blob.data = dn->full_name;
+ ent->blob.size = strlen(dn->full_name);
+
+ dir = debugfs_create_dir(ent->name, root);
+ if (!dir) {
+ of_node_put(dn);
+ kfree(ent);
+ return -1;
+ }
+
+ debugfs_create_file("addr", 0600, dir, ent, &scom_addr_fops);
+ debugfs_create_file("value", 0600, dir, ent, &scom_val_fops);
+ debugfs_create_blob("path", 0400, dir, &ent->blob);
+
+ return 0;
+}
+
+static int scom_debug_init(void)
+{
+ struct device_node *dn;
+ struct dentry *root;
+ int i, rc;
+
+ root = debugfs_create_dir("scom", powerpc_debugfs_root);
+ if (!root)
+ return -1;
+
+ i = rc = 0;
+ for_each_node_with_property(dn, "scom-controller")
+ rc |= scom_debug_init_one(root, dn, i++);
+
+ return rc;
+}
+device_initcall(scom_debug_init);
+#endif /* CONFIG_SCOM_DEBUGFS */
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c
index 5d913851662..984cd202915 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/sysdev/uic.c
@@ -41,8 +41,6 @@
#define UIC_VR 0x7
#define UIC_VCR 0x8
-#define uic_irq_to_hw(virq) (irq_map[virq].hwirq)
-
struct uic *primary_uic;
struct uic {
@@ -58,7 +56,7 @@ struct uic {
static void uic_unmask_irq(struct irq_data *d)
{
struct uic *uic = irq_data_get_irq_chip_data(d);
- unsigned int src = uic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned long flags;
u32 er, sr;
@@ -76,7 +74,7 @@ static void uic_unmask_irq(struct irq_data *d)
static void uic_mask_irq(struct irq_data *d)
{
struct uic *uic = irq_data_get_irq_chip_data(d);
- unsigned int src = uic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned long flags;
u32 er;
@@ -90,7 +88,7 @@ static void uic_mask_irq(struct irq_data *d)
static void uic_ack_irq(struct irq_data *d)
{
struct uic *uic = irq_data_get_irq_chip_data(d);
- unsigned int src = uic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned long flags;
spin_lock_irqsave(&uic->lock, flags);
@@ -101,7 +99,7 @@ static void uic_ack_irq(struct irq_data *d)
static void uic_mask_ack_irq(struct irq_data *d)
{
struct uic *uic = irq_data_get_irq_chip_data(d);
- unsigned int src = uic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned long flags;
u32 er, sr;
@@ -126,7 +124,7 @@ static void uic_mask_ack_irq(struct irq_data *d)
static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type)
{
struct uic *uic = irq_data_get_irq_chip_data(d);
- unsigned int src = uic_irq_to_hw(d->irq);
+ unsigned int src = irqd_to_hwirq(d);
unsigned long flags;
int trigger, polarity;
u32 tr, pr, mask;
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
new file mode 100644
index 00000000000..0031eda320c
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -0,0 +1,13 @@
+config PPC_XICS
+ def_bool n
+ select PPC_SMP_MUXED_IPI
+
+config PPC_ICP_NATIVE
+ def_bool n
+
+config PPC_ICP_HV
+ def_bool n
+
+config PPC_ICS_RTAS
+ def_bool n
+
diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile
new file mode 100644
index 00000000000..b75a6059337
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Makefile
@@ -0,0 +1,6 @@
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-y += xics-common.o
+obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o
+obj-$(CONFIG_PPC_ICP_HV) += icp-hv.o
+obj-$(CONFIG_PPC_ICS_RTAS) += ics-rtas.o
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
new file mode 100644
index 00000000000..9518d367a64
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/io.h>
+#include <asm/hvcall.h>
+
+static inline unsigned int icp_hv_get_xirr(unsigned char cppr)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ rc = plpar_hcall(H_XIRR, retbuf, cppr);
+ if (rc != H_SUCCESS)
+ panic(" bad return code xirr - rc = %lx\n", rc);
+ return (unsigned int)retbuf[0];
+}
+
+static inline void icp_hv_set_xirr(unsigned int value)
+{
+ long rc = plpar_hcall_norets(H_EOI, value);
+ if (rc != H_SUCCESS)
+ panic("bad return code EOI - rc = %ld, value=%x\n", rc, value);
+}
+
+static inline void icp_hv_set_cppr(u8 value)
+{
+ long rc = plpar_hcall_norets(H_CPPR, value);
+ if (rc != H_SUCCESS)
+ panic("bad return code cppr - rc = %lx\n", rc);
+}
+
+static inline void icp_hv_set_qirr(int n_cpu , u8 value)
+{
+ long rc = plpar_hcall_norets(H_IPI, get_hard_smp_processor_id(n_cpu),
+ value);
+ if (rc != H_SUCCESS)
+ panic("bad return code qirr - rc = %lx\n", rc);
+}
+
+static void icp_hv_eoi(struct irq_data *d)
+{
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+ iosync();
+ icp_hv_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_hv_teardown_cpu(void)
+{
+ int cpu = smp_processor_id();
+
+ /* Clear any pending IPI */
+ icp_hv_set_qirr(cpu, 0xff);
+}
+
+static void icp_hv_flush_ipi(void)
+{
+ /* We take the ipi irq but and never return so we
+ * need to EOI the IPI, but want to leave our priority 0
+ *
+ * should we check all the other interrupts too?
+ * should we be flagging idle loop instead?
+ * or creating some task to be scheduled?
+ */
+
+ icp_hv_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_hv_get_irq(void)
+{
+ unsigned int xirr = icp_hv_get_xirr(xics_cppr_top());
+ unsigned int vec = xirr & 0x00ffffff;
+ unsigned int irq;
+
+ if (vec == XICS_IRQ_SPURIOUS)
+ return NO_IRQ;
+
+ irq = irq_radix_revmap_lookup(xics_host, vec);
+ if (likely(irq != NO_IRQ)) {
+ xics_push_cppr(vec);
+ return irq;
+ }
+
+ /* We don't have a linux mapping, so have rtas mask it. */
+ xics_mask_unknown_vec(vec);
+
+ /* We might learn about it later, so EOI it */
+ icp_hv_set_xirr(xirr);
+
+ return NO_IRQ;
+}
+
+static void icp_hv_set_cpu_priority(unsigned char cppr)
+{
+ xics_set_base_cppr(cppr);
+ icp_hv_set_cppr(cppr);
+ iosync();
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_hv_cause_ipi(int cpu, unsigned long data)
+{
+ icp_hv_set_qirr(cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_hv_ipi_action(int irq, void *dev_id)
+{
+ int cpu = smp_processor_id();
+
+ icp_hv_set_qirr(cpu, 0xff);
+
+ return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+static const struct icp_ops icp_hv_ops = {
+ .get_irq = icp_hv_get_irq,
+ .eoi = icp_hv_eoi,
+ .set_priority = icp_hv_set_cpu_priority,
+ .teardown_cpu = icp_hv_teardown_cpu,
+ .flush_ipi = icp_hv_flush_ipi,
+#ifdef CONFIG_SMP
+ .ipi_action = icp_hv_ipi_action,
+ .cause_ipi = icp_hv_cause_ipi,
+#endif
+};
+
+int icp_hv_init(void)
+{
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xicp");
+ if (!np)
+ np = of_find_node_by_type(NULL,
+ "PowerPC-External-Interrupt-Presentation");
+ if (!np)
+ return -ENODEV;
+
+ icp_ops = &icp_hv_ops;
+
+ return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
new file mode 100644
index 00000000000..1f15ad43614
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+
+struct icp_ipl {
+ union {
+ u32 word;
+ u8 bytes[4];
+ } xirr_poll;
+ union {
+ u32 word;
+ u8 bytes[4];
+ } xirr;
+ u32 dummy;
+ union {
+ u32 word;
+ u8 bytes[4];
+ } qirr;
+ u32 link_a;
+ u32 link_b;
+ u32 link_c;
+};
+
+static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
+
+static inline unsigned int icp_native_get_xirr(void)
+{
+ int cpu = smp_processor_id();
+
+ return in_be32(&icp_native_regs[cpu]->xirr.word);
+}
+
+static inline void icp_native_set_xirr(unsigned int value)
+{
+ int cpu = smp_processor_id();
+
+ out_be32(&icp_native_regs[cpu]->xirr.word, value);
+}
+
+static inline void icp_native_set_cppr(u8 value)
+{
+ int cpu = smp_processor_id();
+
+ out_8(&icp_native_regs[cpu]->xirr.bytes[0], value);
+}
+
+static inline void icp_native_set_qirr(int n_cpu, u8 value)
+{
+ out_8(&icp_native_regs[n_cpu]->qirr.bytes[0], value);
+}
+
+static void icp_native_set_cpu_priority(unsigned char cppr)
+{
+ xics_set_base_cppr(cppr);
+ icp_native_set_cppr(cppr);
+ iosync();
+}
+
+static void icp_native_eoi(struct irq_data *d)
+{
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+ iosync();
+ icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_native_teardown_cpu(void)
+{
+ int cpu = smp_processor_id();
+
+ /* Clear any pending IPI */
+ icp_native_set_qirr(cpu, 0xff);
+}
+
+static void icp_native_flush_ipi(void)
+{
+ /* We take the ipi irq but and never return so we
+ * need to EOI the IPI, but want to leave our priority 0
+ *
+ * should we check all the other interrupts too?
+ * should we be flagging idle loop instead?
+ * or creating some task to be scheduled?
+ */
+
+ icp_native_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_native_get_irq(void)
+{
+ unsigned int xirr = icp_native_get_xirr();
+ unsigned int vec = xirr & 0x00ffffff;
+ unsigned int irq;
+
+ if (vec == XICS_IRQ_SPURIOUS)
+ return NO_IRQ;
+
+ irq = irq_radix_revmap_lookup(xics_host, vec);
+ if (likely(irq != NO_IRQ)) {
+ xics_push_cppr(vec);
+ return irq;
+ }
+
+ /* We don't have a linux mapping, so have rtas mask it. */
+ xics_mask_unknown_vec(vec);
+
+ /* We might learn about it later, so EOI it */
+ icp_native_set_xirr(xirr);
+
+ return NO_IRQ;
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_native_cause_ipi(int cpu, unsigned long data)
+{
+ icp_native_set_qirr(cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
+{
+ int cpu = smp_processor_id();
+
+ icp_native_set_qirr(cpu, 0xff);
+
+ return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
+ unsigned long size)
+{
+ char *rname;
+ int i, cpu = -1;
+
+ /* This may look gross but it's good enough for now, we don't quite
+ * have a hard -> linux processor id matching.
+ */
+ for_each_possible_cpu(i) {
+ if (!cpu_present(i))
+ continue;
+ if (hw_id == get_hard_smp_processor_id(i)) {
+ cpu = i;
+ break;
+ }
+ }
+
+ /* Fail, skip that CPU. Don't print, it's normal, some XICS come up
+ * with way more entries in there than you have CPUs
+ */
+ if (cpu == -1)
+ return 0;
+
+ rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation",
+ cpu, hw_id);
+
+ if (!request_mem_region(addr, size, rname)) {
+ pr_warning("icp_native: Could not reserve ICP MMIO"
+ " for CPU %d, interrupt server #0x%x\n",
+ cpu, hw_id);
+ return -EBUSY;
+ }
+
+ icp_native_regs[cpu] = ioremap(addr, size);
+ if (!icp_native_regs[cpu]) {
+ pr_warning("icp_native: Failed ioremap for CPU %d, "
+ "interrupt server #0x%x, addr %#lx\n",
+ cpu, hw_id, addr);
+ release_mem_region(addr, size);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int __init icp_native_init_one_node(struct device_node *np,
+ unsigned int *indx)
+{
+ unsigned int ilen;
+ const u32 *ireg;
+ int i;
+ int reg_tuple_size;
+ int num_servers = 0;
+
+ /* This code does the theorically broken assumption that the interrupt
+ * server numbers are the same as the hard CPU numbers.
+ * This happens to be the case so far but we are playing with fire...
+ * should be fixed one of these days. -BenH.
+ */
+ ireg = of_get_property(np, "ibm,interrupt-server-ranges", &ilen);
+
+ /* Do that ever happen ? we'll know soon enough... but even good'old
+ * f80 does have that property ..
+ */
+ WARN_ON((ireg == NULL) || (ilen != 2*sizeof(u32)));
+
+ if (ireg) {
+ *indx = of_read_number(ireg, 1);
+ if (ilen >= 2*sizeof(u32))
+ num_servers = of_read_number(ireg + 1, 1);
+ }
+
+ ireg = of_get_property(np, "reg", &ilen);
+ if (!ireg) {
+ pr_err("icp_native: Can't find interrupt reg property");
+ return -1;
+ }
+
+ reg_tuple_size = (of_n_addr_cells(np) + of_n_size_cells(np)) * 4;
+ if (((ilen % reg_tuple_size) != 0)
+ || (num_servers && (num_servers != (ilen / reg_tuple_size)))) {
+ pr_err("icp_native: ICP reg len (%d) != num servers (%d)",
+ ilen / reg_tuple_size, num_servers);
+ return -1;
+ }
+
+ for (i = 0; i < (ilen / reg_tuple_size); i++) {
+ struct resource r;
+ int err;
+
+ err = of_address_to_resource(np, i, &r);
+ if (err) {
+ pr_err("icp_native: Could not translate ICP MMIO"
+ " for interrupt server 0x%x (%d)\n", *indx, err);
+ return -1;
+ }
+
+ if (icp_native_map_one_cpu(*indx, r.start, r.end - r.start))
+ return -1;
+
+ (*indx)++;
+ }
+ return 0;
+}
+
+static const struct icp_ops icp_native_ops = {
+ .get_irq = icp_native_get_irq,
+ .eoi = icp_native_eoi,
+ .set_priority = icp_native_set_cpu_priority,
+ .teardown_cpu = icp_native_teardown_cpu,
+ .flush_ipi = icp_native_flush_ipi,
+#ifdef CONFIG_SMP
+ .ipi_action = icp_native_ipi_action,
+ .cause_ipi = icp_native_cause_ipi,
+#endif
+};
+
+int icp_native_init(void)
+{
+ struct device_node *np;
+ u32 indx = 0;
+ int found = 0;
+
+ for_each_compatible_node(np, NULL, "ibm,ppc-xicp")
+ if (icp_native_init_one_node(np, &indx) == 0)
+ found = 1;
+ if (!found) {
+ for_each_node_by_type(np,
+ "PowerPC-External-Interrupt-Presentation") {
+ if (icp_native_init_one_node(np, &indx) == 0)
+ found = 1;
+ }
+ }
+
+ if (found == 0)
+ return -ENODEV;
+
+ icp_ops = &icp_native_ops;
+
+ return 0;
+}
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
new file mode 100644
index 00000000000..c782f85cf7e
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -0,0 +1,240 @@
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/rtas.h>
+
+/* RTAS service tokens */
+static int ibm_get_xive;
+static int ibm_set_xive;
+static int ibm_int_on;
+static int ibm_int_off;
+
+static int ics_rtas_map(struct ics *ics, unsigned int virq);
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec);
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec);
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node);
+
+/* Only one global & state struct ics */
+static struct ics ics_rtas = {
+ .map = ics_rtas_map,
+ .mask_unknown = ics_rtas_mask_unknown,
+ .get_server = ics_rtas_get_server,
+ .host_match = ics_rtas_host_match,
+};
+
+static void ics_rtas_unmask_irq(struct irq_data *d)
+{
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+ int call_status;
+ int server;
+
+ pr_devel("xics: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+ if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+ return;
+
+ server = xics_get_irq_server(d->irq, d->affinity, 0);
+
+ call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server,
+ DEFAULT_PRIORITY);
+ if (call_status != 0) {
+ printk(KERN_ERR
+ "%s: ibm_set_xive irq %u server %x returned %d\n",
+ __func__, hw_irq, server, call_status);
+ return;
+ }
+
+ /* Now unmask the interrupt (often a no-op) */
+ call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq);
+ if (call_status != 0) {
+ printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
+ __func__, hw_irq, call_status);
+ return;
+ }
+}
+
+static unsigned int ics_rtas_startup(struct irq_data *d)
+{
+#ifdef CONFIG_PCI_MSI
+ /*
+ * The generic MSI code returns with the interrupt disabled on the
+ * card, using the MSI mask bits. Firmware doesn't appear to unmask
+ * at that level, so we do it here by hand.
+ */
+ if (d->msi_desc)
+ unmask_msi_irq(d);
+#endif
+ /* unmask it */
+ ics_rtas_unmask_irq(d);
+ return 0;
+}
+
+static void ics_rtas_mask_real_irq(unsigned int hw_irq)
+{
+ int call_status;
+
+ if (hw_irq == XICS_IPI)
+ return;
+
+ call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq);
+ if (call_status != 0) {
+ printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
+ __func__, hw_irq, call_status);
+ return;
+ }
+
+ /* Have to set XIVE to 0xff to be able to remove a slot */
+ call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq,
+ xics_default_server, 0xff);
+ if (call_status != 0) {
+ printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
+ __func__, hw_irq, call_status);
+ return;
+ }
+}
+
+static void ics_rtas_mask_irq(struct irq_data *d)
+{
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+ pr_devel("xics: mask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+ if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+ return;
+ ics_rtas_mask_real_irq(hw_irq);
+}
+
+static int ics_rtas_set_affinity(struct irq_data *d,
+ const struct cpumask *cpumask,
+ bool force)
+{
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+ int status;
+ int xics_status[2];
+ int irq_server;
+
+ if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+ return -1;
+
+ status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq);
+
+ if (status) {
+ printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
+ __func__, hw_irq, status);
+ return -1;
+ }
+
+ irq_server = xics_get_irq_server(d->irq, cpumask, 1);
+ if (irq_server == -1) {
+ char cpulist[128];
+ cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
+ printk(KERN_WARNING
+ "%s: No online cpus in the mask %s for irq %d\n",
+ __func__, cpulist, d->irq);
+ return -1;
+ }
+
+ status = rtas_call(ibm_set_xive, 3, 1, NULL,
+ hw_irq, irq_server, xics_status[1]);
+
+ if (status) {
+ printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
+ __func__, hw_irq, status);
+ return -1;
+ }
+
+ return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_rtas_irq_chip = {
+ .name = "XICS",
+ .irq_startup = ics_rtas_startup,
+ .irq_mask = ics_rtas_mask_irq,
+ .irq_unmask = ics_rtas_unmask_irq,
+ .irq_eoi = NULL, /* Patched at init time */
+ .irq_set_affinity = ics_rtas_set_affinity
+};
+
+static int ics_rtas_map(struct ics *ics, unsigned int virq)
+{
+ unsigned int hw_irq = (unsigned int)virq_to_hw(virq);
+ int status[2];
+ int rc;
+
+ if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS))
+ return -EINVAL;
+
+ /* Check if RTAS knows about this interrupt */
+ rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq);
+ if (rc)
+ return -ENXIO;
+
+ irq_set_chip_and_handler(virq, &ics_rtas_irq_chip, handle_fasteoi_irq);
+ irq_set_chip_data(virq, &ics_rtas);
+
+ return 0;
+}
+
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec)
+{
+ ics_rtas_mask_real_irq(vec);
+}
+
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
+{
+ int rc, status[2];
+
+ rc = rtas_call(ibm_get_xive, 1, 3, status, vec);
+ if (rc)
+ return -1;
+ return status[0];
+}
+
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node)
+{
+ /* IBM machines have interrupt parents of various funky types for things
+ * like vdevices, events, etc... The trick we use here is to match
+ * everything here except the legacy 8259 which is compatible "chrp,iic"
+ */
+ return !of_device_is_compatible(node, "chrp,iic");
+}
+
+int ics_rtas_init(void)
+{
+ ibm_get_xive = rtas_token("ibm,get-xive");
+ ibm_set_xive = rtas_token("ibm,set-xive");
+ ibm_int_on = rtas_token("ibm,int-on");
+ ibm_int_off = rtas_token("ibm,int-off");
+
+ /* We enable the RTAS "ICS" if RTAS is present with the
+ * appropriate tokens
+ */
+ if (ibm_get_xive == RTAS_UNKNOWN_SERVICE ||
+ ibm_set_xive == RTAS_UNKNOWN_SERVICE)
+ return -ENODEV;
+
+ /* We need to patch our irq chip's EOI to point to the
+ * right ICP
+ */
+ ics_rtas_irq_chip.irq_eoi = icp_ops->eoi;
+
+ /* Register ourselves */
+ xics_register_ics(&ics_rtas);
+
+ return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
new file mode 100644
index 00000000000..445c5a01b76
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/rtas.h>
+#include <asm/xics.h>
+#include <asm/firmware.h>
+
+/* Globals common to all ICP/ICS implementations */
+const struct icp_ops *icp_ops;
+
+unsigned int xics_default_server = 0xff;
+unsigned int xics_default_distrib_server = 0;
+unsigned int xics_interrupt_server_size = 8;
+
+DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
+
+struct irq_host *xics_host;
+
+static LIST_HEAD(ics_list);
+
+void xics_update_irq_servers(void)
+{
+ int i, j;
+ struct device_node *np;
+ u32 ilen;
+ const u32 *ireg;
+ u32 hcpuid;
+
+ /* Find the server numbers for the boot cpu. */
+ np = of_get_cpu_node(boot_cpuid, NULL);
+ BUG_ON(!np);
+
+ hcpuid = get_hard_smp_processor_id(boot_cpuid);
+ xics_default_server = xics_default_distrib_server = hcpuid;
+
+ pr_devel("xics: xics_default_server = 0x%x\n", xics_default_server);
+
+ ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
+ if (!ireg) {
+ of_node_put(np);
+ return;
+ }
+
+ i = ilen / sizeof(int);
+
+ /* Global interrupt distribution server is specified in the last
+ * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last
+ * entry fom this property for current boot cpu id and use it as
+ * default distribution server
+ */
+ for (j = 0; j < i; j += 2) {
+ if (ireg[j] == hcpuid) {
+ xics_default_distrib_server = ireg[j+1];
+ break;
+ }
+ }
+ pr_devel("xics: xics_default_distrib_server = 0x%x\n",
+ xics_default_distrib_server);
+ of_node_put(np);
+}
+
+/* GIQ stuff, currently only supported on RTAS setups, will have
+ * to be sorted properly for bare metal
+ */
+void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
+{
+#ifdef CONFIG_PPC_RTAS
+ int index;
+ int status;
+
+ if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
+ return;
+
+ index = (1UL << xics_interrupt_server_size) - 1 - gserver;
+
+ status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
+
+ WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
+ GLOBAL_INTERRUPT_QUEUE, index, join, status);
+#endif
+}
+
+void xics_setup_cpu(void)
+{
+ icp_ops->set_priority(LOWEST_PRIORITY);
+
+ xics_set_cpu_giq(xics_default_distrib_server, 1);
+}
+
+void xics_mask_unknown_vec(unsigned int vec)
+{
+ struct ics *ics;
+
+ pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec);
+
+ list_for_each_entry(ics, &ics_list, link)
+ ics->mask_unknown(ics, vec);
+}
+
+
+#ifdef CONFIG_SMP
+
+static void xics_request_ipi(void)
+{
+ unsigned int ipi;
+
+ ipi = irq_create_mapping(xics_host, XICS_IPI);
+ BUG_ON(ipi == NO_IRQ);
+
+ /*
+ * IPIs are marked IRQF_DISABLED as they must run with irqs
+ * disabled, and PERCPU. The handler was set in map.
+ */
+ BUG_ON(request_irq(ipi, icp_ops->ipi_action,
+ IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL));
+}
+
+int __init xics_smp_probe(void)
+{
+ /* Setup cause_ipi callback based on which ICP is used */
+ smp_ops->cause_ipi = icp_ops->cause_ipi;
+
+ /* Register all the IPIs */
+ xics_request_ipi();
+
+ return cpumask_weight(cpu_possible_mask);
+}
+
+#endif /* CONFIG_SMP */
+
+void xics_teardown_cpu(void)
+{
+ struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+ /*
+ * we have to reset the cppr index to 0 because we're
+ * not going to return from the IPI
+ */
+ os_cppr->index = 0;
+ icp_ops->set_priority(0);
+ icp_ops->teardown_cpu();
+}
+
+void xics_kexec_teardown_cpu(int secondary)
+{
+ xics_teardown_cpu();
+
+ icp_ops->flush_ipi();
+
+ /*
+ * Some machines need to have at least one cpu in the GIQ,
+ * so leave the master cpu in the group.
+ */
+ if (secondary)
+ xics_set_cpu_giq(xics_default_distrib_server, 0);
+}
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Interrupts are disabled. */
+void xics_migrate_irqs_away(void)
+{
+ int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
+ unsigned int irq, virq;
+
+ /* If we used to be the default server, move to the new "boot_cpuid" */
+ if (hw_cpu == xics_default_server)
+ xics_update_irq_servers();
+
+ /* Reject any interrupt that was queued to us... */
+ icp_ops->set_priority(0);
+
+ /* Remove ourselves from the global interrupt queue */
+ xics_set_cpu_giq(xics_default_distrib_server, 0);
+
+ /* Allow IPIs again... */
+ icp_ops->set_priority(DEFAULT_PRIORITY);
+
+ for_each_irq(virq) {
+ struct irq_desc *desc;
+ struct irq_chip *chip;
+ long server;
+ unsigned long flags;
+ struct ics *ics;
+
+ /* We can't set affinity on ISA interrupts */
+ if (virq < NUM_ISA_INTERRUPTS)
+ continue;
+ if (!virq_is_host(virq, xics_host))
+ continue;
+ irq = (unsigned int)virq_to_hw(virq);
+ /* We need to get IPIs still. */
+ if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+ continue;
+ desc = irq_to_desc(virq);
+ /* We only need to migrate enabled IRQS */
+ if (!desc || !desc->action)
+ continue;
+ chip = irq_desc_get_chip(desc);
+ if (!chip || !chip->irq_set_affinity)
+ continue;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+
+ /* Locate interrupt server */
+ server = -1;
+ ics = irq_get_chip_data(virq);
+ if (ics)
+ server = ics->get_server(ics, irq);
+ if (server < 0) {
+ printk(KERN_ERR "%s: Can't find server for irq %d\n",
+ __func__, irq);
+ goto unlock;
+ }
+
+ /* We only support delivery to all cpus or to one cpu.
+ * The irq has to be migrated only in the single cpu
+ * case.
+ */
+ if (server != hw_cpu)
+ goto unlock;
+
+ /* This is expected during cpu offline. */
+ if (cpu_online(cpu))
+ pr_warning("IRQ %u affinity broken off cpu %u\n",
+ virq, cpu);
+
+ /* Reset affinity to all cpus */
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+ irq_set_affinity(virq, cpu_all_mask);
+ continue;
+unlock:
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+ }
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_SMP
+/*
+ * For the moment we only implement delivery to all cpus or one cpu.
+ *
+ * If the requested affinity is cpu_all_mask, we set global affinity.
+ * If not we set it to the first cpu in the mask, even if multiple cpus
+ * are set. This is so things like irqbalance (which set core and package
+ * wide affinities) do the right thing.
+ *
+ * We need to fix this to implement support for the links
+ */
+int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+ unsigned int strict_check)
+{
+
+ if (!distribute_irqs)
+ return xics_default_server;
+
+ if (!cpumask_subset(cpu_possible_mask, cpumask)) {
+ int server = cpumask_first_and(cpu_online_mask, cpumask);
+
+ if (server < nr_cpu_ids)
+ return get_hard_smp_processor_id(server);
+
+ if (strict_check)
+ return -1;
+ }
+
+ /*
+ * Workaround issue with some versions of JS20 firmware that
+ * deliver interrupts to cpus which haven't been started. This
+ * happens when using the maxcpus= boot option.
+ */
+ if (cpumask_equal(cpu_online_mask, cpu_present_mask))
+ return xics_default_distrib_server;
+
+ return xics_default_server;
+}
+#endif /* CONFIG_SMP */
+
+static int xics_host_match(struct irq_host *h, struct device_node *node)
+{
+ struct ics *ics;
+
+ list_for_each_entry(ics, &ics_list, link)
+ if (ics->host_match(ics, node))
+ return 1;
+
+ return 0;
+}
+
+/* Dummies */
+static void xics_ipi_unmask(struct irq_data *d) { }
+static void xics_ipi_mask(struct irq_data *d) { }
+
+static struct irq_chip xics_ipi_chip = {
+ .name = "XICS",
+ .irq_eoi = NULL, /* Patched at init time */
+ .irq_mask = xics_ipi_mask,
+ .irq_unmask = xics_ipi_unmask,
+};
+
+static int xics_host_map(struct irq_host *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ struct ics *ics;
+
+ pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
+
+ /* Insert the interrupt mapping into the radix tree for fast lookup */
+ irq_radix_revmap_insert(xics_host, virq, hw);
+
+ /* They aren't all level sensitive but we just don't really know */
+ irq_set_status_flags(virq, IRQ_LEVEL);
+
+ /* Don't call into ICS for IPIs */
+ if (hw == XICS_IPI) {
+ irq_set_chip_and_handler(virq, &xics_ipi_chip,
+ handle_percpu_irq);
+ return 0;
+ }
+
+ /* Let the ICS setup the chip data */
+ list_for_each_entry(ics, &ics_list, link)
+ if (ics->map(ics, virq) == 0)
+ return 0;
+
+ return -EINVAL;
+}
+
+static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+ /* Current xics implementation translates everything
+ * to level. It is not technically right for MSIs but this
+ * is irrelevant at this point. We might get smarter in the future
+ */
+ *out_hwirq = intspec[0];
+ *out_flags = IRQ_TYPE_LEVEL_LOW;
+
+ return 0;
+}
+
+static struct irq_host_ops xics_host_ops = {
+ .match = xics_host_match,
+ .map = xics_host_map,
+ .xlate = xics_host_xlate,
+};
+
+static void __init xics_init_host(void)
+{
+ xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops,
+ XICS_IRQ_SPURIOUS);
+ BUG_ON(xics_host == NULL);
+ irq_set_default_host(xics_host);
+}
+
+void __init xics_register_ics(struct ics *ics)
+{
+ list_add(&ics->link, &ics_list);
+}
+
+static void __init xics_get_server_size(void)
+{
+ struct device_node *np;
+ const u32 *isize;
+
+ /* We fetch the interrupt server size from the first ICS node
+ * we find if any
+ */
+ np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xics");
+ if (!np)
+ return;
+ isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
+ if (!isize)
+ return;
+ xics_interrupt_server_size = *isize;
+ of_node_put(np);
+}
+
+void __init xics_init(void)
+{
+ int rc = -1;
+
+ /* Fist locate ICP */
+#ifdef CONFIG_PPC_ICP_HV
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ rc = icp_hv_init();
+#endif
+#ifdef CONFIG_PPC_ICP_NATIVE
+ if (rc < 0)
+ rc = icp_native_init();
+#endif
+ if (rc < 0) {
+ pr_warning("XICS: Cannot find a Presentation Controller !\n");
+ return;
+ }
+
+ /* Copy get_irq callback over to ppc_md */
+ ppc_md.get_irq = icp_ops->get_irq;
+
+ /* Patch up IPI chip EOI */
+ xics_ipi_chip.irq_eoi = icp_ops->eoi;
+
+ /* Now locate ICS */
+#ifdef CONFIG_PPC_ICS_RTAS
+ rc = ics_rtas_init();
+#endif
+ if (rc < 0)
+ pr_warning("XICS: Cannot find a Source Controller !\n");
+
+ /* Initialize common bits */
+ xics_get_server_size();
+ xics_update_irq_servers();
+ xics_init_host();
+ xics_setup_cpu();
+}
diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c
index 0a13fc19e28..6183799754a 100644
--- a/arch/powerpc/sysdev/xilinx_intc.c
+++ b/arch/powerpc/sysdev/xilinx_intc.c
@@ -71,7 +71,7 @@ static unsigned char xilinx_intc_map_senses[] = {
*/
static void xilinx_intc_mask(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void * regs = irq_data_get_irq_chip_data(d);
pr_debug("mask: %d\n", irq);
out_be32(regs + XINTC_CIE, 1 << irq);
@@ -87,7 +87,7 @@ static int xilinx_intc_set_type(struct irq_data *d, unsigned int flow_type)
*/
static void xilinx_intc_level_unmask(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void * regs = irq_data_get_irq_chip_data(d);
pr_debug("unmask: %d\n", irq);
out_be32(regs + XINTC_SIE, 1 << irq);
@@ -112,7 +112,7 @@ static struct irq_chip xilinx_intc_level_irqchip = {
*/
static void xilinx_intc_edge_unmask(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void *regs = irq_data_get_irq_chip_data(d);
pr_debug("unmask: %d\n", irq);
out_be32(regs + XINTC_SIE, 1 << irq);
@@ -120,7 +120,7 @@ static void xilinx_intc_edge_unmask(struct irq_data *d)
static void xilinx_intc_edge_ack(struct irq_data *d)
{
- int irq = virq_to_hw(d->irq);
+ int irq = irqd_to_hwirq(d);
void * regs = irq_data_get_irq_chip_data(d);
pr_debug("ack: %d\n", irq);
out_be32(regs + XINTC_IAR, 1 << irq);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 33794c1d92c..42541bbcc7f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -334,7 +334,7 @@ static void release_output_lock(void)
int cpus_are_in_xmon(void)
{
- return !cpus_empty(cpus_in_xmon);
+ return !cpumask_empty(&cpus_in_xmon);
}
#endif
@@ -373,7 +373,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
#ifdef CONFIG_SMP
cpu = smp_processor_id();
- if (cpu_isset(cpu, cpus_in_xmon)) {
+ if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
get_output_lock();
excprint(regs);
printf("cpu 0x%x: Exception %lx %s in xmon, "
@@ -396,10 +396,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
}
xmon_fault_jmp[cpu] = recurse_jmp;
- cpu_set(cpu, cpus_in_xmon);
+ cpumask_set_cpu(cpu, &cpus_in_xmon);
bp = NULL;
- if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF))
+ if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT))
bp = at_breakpoint(regs->nip);
if (bp || unrecoverable_excp(regs))
fromipi = 0;
@@ -437,10 +437,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
xmon_owner = cpu;
mb();
if (ncpus > 1) {
- smp_send_debugger_break(MSG_ALL_BUT_SELF);
+ smp_send_debugger_break();
/* wait for other cpus to come in */
for (timeout = 100000000; timeout != 0; --timeout) {
- if (cpus_weight(cpus_in_xmon) >= ncpus)
+ if (cpumask_weight(&cpus_in_xmon) >= ncpus)
break;
barrier();
}
@@ -484,7 +484,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
}
}
leave:
- cpu_clear(cpu, cpus_in_xmon);
+ cpumask_clear_cpu(cpu, &cpus_in_xmon);
xmon_fault_jmp[cpu] = NULL;
#else
/* UP is simple... */
@@ -529,7 +529,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
}
}
#else
- if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) {
+ if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) {
bp = at_breakpoint(regs->nip);
if (bp != NULL) {
int stepped = emulate_step(regs, bp->instr[0]);
@@ -578,7 +578,7 @@ static int xmon_bpt(struct pt_regs *regs)
struct bpt *bp;
unsigned long offset;
- if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+ if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
/* Are we at the trap at bp->instr[1] for some bp? */
@@ -609,7 +609,7 @@ static int xmon_sstep(struct pt_regs *regs)
static int xmon_dabr_match(struct pt_regs *regs)
{
- if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+ if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
if (dabr.enabled == 0)
return 0;
@@ -619,7 +619,7 @@ static int xmon_dabr_match(struct pt_regs *regs)
static int xmon_iabr_match(struct pt_regs *regs)
{
- if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+ if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
if (iabr == NULL)
return 0;
@@ -630,7 +630,7 @@ static int xmon_iabr_match(struct pt_regs *regs)
static int xmon_ipi(struct pt_regs *regs)
{
#ifdef CONFIG_SMP
- if (in_xmon && !cpu_isset(smp_processor_id(), cpus_in_xmon))
+ if (in_xmon && !cpumask_test_cpu(smp_processor_id(), &cpus_in_xmon))
xmon_core(regs, 1);
#endif
return 0;
@@ -644,7 +644,7 @@ static int xmon_fault_handler(struct pt_regs *regs)
if (in_xmon && catch_memory_errors)
handle_fault(regs); /* doesn't return */
- if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) {
+ if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) {
bp = in_breakpoint_table(regs->nip, &offset);
if (bp != NULL) {
regs->nip = bp->address + offset;
@@ -929,7 +929,7 @@ static int do_step(struct pt_regs *regs)
int stepped;
/* check we are in 64-bit kernel mode, translation enabled */
- if ((regs->msr & (MSR_SF|MSR_PR|MSR_IR)) == (MSR_SF|MSR_IR)) {
+ if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) {
if (mread(regs->nip, &instr, 4) == 4) {
stepped = emulate_step(regs, instr);
if (stepped < 0) {
@@ -976,7 +976,7 @@ static int cpu_cmd(void)
printf("cpus stopped:");
count = 0;
for (cpu = 0; cpu < NR_CPUS; ++cpu) {
- if (cpu_isset(cpu, cpus_in_xmon)) {
+ if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
if (count == 0)
printf(" %x", cpu);
++count;
@@ -992,7 +992,7 @@ static int cpu_cmd(void)
return 0;
}
/* try to switch to cpu specified */
- if (!cpu_isset(cpu, cpus_in_xmon)) {
+ if (!cpumask_test_cpu(cpu, &cpus_in_xmon)) {
printf("cpu 0x%x isn't in xmon\n", cpu);
return 0;
}
@@ -1497,6 +1497,10 @@ static void prregs(struct pt_regs *fp)
#endif
printf("pc = ");
xmon_print_symbol(fp->nip, " ", "\n");
+ if (TRAP(fp) != 0xc00 && cpu_has_feature(CPU_FTR_CFAR)) {
+ printf("cfar= ");
+ xmon_print_symbol(fp->orig_gpr3, " ", "\n");
+ }
printf("lr = ");
xmon_print_symbol(fp->link, " ", "\n");
printf("msr = "REG" cr = %.8lx\n", fp->msr, fp->ccr);
@@ -2663,7 +2667,7 @@ static void dump_stab(void)
void dump_segments(void)
{
- if (cpu_has_feature(CPU_FTR_SLB))
+ if (mmu_has_feature(MMU_FTR_SLB))
dump_slb();
else
dump_stab();
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 1cf81d77c5a..7f0b7cda625 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
+obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 58f46734465..a9ce135893f 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -31,7 +31,8 @@
#define AES_KEYLEN_192 2
#define AES_KEYLEN_256 4
-static char keylen_flag = 0;
+static u8 *ctrblk;
+static char keylen_flag;
struct s390_aes_ctx {
u8 iv[AES_BLOCK_SIZE];
@@ -45,6 +46,24 @@ struct s390_aes_ctx {
} fallback;
};
+struct pcc_param {
+ u8 key[32];
+ u8 tweak[16];
+ u8 block[16];
+ u8 bit[16];
+ u8 xts[16];
+};
+
+struct s390_xts_ctx {
+ u8 key[32];
+ u8 xts_param[16];
+ struct pcc_param pcc;
+ long enc;
+ long dec;
+ int key_len;
+ struct crypto_blkcipher *fallback;
+};
+
/*
* Check if the key_len is supported by the HW.
* Returns 0 if it is, a positive number if it is not and software fallback is
@@ -504,15 +523,337 @@ static struct crypto_alg cbc_aes_alg = {
}
};
+static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int len)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ unsigned int ret;
+
+ xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+ xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags &
+ CRYPTO_TFM_REQ_MASK);
+
+ ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len);
+ if (ret) {
+ tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+ tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags &
+ CRYPTO_TFM_RES_MASK);
+ }
+ return ret;
+}
+
+static int xts_fallback_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_blkcipher *tfm;
+ unsigned int ret;
+
+ tfm = desc->tfm;
+ desc->tfm = xts_ctx->fallback;
+
+ ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes);
+
+ desc->tfm = tfm;
+ return ret;
+}
+
+static int xts_fallback_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_blkcipher *tfm;
+ unsigned int ret;
+
+ tfm = desc->tfm;
+ desc->tfm = xts_ctx->fallback;
+
+ ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes);
+
+ desc->tfm = tfm;
+ return ret;
+}
+
+static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ u32 *flags = &tfm->crt_flags;
+
+ switch (key_len) {
+ case 32:
+ xts_ctx->enc = KM_XTS_128_ENCRYPT;
+ xts_ctx->dec = KM_XTS_128_DECRYPT;
+ memcpy(xts_ctx->key + 16, in_key, 16);
+ memcpy(xts_ctx->pcc.key + 16, in_key + 16, 16);
+ break;
+ case 48:
+ xts_ctx->enc = 0;
+ xts_ctx->dec = 0;
+ xts_fallback_setkey(tfm, in_key, key_len);
+ break;
+ case 64:
+ xts_ctx->enc = KM_XTS_256_ENCRYPT;
+ xts_ctx->dec = KM_XTS_256_DECRYPT;
+ memcpy(xts_ctx->key, in_key, 32);
+ memcpy(xts_ctx->pcc.key, in_key + 32, 32);
+ break;
+ default:
+ *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+ xts_ctx->key_len = key_len;
+ return 0;
+}
+
+static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
+ struct s390_xts_ctx *xts_ctx,
+ struct blkcipher_walk *walk)
+{
+ unsigned int offset = (xts_ctx->key_len >> 1) & 0x10;
+ int ret = blkcipher_walk_virt(desc, walk);
+ unsigned int nbytes = walk->nbytes;
+ unsigned int n;
+ u8 *in, *out;
+ void *param;
+
+ if (!nbytes)
+ goto out;
+
+ memset(xts_ctx->pcc.block, 0, sizeof(xts_ctx->pcc.block));
+ memset(xts_ctx->pcc.bit, 0, sizeof(xts_ctx->pcc.bit));
+ memset(xts_ctx->pcc.xts, 0, sizeof(xts_ctx->pcc.xts));
+ memcpy(xts_ctx->pcc.tweak, walk->iv, sizeof(xts_ctx->pcc.tweak));
+ param = xts_ctx->pcc.key + offset;
+ ret = crypt_s390_pcc(func, param);
+ BUG_ON(ret < 0);
+
+ memcpy(xts_ctx->xts_param, xts_ctx->pcc.xts, 16);
+ param = xts_ctx->key + offset;
+ do {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ out = walk->dst.virt.addr;
+ in = walk->src.virt.addr;
+
+ ret = crypt_s390_km(func, param, out, in, n);
+ BUG_ON(ret < 0 || ret != n);
+
+ nbytes &= AES_BLOCK_SIZE - 1;
+ ret = blkcipher_walk_done(desc, walk, nbytes);
+ } while ((nbytes = walk->nbytes));
+out:
+ return ret;
+}
+
+static int xts_aes_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ if (unlikely(xts_ctx->key_len == 48))
+ return xts_fallback_encrypt(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk);
+}
+
+static int xts_aes_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ if (unlikely(xts_ctx->key_len == 48))
+ return xts_fallback_decrypt(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk);
+}
+
+static int xts_fallback_init(struct crypto_tfm *tfm)
+{
+ const char *name = tfm->__crt_alg->cra_name;
+ struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+
+ xts_ctx->fallback = crypto_alloc_blkcipher(name, 0,
+ CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+
+ if (IS_ERR(xts_ctx->fallback)) {
+ pr_err("Allocating XTS fallback algorithm %s failed\n",
+ name);
+ return PTR_ERR(xts_ctx->fallback);
+ }
+ return 0;
+}
+
+static void xts_fallback_exit(struct crypto_tfm *tfm)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+
+ crypto_free_blkcipher(xts_ctx->fallback);
+ xts_ctx->fallback = NULL;
+}
+
+static struct crypto_alg xts_aes_alg = {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-s390",
+ .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_xts_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(xts_aes_alg.cra_list),
+ .cra_init = xts_fallback_init,
+ .cra_exit = xts_fallback_exit,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_aes_set_key,
+ .encrypt = xts_aes_encrypt,
+ .decrypt = xts_aes_decrypt,
+ }
+ }
+};
+
+static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+ switch (key_len) {
+ case 16:
+ sctx->enc = KMCTR_AES_128_ENCRYPT;
+ sctx->dec = KMCTR_AES_128_DECRYPT;
+ break;
+ case 24:
+ sctx->enc = KMCTR_AES_192_ENCRYPT;
+ sctx->dec = KMCTR_AES_192_DECRYPT;
+ break;
+ case 32:
+ sctx->enc = KMCTR_AES_256_ENCRYPT;
+ sctx->dec = KMCTR_AES_256_DECRYPT;
+ break;
+ }
+
+ return aes_set_key(tfm, in_key, key_len);
+}
+
+static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
+ struct s390_aes_ctx *sctx, struct blkcipher_walk *walk)
+{
+ int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
+ unsigned int i, n, nbytes;
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *out, *in;
+
+ if (!walk->nbytes)
+ return ret;
+
+ memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE);
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ out = walk->dst.virt.addr;
+ in = walk->src.virt.addr;
+ while (nbytes >= AES_BLOCK_SIZE) {
+ /* only use complete blocks, max. PAGE_SIZE */
+ n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
+ nbytes & ~(AES_BLOCK_SIZE - 1);
+ for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
+ memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE);
+ crypto_inc(ctrblk + i, AES_BLOCK_SIZE);
+ }
+ ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk);
+ BUG_ON(ret < 0 || ret != n);
+ if (n > AES_BLOCK_SIZE)
+ memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE);
+ crypto_inc(ctrblk, AES_BLOCK_SIZE);
+ out += n;
+ in += n;
+ nbytes -= n;
+ }
+ ret = blkcipher_walk_done(desc, walk, nbytes);
+ }
+ /*
+ * final block may be < AES_BLOCK_SIZE, copy only nbytes
+ */
+ if (nbytes) {
+ out = walk->dst.virt.addr;
+ in = walk->src.virt.addr;
+ ret = crypt_s390_kmctr(func, sctx->key, buf, in,
+ AES_BLOCK_SIZE, ctrblk);
+ BUG_ON(ret < 0 || ret != AES_BLOCK_SIZE);
+ memcpy(out, buf, nbytes);
+ crypto_inc(ctrblk, AES_BLOCK_SIZE);
+ ret = blkcipher_walk_done(desc, walk, 0);
+ }
+ memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE);
+ return ret;
+}
+
+static int ctr_aes_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_aes_crypt(desc, sctx->enc, sctx, &walk);
+}
+
+static int ctr_aes_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_aes_crypt(desc, sctx->dec, sctx, &walk);
+}
+
+static struct crypto_alg ctr_aes_alg = {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-s390",
+ .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ctr_aes_alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_aes_set_key,
+ .encrypt = ctr_aes_encrypt,
+ .decrypt = ctr_aes_decrypt,
+ }
+ }
+};
+
static int __init aes_s390_init(void)
{
int ret;
- if (crypt_s390_func_available(KM_AES_128_ENCRYPT))
+ if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA))
keylen_flag |= AES_KEYLEN_128;
- if (crypt_s390_func_available(KM_AES_192_ENCRYPT))
+ if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA))
keylen_flag |= AES_KEYLEN_192;
- if (crypt_s390_func_available(KM_AES_256_ENCRYPT))
+ if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA))
keylen_flag |= AES_KEYLEN_256;
if (!keylen_flag)
@@ -535,9 +876,40 @@ static int __init aes_s390_init(void)
if (ret)
goto cbc_aes_err;
+ if (crypt_s390_func_available(KM_XTS_128_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+ crypt_s390_func_available(KM_XTS_256_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+ ret = crypto_register_alg(&xts_aes_alg);
+ if (ret)
+ goto xts_aes_err;
+ }
+
+ if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+ crypt_s390_func_available(KMCTR_AES_192_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+ crypt_s390_func_available(KMCTR_AES_256_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+ ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+ if (!ctrblk) {
+ ret = -ENOMEM;
+ goto ctr_aes_err;
+ }
+ ret = crypto_register_alg(&ctr_aes_alg);
+ if (ret) {
+ free_page((unsigned long) ctrblk);
+ goto ctr_aes_err;
+ }
+ }
+
out:
return ret;
+ctr_aes_err:
+ crypto_unregister_alg(&xts_aes_alg);
+xts_aes_err:
+ crypto_unregister_alg(&cbc_aes_alg);
cbc_aes_err:
crypto_unregister_alg(&ecb_aes_alg);
ecb_aes_err:
@@ -548,6 +920,9 @@ aes_err:
static void __exit aes_s390_fini(void)
{
+ crypto_unregister_alg(&ctr_aes_alg);
+ free_page((unsigned long) ctrblk);
+ crypto_unregister_alg(&xts_aes_alg);
crypto_unregister_alg(&cbc_aes_alg);
crypto_unregister_alg(&ecb_aes_alg);
crypto_unregister_alg(&aes_alg);
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index 7ee9a1b4ad9..49676771bd6 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -24,13 +24,18 @@
#define CRYPT_S390_PRIORITY 300
#define CRYPT_S390_COMPOSITE_PRIORITY 400
+#define CRYPT_S390_MSA 0x1
+#define CRYPT_S390_MSA3 0x2
+#define CRYPT_S390_MSA4 0x4
+
/* s390 cryptographic operations */
enum crypt_s390_operations {
CRYPT_S390_KM = 0x0100,
CRYPT_S390_KMC = 0x0200,
CRYPT_S390_KIMD = 0x0300,
CRYPT_S390_KLMD = 0x0400,
- CRYPT_S390_KMAC = 0x0500
+ CRYPT_S390_KMAC = 0x0500,
+ CRYPT_S390_KMCTR = 0x0600
};
/*
@@ -51,6 +56,10 @@ enum crypt_s390_km_func {
KM_AES_192_DECRYPT = CRYPT_S390_KM | 0x13 | 0x80,
KM_AES_256_ENCRYPT = CRYPT_S390_KM | 0x14,
KM_AES_256_DECRYPT = CRYPT_S390_KM | 0x14 | 0x80,
+ KM_XTS_128_ENCRYPT = CRYPT_S390_KM | 0x32,
+ KM_XTS_128_DECRYPT = CRYPT_S390_KM | 0x32 | 0x80,
+ KM_XTS_256_ENCRYPT = CRYPT_S390_KM | 0x34,
+ KM_XTS_256_DECRYPT = CRYPT_S390_KM | 0x34 | 0x80,
};
/*
@@ -75,6 +84,26 @@ enum crypt_s390_kmc_func {
};
/*
+ * function codes for KMCTR (CIPHER MESSAGE WITH COUNTER)
+ * instruction
+ */
+enum crypt_s390_kmctr_func {
+ KMCTR_QUERY = CRYPT_S390_KMCTR | 0x0,
+ KMCTR_DEA_ENCRYPT = CRYPT_S390_KMCTR | 0x1,
+ KMCTR_DEA_DECRYPT = CRYPT_S390_KMCTR | 0x1 | 0x80,
+ KMCTR_TDEA_128_ENCRYPT = CRYPT_S390_KMCTR | 0x2,
+ KMCTR_TDEA_128_DECRYPT = CRYPT_S390_KMCTR | 0x2 | 0x80,
+ KMCTR_TDEA_192_ENCRYPT = CRYPT_S390_KMCTR | 0x3,
+ KMCTR_TDEA_192_DECRYPT = CRYPT_S390_KMCTR | 0x3 | 0x80,
+ KMCTR_AES_128_ENCRYPT = CRYPT_S390_KMCTR | 0x12,
+ KMCTR_AES_128_DECRYPT = CRYPT_S390_KMCTR | 0x12 | 0x80,
+ KMCTR_AES_192_ENCRYPT = CRYPT_S390_KMCTR | 0x13,
+ KMCTR_AES_192_DECRYPT = CRYPT_S390_KMCTR | 0x13 | 0x80,
+ KMCTR_AES_256_ENCRYPT = CRYPT_S390_KMCTR | 0x14,
+ KMCTR_AES_256_DECRYPT = CRYPT_S390_KMCTR | 0x14 | 0x80,
+};
+
+/*
* function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
* instruction
*/
@@ -83,6 +112,7 @@ enum crypt_s390_kimd_func {
KIMD_SHA_1 = CRYPT_S390_KIMD | 1,
KIMD_SHA_256 = CRYPT_S390_KIMD | 2,
KIMD_SHA_512 = CRYPT_S390_KIMD | 3,
+ KIMD_GHASH = CRYPT_S390_KIMD | 65,
};
/*
@@ -284,6 +314,45 @@ static inline int crypt_s390_kmac(long func, void *param,
}
/**
+ * crypt_s390_kmctr:
+ * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ * @counter: address of counter value
+ *
+ * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of processed
+ * bytes for encryption/decryption funcs
+ */
+static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
+ const u8 *src, long src_len, u8 *counter)
+{
+ register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+ register void *__param asm("1") = param;
+ register const u8 *__src asm("2") = src;
+ register long __src_len asm("3") = src_len;
+ register u8 *__dest asm("4") = dest;
+ register u8 *__ctr asm("6") = counter;
+ int ret = -1;
+
+ asm volatile(
+ "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */
+ "1: brc 1,0b \n" /* handle partial completion */
+ " la %0,0\n"
+ "2:\n"
+ EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest),
+ "+a" (__ctr)
+ : "d" (__func), "a" (__param) : "cc", "memory");
+ if (ret < 0)
+ return ret;
+ return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
+}
+
+/**
* crypt_s390_func_available:
* @func: the function code of the specific function; 0 if op in general
*
@@ -291,13 +360,17 @@ static inline int crypt_s390_kmac(long func, void *param,
*
* Returns 1 if func available; 0 if func or op in general not available
*/
-static inline int crypt_s390_func_available(int func)
+static inline int crypt_s390_func_available(int func,
+ unsigned int facility_mask)
{
unsigned char status[16];
int ret;
- /* check if CPACF facility (bit 17) is available */
- if (!test_facility(17))
+ if (facility_mask & CRYPT_S390_MSA && !test_facility(17))
+ return 0;
+ if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76))
+ return 0;
+ if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77))
return 0;
switch (func & CRYPT_S390_OP_MASK) {
@@ -316,6 +389,10 @@ static inline int crypt_s390_func_available(int func)
case CRYPT_S390_KMAC:
ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0);
break;
+ case CRYPT_S390_KMCTR:
+ ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0,
+ NULL);
+ break;
default:
return 0;
}
@@ -326,4 +403,31 @@ static inline int crypt_s390_func_available(int func)
return (status[func >> 3] & (0x80 >> (func & 7))) != 0;
}
+/**
+ * crypt_s390_pcc:
+ * @func: the function code passed to KM; see crypt_s390_km_func
+ * @param: address of parameter block; see POP for details on each func
+ *
+ * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for success.
+ */
+static inline int crypt_s390_pcc(long func, void *param)
+{
+ register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */
+ register void *__param asm("1") = param;
+ int ret = -1;
+
+ asm volatile(
+ "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */
+ "1: brc 1,0b \n" /* handle partial completion */
+ " la %0,0\n"
+ "2:\n"
+ EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ : "+d" (ret)
+ : "d" (__func), "a" (__param) : "cc", "memory");
+ return ret;
+}
+
+
#endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */
diff --git a/arch/s390/crypto/des_check_key.c b/arch/s390/crypto/des_check_key.c
deleted file mode 100644
index 5706af26644..00000000000
--- a/arch/s390/crypto/des_check_key.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Function for checking keys for the DES and Tripple DES Encryption
- * algorithms.
- *
- * Originally released as descore by Dana L. How <how@isl.stanford.edu>.
- * Modified by Raimar Falke <rf13@inf.tu-dresden.de> for the Linux-Kernel.
- * Derived from Cryptoapi and Nettle implementations, adapted for in-place
- * scatterlist interface. Changed LGPL to GPL per section 3 of the LGPL.
- *
- * s390 Version:
- * Copyright IBM Corp. 2003
- * Author(s): Thomas Spatzier
- * Jan Glauber (jan.glauber@de.ibm.com)
- *
- * Derived from "crypto/des.c"
- * Copyright (c) 1992 Dana L. How.
- * Copyright (c) Raimar Falke <rf13@inf.tu-dresden.de>
- * Copyright (c) Gisle Sflensminde <gisle@ii.uib.no>
- * Copyright (C) 2001 Niels Mvller.
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/crypto.h>
-#include "crypto_des.h"
-
-#define ROR(d,c,o) ((d) = (d) >> (c) | (d) << (o))
-
-static const u8 parity[] = {
- 8,1,0,8,0,8,8,0,0,8,8,0,8,0,2,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,3,
- 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
- 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
- 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
- 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
- 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
- 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
- 4,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,5,0,8,0,8,8,0,0,8,8,0,8,0,6,8,
-};
-
-/*
- * RFC2451: Weak key checks SHOULD be performed.
- */
-int
-crypto_des_check_key(const u8 *key, unsigned int keylen, u32 *flags)
-{
- u32 n, w;
-
- n = parity[key[0]]; n <<= 4;
- n |= parity[key[1]]; n <<= 4;
- n |= parity[key[2]]; n <<= 4;
- n |= parity[key[3]]; n <<= 4;
- n |= parity[key[4]]; n <<= 4;
- n |= parity[key[5]]; n <<= 4;
- n |= parity[key[6]]; n <<= 4;
- n |= parity[key[7]];
- w = 0x88888888L;
-
- if ((*flags & CRYPTO_TFM_REQ_WEAK_KEY)
- && !((n - (w >> 3)) & w)) { /* 1 in 10^10 keys passes this test */
- if (n < 0x41415151) {
- if (n < 0x31312121) {
- if (n < 0x14141515) {
- /* 01 01 01 01 01 01 01 01 */
- if (n == 0x11111111) goto weak;
- /* 01 1F 01 1F 01 0E 01 0E */
- if (n == 0x13131212) goto weak;
- } else {
- /* 01 E0 01 E0 01 F1 01 F1 */
- if (n == 0x14141515) goto weak;
- /* 01 FE 01 FE 01 FE 01 FE */
- if (n == 0x16161616) goto weak;
- }
- } else {
- if (n < 0x34342525) {
- /* 1F 01 1F 01 0E 01 0E 01 */
- if (n == 0x31312121) goto weak;
- /* 1F 1F 1F 1F 0E 0E 0E 0E (?) */
- if (n == 0x33332222) goto weak;
- } else {
- /* 1F E0 1F E0 0E F1 0E F1 */
- if (n == 0x34342525) goto weak;
- /* 1F FE 1F FE 0E FE 0E FE */
- if (n == 0x36362626) goto weak;
- }
- }
- } else {
- if (n < 0x61616161) {
- if (n < 0x44445555) {
- /* E0 01 E0 01 F1 01 F1 01 */
- if (n == 0x41415151) goto weak;
- /* E0 1F E0 1F F1 0E F1 0E */
- if (n == 0x43435252) goto weak;
- } else {
- /* E0 E0 E0 E0 F1 F1 F1 F1 (?) */
- if (n == 0x44445555) goto weak;
- /* E0 FE E0 FE F1 FE F1 FE */
- if (n == 0x46465656) goto weak;
- }
- } else {
- if (n < 0x64646565) {
- /* FE 01 FE 01 FE 01 FE 01 */
- if (n == 0x61616161) goto weak;
- /* FE 1F FE 1F FE 0E FE 0E */
- if (n == 0x63636262) goto weak;
- } else {
- /* FE E0 FE E0 FE F1 FE F1 */
- if (n == 0x64646565) goto weak;
- /* FE FE FE FE FE FE FE FE */
- if (n == 0x66666666) goto weak;
- }
- }
- }
- }
- return 0;
-weak:
- *flags |= CRYPTO_TFM_RES_WEAK_KEY;
- return -EINVAL;
-}
-
-EXPORT_SYMBOL(crypto_des_check_key);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Key Check function for DES & DES3 Cipher Algorithms");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index cc542011839..a52bfd124d8 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -3,7 +3,7 @@
*
* s390 implementation of the DES Cipher Algorithm.
*
- * Copyright IBM Corp. 2003,2007
+ * Copyright IBM Corp. 2003,2011
* Author(s): Thomas Spatzier
* Jan Glauber (jan.glauber@de.ibm.com)
*
@@ -22,22 +22,19 @@
#include "crypt_s390.h"
-#define DES3_192_KEY_SIZE (3 * DES_KEY_SIZE)
+#define DES3_KEY_SIZE (3 * DES_KEY_SIZE)
-struct crypt_s390_des_ctx {
- u8 iv[DES_BLOCK_SIZE];
- u8 key[DES_KEY_SIZE];
-};
+static u8 *ctrblk;
-struct crypt_s390_des3_192_ctx {
+struct s390_des_ctx {
u8 iv[DES_BLOCK_SIZE];
- u8 key[DES3_192_KEY_SIZE];
+ u8 key[DES3_KEY_SIZE];
};
static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
+ unsigned int key_len)
{
- struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
u32 tmp[DES_EXPKEY_WORDS];
@@ -47,22 +44,22 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
return -EINVAL;
}
- memcpy(dctx->key, key, keylen);
+ memcpy(ctx->key, key, key_len);
return 0;
}
static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_DEA_ENCRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
+ crypt_s390_km(KM_DEA_ENCRYPT, ctx->key, out, in, DES_BLOCK_SIZE);
}
static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_DEA_DECRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
+ crypt_s390_km(KM_DEA_DECRYPT, ctx->key, out, in, DES_BLOCK_SIZE);
}
static struct crypto_alg des_alg = {
@@ -71,7 +68,7 @@ static struct crypto_alg des_alg = {
.cra_priority = CRYPT_S390_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des_ctx),
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(des_alg.cra_list),
.cra_u = {
@@ -86,7 +83,7 @@ static struct crypto_alg des_alg = {
};
static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
- void *param, struct blkcipher_walk *walk)
+ u8 *key, struct blkcipher_walk *walk)
{
int ret = blkcipher_walk_virt(desc, walk);
unsigned int nbytes;
@@ -97,7 +94,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
u8 *out = walk->dst.virt.addr;
u8 *in = walk->src.virt.addr;
- ret = crypt_s390_km(func, param, out, in, n);
+ ret = crypt_s390_km(func, key, out, in, n);
BUG_ON((ret < 0) || (ret != n));
nbytes &= DES_BLOCK_SIZE - 1;
@@ -108,7 +105,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
}
static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
- void *param, struct blkcipher_walk *walk)
+ u8 *iv, struct blkcipher_walk *walk)
{
int ret = blkcipher_walk_virt(desc, walk);
unsigned int nbytes = walk->nbytes;
@@ -116,20 +113,20 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
if (!nbytes)
goto out;
- memcpy(param, walk->iv, DES_BLOCK_SIZE);
+ memcpy(iv, walk->iv, DES_BLOCK_SIZE);
do {
/* only use complete blocks */
unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
u8 *out = walk->dst.virt.addr;
u8 *in = walk->src.virt.addr;
- ret = crypt_s390_kmc(func, param, out, in, n);
+ ret = crypt_s390_kmc(func, iv, out, in, n);
BUG_ON((ret < 0) || (ret != n));
nbytes &= DES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, walk, nbytes);
} while ((nbytes = walk->nbytes));
- memcpy(walk->iv, param, DES_BLOCK_SIZE);
+ memcpy(walk->iv, iv, DES_BLOCK_SIZE);
out:
return ret;
@@ -139,22 +136,22 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, sctx->key, &walk);
+ return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, ctx->key, &walk);
}
static int ecb_des_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_DEA_DECRYPT, sctx->key, &walk);
+ return ecb_desall_crypt(desc, KM_DEA_DECRYPT, ctx->key, &walk);
}
static struct crypto_alg ecb_des_alg = {
@@ -163,7 +160,7 @@ static struct crypto_alg ecb_des_alg = {
.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des_ctx),
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(ecb_des_alg.cra_list),
@@ -182,22 +179,22 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, sctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk);
}
static int cbc_des_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, sctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk);
}
static struct crypto_alg cbc_des_alg = {
@@ -206,7 +203,7 @@ static struct crypto_alg cbc_des_alg = {
.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des_ctx),
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(cbc_des_alg.cra_list),
@@ -235,10 +232,10 @@ static struct crypto_alg cbc_des_alg = {
* property.
*
*/
-static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
+static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int key_len)
{
- struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
@@ -248,141 +245,276 @@ static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
*flags |= CRYPTO_TFM_RES_WEAK_KEY;
return -EINVAL;
}
- memcpy(dctx->key, key, keylen);
+ memcpy(ctx->key, key, key_len);
return 0;
}
-static void des3_192_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_TDEA_192_ENCRYPT, dctx->key, dst, (void*)src,
- DES_BLOCK_SIZE);
+ crypt_s390_km(KM_TDEA_192_ENCRYPT, ctx->key, dst, src, DES_BLOCK_SIZE);
}
-static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_TDEA_192_DECRYPT, dctx->key, dst, (void*)src,
- DES_BLOCK_SIZE);
+ crypt_s390_km(KM_TDEA_192_DECRYPT, ctx->key, dst, src, DES_BLOCK_SIZE);
}
-static struct crypto_alg des3_192_alg = {
+static struct crypto_alg des3_alg = {
.cra_name = "des3_ede",
.cra_driver_name = "des3_ede-s390",
.cra_priority = CRYPT_S390_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx),
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(des3_192_alg.cra_list),
+ .cra_list = LIST_HEAD_INIT(des3_alg.cra_list),
.cra_u = {
.cipher = {
- .cia_min_keysize = DES3_192_KEY_SIZE,
- .cia_max_keysize = DES3_192_KEY_SIZE,
- .cia_setkey = des3_192_setkey,
- .cia_encrypt = des3_192_encrypt,
- .cia_decrypt = des3_192_decrypt,
+ .cia_min_keysize = DES3_KEY_SIZE,
+ .cia_max_keysize = DES3_KEY_SIZE,
+ .cia_setkey = des3_setkey,
+ .cia_encrypt = des3_encrypt,
+ .cia_decrypt = des3_decrypt,
}
}
};
-static int ecb_des3_192_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_des3_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, sctx->key, &walk);
+ return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, ctx->key, &walk);
}
-static int ecb_des3_192_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_des3_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, sctx->key, &walk);
+ return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, ctx->key, &walk);
}
-static struct crypto_alg ecb_des3_192_alg = {
+static struct crypto_alg ecb_des3_alg = {
.cra_name = "ecb(des3_ede)",
.cra_driver_name = "ecb-des3_ede-s390",
.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx),
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(
- ecb_des3_192_alg.cra_list),
+ ecb_des3_alg.cra_list),
.cra_u = {
.blkcipher = {
- .min_keysize = DES3_192_KEY_SIZE,
- .max_keysize = DES3_192_KEY_SIZE,
- .setkey = des3_192_setkey,
- .encrypt = ecb_des3_192_encrypt,
- .decrypt = ecb_des3_192_decrypt,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .setkey = des3_setkey,
+ .encrypt = ecb_des3_encrypt,
+ .decrypt = ecb_des3_decrypt,
}
}
};
-static int cbc_des3_192_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_des3_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, sctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk);
}
-static int cbc_des3_192_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_des3_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, sctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk);
}
-static struct crypto_alg cbc_des3_192_alg = {
+static struct crypto_alg cbc_des3_alg = {
.cra_name = "cbc(des3_ede)",
.cra_driver_name = "cbc-des3_ede-s390",
.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx),
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(
- cbc_des3_192_alg.cra_list),
+ cbc_des3_alg.cra_list),
.cra_u = {
.blkcipher = {
- .min_keysize = DES3_192_KEY_SIZE,
- .max_keysize = DES3_192_KEY_SIZE,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
.ivsize = DES_BLOCK_SIZE,
- .setkey = des3_192_setkey,
- .encrypt = cbc_des3_192_encrypt,
- .decrypt = cbc_des3_192_decrypt,
+ .setkey = des3_setkey,
+ .encrypt = cbc_des3_encrypt,
+ .decrypt = cbc_des3_decrypt,
}
}
};
-static int des_s390_init(void)
+static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
+ struct s390_des_ctx *ctx, struct blkcipher_walk *walk)
+{
+ int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
+ unsigned int i, n, nbytes;
+ u8 buf[DES_BLOCK_SIZE];
+ u8 *out, *in;
+
+ memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE);
+ while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ out = walk->dst.virt.addr;
+ in = walk->src.virt.addr;
+ while (nbytes >= DES_BLOCK_SIZE) {
+ /* align to block size, max. PAGE_SIZE */
+ n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
+ nbytes & ~(DES_BLOCK_SIZE - 1);
+ for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
+ memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE,
+ DES_BLOCK_SIZE);
+ crypto_inc(ctrblk + i, DES_BLOCK_SIZE);
+ }
+ ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk);
+ BUG_ON((ret < 0) || (ret != n));
+ if (n > DES_BLOCK_SIZE)
+ memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE,
+ DES_BLOCK_SIZE);
+ crypto_inc(ctrblk, DES_BLOCK_SIZE);
+ out += n;
+ in += n;
+ nbytes -= n;
+ }
+ ret = blkcipher_walk_done(desc, walk, nbytes);
+ }
+
+ /* final block may be < DES_BLOCK_SIZE, copy only nbytes */
+ if (nbytes) {
+ out = walk->dst.virt.addr;
+ in = walk->src.virt.addr;
+ ret = crypt_s390_kmctr(func, ctx->key, buf, in,
+ DES_BLOCK_SIZE, ctrblk);
+ BUG_ON(ret < 0 || ret != DES_BLOCK_SIZE);
+ memcpy(out, buf, nbytes);
+ crypto_inc(ctrblk, DES_BLOCK_SIZE);
+ ret = blkcipher_walk_done(desc, walk, 0);
+ }
+ memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE);
+ return ret;
+}
+
+static int ctr_des_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, ctx, &walk);
+}
+
+static int ctr_des_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, ctx, &walk);
+}
+
+static struct crypto_alg ctr_des_alg = {
+ .cra_name = "ctr(des)",
+ .cra_driver_name = "ctr-des-s390",
+ .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ctr_des_alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_setkey,
+ .encrypt = ctr_des_encrypt,
+ .decrypt = ctr_des_decrypt,
+ }
+ }
+};
+
+static int ctr_des3_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, ctx, &walk);
+}
+
+static int ctr_des3_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, ctx, &walk);
+}
+
+static struct crypto_alg ctr_des3_alg = {
+ .cra_name = "ctr(des3_ede)",
+ .cra_driver_name = "ctr-des3_ede-s390",
+ .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ctr_des3_alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des3_setkey,
+ .encrypt = ctr_des3_encrypt,
+ .decrypt = ctr_des3_decrypt,
+ }
+ }
+};
+
+static int __init des_s390_init(void)
{
int ret;
- if (!crypt_s390_func_available(KM_DEA_ENCRYPT) ||
- !crypt_s390_func_available(KM_TDEA_192_ENCRYPT))
+ if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) ||
+ !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA))
return -EOPNOTSUPP;
ret = crypto_register_alg(&des_alg);
@@ -394,23 +526,46 @@ static int des_s390_init(void)
ret = crypto_register_alg(&cbc_des_alg);
if (ret)
goto cbc_des_err;
- ret = crypto_register_alg(&des3_192_alg);
+ ret = crypto_register_alg(&des3_alg);
if (ret)
- goto des3_192_err;
- ret = crypto_register_alg(&ecb_des3_192_alg);
+ goto des3_err;
+ ret = crypto_register_alg(&ecb_des3_alg);
if (ret)
- goto ecb_des3_192_err;
- ret = crypto_register_alg(&cbc_des3_192_alg);
+ goto ecb_des3_err;
+ ret = crypto_register_alg(&cbc_des3_alg);
if (ret)
- goto cbc_des3_192_err;
+ goto cbc_des3_err;
+
+ if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+ crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+ ret = crypto_register_alg(&ctr_des_alg);
+ if (ret)
+ goto ctr_des_err;
+ ret = crypto_register_alg(&ctr_des3_alg);
+ if (ret)
+ goto ctr_des3_err;
+ ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+ if (!ctrblk) {
+ ret = -ENOMEM;
+ goto ctr_mem_err;
+ }
+ }
out:
return ret;
-cbc_des3_192_err:
- crypto_unregister_alg(&ecb_des3_192_alg);
-ecb_des3_192_err:
- crypto_unregister_alg(&des3_192_alg);
-des3_192_err:
+ctr_mem_err:
+ crypto_unregister_alg(&ctr_des3_alg);
+ctr_des3_err:
+ crypto_unregister_alg(&ctr_des_alg);
+ctr_des_err:
+ crypto_unregister_alg(&cbc_des3_alg);
+cbc_des3_err:
+ crypto_unregister_alg(&ecb_des3_alg);
+ecb_des3_err:
+ crypto_unregister_alg(&des3_alg);
+des3_err:
crypto_unregister_alg(&cbc_des_alg);
cbc_des_err:
crypto_unregister_alg(&ecb_des_alg);
@@ -422,9 +577,14 @@ des_err:
static void __exit des_s390_exit(void)
{
- crypto_unregister_alg(&cbc_des3_192_alg);
- crypto_unregister_alg(&ecb_des3_192_alg);
- crypto_unregister_alg(&des3_192_alg);
+ if (ctrblk) {
+ crypto_unregister_alg(&ctr_des_alg);
+ crypto_unregister_alg(&ctr_des3_alg);
+ free_page((unsigned long) ctrblk);
+ }
+ crypto_unregister_alg(&cbc_des3_alg);
+ crypto_unregister_alg(&ecb_des3_alg);
+ crypto_unregister_alg(&des3_alg);
crypto_unregister_alg(&cbc_des_alg);
crypto_unregister_alg(&ecb_des_alg);
crypto_unregister_alg(&des_alg);
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
new file mode 100644
index 00000000000..b1bd170f24b
--- /dev/null
+++ b/arch/s390/crypto/ghash_s390.c
@@ -0,0 +1,162 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode).
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
+
+#include "crypt_s390.h"
+
+#define GHASH_BLOCK_SIZE 16
+#define GHASH_DIGEST_SIZE 16
+
+struct ghash_ctx {
+ u8 icv[16];
+ u8 key[16];
+};
+
+struct ghash_desc_ctx {
+ u8 buffer[GHASH_BLOCK_SIZE];
+ u32 bytes;
+};
+
+static int ghash_init(struct shash_desc *desc)
+{
+ struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ memset(dctx, 0, sizeof(*dctx));
+
+ return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+
+ if (keylen != GHASH_BLOCK_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
+ memset(ctx->icv, 0, GHASH_BLOCK_SIZE);
+
+ return 0;
+}
+
+static int ghash_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ unsigned int n;
+ u8 *buf = dctx->buffer;
+ int ret;
+
+ if (dctx->bytes) {
+ u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+ n = min(srclen, dctx->bytes);
+ dctx->bytes -= n;
+ srclen -= n;
+
+ memcpy(pos, src, n);
+ src += n;
+
+ if (!dctx->bytes) {
+ ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf,
+ GHASH_BLOCK_SIZE);
+ BUG_ON(ret != GHASH_BLOCK_SIZE);
+ }
+ }
+
+ n = srclen & ~(GHASH_BLOCK_SIZE - 1);
+ if (n) {
+ ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n);
+ BUG_ON(ret != n);
+ src += n;
+ srclen -= n;
+ }
+
+ if (srclen) {
+ dctx->bytes = GHASH_BLOCK_SIZE - srclen;
+ memcpy(buf, src, srclen);
+ }
+
+ return 0;
+}
+
+static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+{
+ u8 *buf = dctx->buffer;
+ int ret;
+
+ if (dctx->bytes) {
+ u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+ memset(pos, 0, dctx->bytes);
+
+ ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE);
+ BUG_ON(ret != GHASH_BLOCK_SIZE);
+ }
+
+ dctx->bytes = 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+ struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+
+ ghash_flush(ctx, dctx);
+ memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE);
+
+ return 0;
+}
+
+static struct shash_alg ghash_alg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = ghash_init,
+ .update = ghash_update,
+ .final = ghash_final,
+ .setkey = ghash_setkey,
+ .descsize = sizeof(struct ghash_desc_ctx),
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "ghash-s390",
+ .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ghash_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list),
+ },
+};
+
+static int __init ghash_mod_init(void)
+{
+ if (!crypt_s390_func_available(KIMD_GHASH,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4))
+ return -EOPNOTSUPP;
+
+ return crypto_register_shash(&ghash_alg);
+}
+
+static void __exit ghash_mod_exit(void)
+{
+ crypto_unregister_shash(&ghash_alg);
+}
+
+module_init(ghash_mod_init);
+module_exit(ghash_mod_exit);
+
+MODULE_ALIAS("ghash");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 8b16c479585..0808fbf0f7d 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -166,7 +166,7 @@ static int __init prng_init(void)
int ret;
/* check if the CPU has a PRNG */
- if (!crypt_s390_func_available(KMC_PRNG))
+ if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA))
return -EOPNOTSUPP;
if (prng_chunk_size < 8)
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index f6de7826c97..e9868c6e0a0 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -90,7 +90,7 @@ static struct shash_alg alg = {
static int __init sha1_s390_init(void)
{
- if (!crypt_s390_func_available(KIMD_SHA_1))
+ if (!crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA))
return -EOPNOTSUPP;
return crypto_register_shash(&alg);
}
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 61a7db37212..5ed8d64fc2e 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -86,7 +86,7 @@ static struct shash_alg alg = {
static int sha256_s390_init(void)
{
- if (!crypt_s390_func_available(KIMD_SHA_256))
+ if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA))
return -EOPNOTSUPP;
return crypto_register_shash(&alg);
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 4bf73d0dc52..32a81383b69 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -132,7 +132,7 @@ static int __init init(void)
{
int ret;
- if (!crypt_s390_func_available(KIMD_SHA_512))
+ if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA))
return -EOPNOTSUPP;
if ((ret = crypto_register_shash(&sha512_alg)) < 0)
goto out;
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index 14726eef1ce..f0907995b4c 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -20,6 +20,7 @@
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/io.h>
+#include <linux/prefetch.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <cpu/sq.h>
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index e560d102215..63a027c9ada 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -25,6 +25,10 @@ config SPARC
select HAVE_DMA_ATTRS
select HAVE_DMA_API_DEBUG
select HAVE_ARCH_JUMP_LABEL
+ select HAVE_GENERIC_HARDIRQS
+ select GENERIC_HARDIRQS_NO_DEPRECATED
+ select GENERIC_IRQ_SHOW
+ select USE_GENERIC_SMP_HELPERS if SMP
config SPARC32
def_bool !64BIT
@@ -43,15 +47,12 @@ config SPARC64
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_SYSCALL_TRACEPOINTS
- select USE_GENERIC_SMP_HELPERS if SMP
select RTC_DRV_CMOS
select RTC_DRV_BQ4802
select RTC_DRV_SUN4V
select RTC_DRV_STARFIRE
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
- select HAVE_GENERIC_HARDIRQS
- select GENERIC_IRQ_SHOW
select IRQ_PREFLOW_FASTEOI
config ARCH_DEFCONFIG
diff --git a/arch/sparc/include/asm/cpudata_32.h b/arch/sparc/include/asm/cpudata_32.h
index 31d48a0e32c..a4c5a938b93 100644
--- a/arch/sparc/include/asm/cpudata_32.h
+++ b/arch/sparc/include/asm/cpudata_32.h
@@ -16,6 +16,10 @@ typedef struct {
unsigned long clock_tick;
unsigned int multiplier;
unsigned int counter;
+#ifdef CONFIG_SMP
+ unsigned int irq_resched_count;
+ unsigned int irq_call_count;
+#endif
int prom_node;
int mid;
int next;
@@ -23,5 +27,6 @@ typedef struct {
DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
+#define local_cpu_data() __get_cpu_var(__cpu_data)
#endif /* _SPARC_CPUDATA_H */
diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h
index 86666f70322..482c79e2a41 100644
--- a/arch/sparc/include/asm/floppy_32.h
+++ b/arch/sparc/include/asm/floppy_32.h
@@ -281,28 +281,27 @@ static inline void sun_fd_enable_dma(void)
pdma_areasize = pdma_size;
}
-/* Our low-level entry point in arch/sparc/kernel/entry.S */
-extern int sparc_floppy_request_irq(int irq, unsigned long flags,
- irq_handler_t irq_handler);
+extern int sparc_floppy_request_irq(unsigned int irq,
+ irq_handler_t irq_handler);
static int sun_fd_request_irq(void)
{
static int once = 0;
- int error;
- if(!once) {
+ if (!once) {
once = 1;
- error = sparc_floppy_request_irq(FLOPPY_IRQ,
- IRQF_DISABLED,
- floppy_interrupt);
- return ((error == 0) ? 0 : -1);
- } else return 0;
+ return sparc_floppy_request_irq(FLOPPY_IRQ, floppy_interrupt);
+ } else {
+ return 0;
+ }
}
static struct linux_prom_registers fd_regs[2];
static int sun_floppy_init(void)
{
+ struct platform_device *op;
+ struct device_node *dp;
char state[128];
phandle tnode, fd_node;
int num_regs;
@@ -310,7 +309,6 @@ static int sun_floppy_init(void)
use_virtual_dma = 1;
- FLOPPY_IRQ = 11;
/* Forget it if we aren't on a machine that could possibly
* ever have a floppy drive.
*/
@@ -349,6 +347,26 @@ static int sun_floppy_init(void)
sun_fdc = (struct sun_flpy_controller *)
of_ioremap(&r, 0, fd_regs[0].reg_size, "floppy");
+ /* Look up irq in platform_device.
+ * We try "SUNW,fdtwo" and "fd"
+ */
+ for_each_node_by_name(dp, "SUNW,fdtwo") {
+ op = of_find_device_by_node(dp);
+ if (op)
+ break;
+ }
+ if (!op) {
+ for_each_node_by_name(dp, "fd") {
+ op = of_find_device_by_node(dp);
+ if (op)
+ break;
+ }
+ }
+ if (!op)
+ goto no_sun_fdc;
+
+ FLOPPY_IRQ = op->archdata.irqs[0];
+
/* Last minute sanity check... */
if(sun_fdc->status_82072 == 0xff) {
sun_fdc = NULL;
diff --git a/arch/sparc/include/asm/io.h b/arch/sparc/include/asm/io.h
index a34b2994937..f6902cf3cbe 100644
--- a/arch/sparc/include/asm/io.h
+++ b/arch/sparc/include/asm/io.h
@@ -5,4 +5,17 @@
#else
#include <asm/io_32.h>
#endif
+
+/*
+ * Defines used for both SPARC32 and SPARC64
+ */
+
+/* Big endian versions of memory read/write routines */
+#define readb_be(__addr) __raw_readb(__addr)
+#define readw_be(__addr) __raw_readw(__addr)
+#define readl_be(__addr) __raw_readl(__addr)
+#define writeb_be(__b, __addr) __raw_writeb(__b, __addr)
+#define writel_be(__w, __addr) __raw_writel(__w, __addr)
+#define writew_be(__l, __addr) __raw_writew(__l, __addr)
+
#endif
diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h
index eced3e3ebd3..2ae3acaeb1b 100644
--- a/arch/sparc/include/asm/irq_32.h
+++ b/arch/sparc/include/asm/irq_32.h
@@ -6,7 +6,11 @@
#ifndef _SPARC_IRQ_H
#define _SPARC_IRQ_H
-#define NR_IRQS 16
+/* Allocated number of logical irq numbers.
+ * sun4d boxes (ss2000e) should be OK with ~32.
+ * Be on the safe side and make room for 64
+ */
+#define NR_IRQS 64
#include <linux/interrupt.h>
diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h
index c04f96fb753..6bdaf1e43d2 100644
--- a/arch/sparc/include/asm/leon.h
+++ b/arch/sparc/include/asm/leon.h
@@ -52,29 +52,6 @@
#define LEON_DIAGF_VALID 0x2000
#define LEON_DIAGF_VALID_SHIFT 13
-/*
- * Interrupt Sources
- *
- * The interrupt source numbers directly map to the trap type and to
- * the bits used in the Interrupt Clear, Interrupt Force, Interrupt Mask,
- * and the Interrupt Pending Registers.
- */
-#define LEON_INTERRUPT_CORRECTABLE_MEMORY_ERROR 1
-#define LEON_INTERRUPT_UART_1_RX_TX 2
-#define LEON_INTERRUPT_UART_0_RX_TX 3
-#define LEON_INTERRUPT_EXTERNAL_0 4
-#define LEON_INTERRUPT_EXTERNAL_1 5
-#define LEON_INTERRUPT_EXTERNAL_2 6
-#define LEON_INTERRUPT_EXTERNAL_3 7
-#define LEON_INTERRUPT_TIMER1 8
-#define LEON_INTERRUPT_TIMER2 9
-#define LEON_INTERRUPT_EMPTY1 10
-#define LEON_INTERRUPT_EMPTY2 11
-#define LEON_INTERRUPT_OPEN_ETH 12
-#define LEON_INTERRUPT_EMPTY4 13
-#define LEON_INTERRUPT_EMPTY5 14
-#define LEON_INTERRUPT_EMPTY6 15
-
/* irq masks */
#define LEON_HARD_INT(x) (1 << (x)) /* irq 0-15 */
#define LEON_IRQMASK_R 0x0000fffe /* bit 15- 1 of lregs.irqmask */
@@ -183,7 +160,6 @@ static inline void leon_srmmu_enabletlb(void)
/* macro access for leon_readnobuffer_reg() */
#define LEON_BYPASSCACHE_LOAD_VA(x) leon_readnobuffer_reg((unsigned long)(x))
-extern void sparc_leon_eirq_register(int eirq);
extern void leon_init(void);
extern void leon_switch_mm(void);
extern void leon_init_IRQ(void);
@@ -239,8 +215,8 @@ static inline int sparc_leon3_cpuid(void)
#endif /*!__ASSEMBLY__*/
#ifdef CONFIG_SMP
-# define LEON3_IRQ_RESCHEDULE 13
-# define LEON3_IRQ_TICKER (leon_percpu_timer_dev[0].irq)
+# define LEON3_IRQ_IPI_DEFAULT 13
+# define LEON3_IRQ_TICKER (leon3_ticker_irq)
# define LEON3_IRQ_CROSS_CALL 15
#endif
@@ -339,9 +315,9 @@ struct leon2_cacheregs {
#include <linux/interrupt.h>
struct device_node;
-extern int sparc_leon_eirq_get(int eirq, int cpu);
-extern irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id);
-extern void sparc_leon_eirq_register(int eirq);
+extern unsigned int leon_build_device_irq(unsigned int real_irq,
+ irq_flow_handler_t flow_handler,
+ const char *name, int do_ack);
extern void leon_clear_clock_irq(void);
extern void leon_load_profile_irq(int cpu, unsigned int limit);
extern void leon_init_timers(irq_handler_t counter_fn);
@@ -358,6 +334,7 @@ extern void leon3_getCacheRegs(struct leon3_cacheregs *regs);
extern int leon_flush_needed(void);
extern void leon_switch_mm(void);
extern int srmmu_swprobe_trace;
+extern int leon3_ticker_irq;
#ifdef CONFIG_SMP
extern int leon_smp_nrcpus(void);
@@ -366,17 +343,19 @@ extern void leon_smp_done(void);
extern void leon_boot_cpus(void);
extern int leon_boot_one_cpu(int i);
void leon_init_smp(void);
-extern void cpu_probe(void);
extern void cpu_idle(void);
extern void init_IRQ(void);
extern void cpu_panic(void);
extern int __leon_processor_id(void);
void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu);
+extern irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused);
-extern unsigned int real_irq_entry[], smpleon_ticker[];
+extern unsigned int real_irq_entry[];
+extern unsigned int smpleon_ipi[];
extern unsigned int patchme_maybe_smp_msg[];
extern unsigned int t_nmi[], linux_trap_ipi15_leon[];
extern unsigned int linux_trap_ipi15_sun4m[];
+extern int leon_ipi_irq;
#endif /* CONFIG_SMP */
diff --git a/arch/sparc/include/asm/pcic.h b/arch/sparc/include/asm/pcic.h
index f20ef562b26..7eb5d78f521 100644
--- a/arch/sparc/include/asm/pcic.h
+++ b/arch/sparc/include/asm/pcic.h
@@ -29,11 +29,17 @@ struct linux_pcic {
int pcic_imdim;
};
-extern int pcic_probe(void);
-/* Erm... MJ redefined pcibios_present() so that it does not work early. */
+#ifdef CONFIG_PCI
extern int pcic_present(void);
+extern int pcic_probe(void);
+extern void pci_time_init(void);
extern void sun4m_pci_init_IRQ(void);
-
+#else
+static inline int pcic_present(void) { return 0; }
+static inline int pcic_probe(void) { return 0; }
+static inline void pci_time_init(void) {}
+static inline void sun4m_pci_init_IRQ(void) {}
+#endif
#endif
/* Size of PCI I/O space which we relocate. */
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 303bd4dc829..5b31a8e8982 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -8,6 +8,8 @@
* Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
*/
+#include <linux/const.h>
+
#ifndef __ASSEMBLY__
#include <asm-generic/4level-fixup.h>
@@ -456,9 +458,9 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma,
#endif /* !(__ASSEMBLY__) */
-#define VMALLOC_START 0xfe600000
+#define VMALLOC_START _AC(0xfe600000,UL)
/* XXX Alter this when I get around to fixing sun4c - Anton */
-#define VMALLOC_END 0xffc00000
+#define VMALLOC_END _AC(0xffc00000,UL)
/* We provide our own get_unmapped_area to cope with VA holes for userland */
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index f8dddb7045b..b77128c8052 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -699,6 +699,9 @@ extern pmd_t swapper_low_pmd_dir[2048];
extern void paging_init(void);
extern unsigned long find_ecache_flush_span(unsigned long size);
+struct seq_file;
+extern void mmu_info(struct seq_file *);
+
/* These do nothing with the way I have things setup. */
#define mmu_lockarea(vaddr, len) (vaddr)
#define mmu_unlockarea(vaddr, len) do { } while(0)
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 2643c62f4ac..64718ba2643 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -11,4 +11,16 @@
# define COMMAND_LINE_SIZE 256
#endif
+#ifdef __KERNEL__
+
+#ifdef CONFIG_SPARC32
+/* The CPU that was used for booting
+ * Only sun4d + leon may have boot_cpu_id != 0
+ */
+extern unsigned char boot_cpu_id;
+extern unsigned char boot_cpu_id4;
+#endif
+
+#endif /* __KERNEL__ */
+
#endif /* _SPARC_SETUP_H */
diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h
index d82d7f4c0a7..093f10843ff 100644
--- a/arch/sparc/include/asm/smp_32.h
+++ b/arch/sparc/include/asm/smp_32.h
@@ -50,42 +50,38 @@ void smp_callin(void);
void smp_boot_cpus(void);
void smp_store_cpu_info(int);
+void smp_resched_interrupt(void);
+void smp_call_function_single_interrupt(void);
+void smp_call_function_interrupt(void);
+
struct seq_file;
void smp_bogo(struct seq_file *);
void smp_info(struct seq_file *);
BTFIXUPDEF_CALL(void, smp_cross_call, smpfunc_t, cpumask_t, unsigned long, unsigned long, unsigned long, unsigned long)
BTFIXUPDEF_CALL(int, __hard_smp_processor_id, void)
+BTFIXUPDEF_CALL(void, smp_ipi_resched, int);
+BTFIXUPDEF_CALL(void, smp_ipi_single, int);
+BTFIXUPDEF_CALL(void, smp_ipi_mask_one, int);
BTFIXUPDEF_BLACKBOX(hard_smp_processor_id)
BTFIXUPDEF_BLACKBOX(load_current)
#define smp_cross_call(func,mask,arg1,arg2,arg3,arg4) BTFIXUP_CALL(smp_cross_call)(func,mask,arg1,arg2,arg3,arg4)
-static inline void xc0(smpfunc_t func) { smp_cross_call(func, cpu_online_map, 0, 0, 0, 0); }
+static inline void xc0(smpfunc_t func) { smp_cross_call(func, *cpu_online_mask, 0, 0, 0, 0); }
static inline void xc1(smpfunc_t func, unsigned long arg1)
-{ smp_cross_call(func, cpu_online_map, arg1, 0, 0, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); }
static inline void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, 0, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); }
static inline void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2,
unsigned long arg3)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, 0); }
static inline void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2,
unsigned long arg3, unsigned long arg4)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, arg4); }
-
-static inline int smp_call_function(void (*func)(void *info), void *info, int wait)
-{
- xc1((smpfunc_t)func, (unsigned long)info);
- return 0;
-}
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, arg4); }
-static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
- void *info, int wait)
-{
- smp_cross_call((smpfunc_t)func, cpumask_of_cpu(cpuid),
- (unsigned long) info, 0, 0, 0);
- return 0;
-}
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
static inline int cpu_logical_map(int cpu)
{
@@ -135,6 +131,11 @@ static inline int hard_smp_processor_id(void)
__asm__ __volatile__("lda [%g0] ASI_M_VIKING_TMP1, %0\n\t"
"nop; nop" :
"=&r" (cpuid));
+ - leon
+ __asm__ __volatile__( "rd %asr17, %0\n\t"
+ "srl %0, 0x1c, %0\n\t"
+ "nop\n\t" :
+ "=&r" (cpuid));
See btfixup.h and btfixupprep.c to understand how a blackbox works.
*/
__asm__ __volatile__("sethi %%hi(___b_hard_smp_processor_id), %0\n\t"
diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
index f49e11cd4de..20bca895071 100644
--- a/arch/sparc/include/asm/smp_64.h
+++ b/arch/sparc/include/asm/smp_64.h
@@ -49,6 +49,10 @@ extern void cpu_play_dead(void);
extern void smp_fetch_global_regs(void);
+struct seq_file;
+void smp_bogo(struct seq_file *);
+void smp_info(struct seq_file *);
+
#ifdef CONFIG_HOTPLUG_CPU
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 7f9b9dba38a..5f5b8bf3f50 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -9,6 +9,7 @@
#ifndef __ASSEMBLY__
#include <asm/psr.h>
+#include <asm/processor.h> /* for cpu_relax */
#define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
diff --git a/arch/sparc/include/asm/system_32.h b/arch/sparc/include/asm/system_32.h
index 890036b3689..47a7e862474 100644
--- a/arch/sparc/include/asm/system_32.h
+++ b/arch/sparc/include/asm/system_32.h
@@ -15,11 +15,6 @@
#include <linux/irqflags.h>
-static inline unsigned int probe_irq_mask(unsigned long val)
-{
- return 0;
-}
-
/*
* Sparc (general) CPU types
*/
diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h
index e3b65d8cf41..3c96d3bb9f1 100644
--- a/arch/sparc/include/asm/system_64.h
+++ b/arch/sparc/include/asm/system_64.h
@@ -29,10 +29,6 @@ enum sparc_cpu {
/* This cannot ever be a sun4c :) That's just history. */
#define ARCH_SUN4C 0
-extern const char *sparc_cpu_type;
-extern const char *sparc_fpu_type;
-extern const char *sparc_pmu_type;
-
extern char reboot_command[];
/* These are here in an effort to more fully work around Spitfire Errata
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 9d897b6db98..c5387ed0add 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -404,8 +404,9 @@
#define __NR_open_by_handle_at 333
#define __NR_clock_adjtime 334
#define __NR_syncfs 335
+#define __NR_sendmmsg 336
-#define NR_syscalls 336
+#define NR_syscalls 337
#ifdef __32bit_syscall_numbers__
/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h
index 5b0a06dc3bc..a9be04b0d04 100644
--- a/arch/sparc/include/asm/winmacro.h
+++ b/arch/sparc/include/asm/winmacro.h
@@ -103,6 +103,7 @@
st %scratch, [%cur_reg + TI_W_SAVED];
#ifdef CONFIG_SMP
+/* Results of LOAD_CURRENT() after BTFIXUP for SUN4M, SUN4D & LEON (comments) */
#define LOAD_CURRENT4M(dest_reg, idreg) \
rd %tbr, %idreg; \
sethi %hi(current_set), %dest_reg; \
@@ -118,6 +119,14 @@
or %dest_reg, %lo(C_LABEL(current_set)), %dest_reg; \
ld [%idreg + %dest_reg], %dest_reg;
+#define LOAD_CURRENT_LEON(dest_reg, idreg) \
+ rd %asr17, %idreg; \
+ sethi %hi(current_set), %dest_reg; \
+ srl %idreg, 0x1c, %idreg; \
+ or %dest_reg, %lo(current_set), %dest_reg; \
+ sll %idreg, 0x2, %idreg; \
+ ld [%idreg + %dest_reg], %dest_reg;
+
/* Blackbox - take care with this... - check smp4m and smp4d before changing this. */
#define LOAD_CURRENT(dest_reg, idreg) \
sethi %hi(___b_load_current), %idreg; \
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 99aa4db6e9c..9cff2709a96 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -71,10 +71,6 @@ obj-$(CONFIG_SPARC64) += pcr.o
obj-$(CONFIG_SPARC64) += nmi.o
obj-$(CONFIG_SPARC64_SMP) += cpumap.o
-# sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation
-obj-$(CONFIG_SPARC32) += devres.o
-devres-y := ../../../kernel/irq/devres.o
-
obj-y += dma.o
obj-$(CONFIG_SPARC32_PCI) += pcic.o
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 7925c54f413..138dbbc8dc8 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -4,6 +4,7 @@
* Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
*/
+#include <linux/seq_file.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -11,7 +12,9 @@
#include <linux/threads.h>
#include <asm/spitfire.h>
+#include <asm/pgtable.h>
#include <asm/oplib.h>
+#include <asm/setup.h>
#include <asm/page.h>
#include <asm/head.h>
#include <asm/psr.h>
@@ -23,6 +26,9 @@
DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
EXPORT_PER_CPU_SYMBOL(__cpu_data);
+int ncpus_probed;
+unsigned int fsr_storage;
+
struct cpu_info {
int psr_vers;
const char *name;
@@ -247,13 +253,12 @@ static const struct manufacturer_info __initconst manufacturer_info[] = {
* machine type value into consideration too. I will fix this.
*/
-const char *sparc_cpu_type;
-const char *sparc_fpu_type;
+static const char *sparc_cpu_type;
+static const char *sparc_fpu_type;
const char *sparc_pmu_type;
-unsigned int fsr_storage;
-static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
+static void __init set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
{
const struct manufacturer_info *manuf;
int i;
@@ -313,7 +318,123 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
}
#ifdef CONFIG_SPARC32
-void __cpuinit cpu_probe(void)
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+ seq_printf(m,
+ "cpu\t\t: %s\n"
+ "fpu\t\t: %s\n"
+ "promlib\t\t: Version %d Revision %d\n"
+ "prom\t\t: %d.%d\n"
+ "type\t\t: %s\n"
+ "ncpus probed\t: %d\n"
+ "ncpus active\t: %d\n"
+#ifndef CONFIG_SMP
+ "CPU0Bogo\t: %lu.%02lu\n"
+ "CPU0ClkTck\t: %ld\n"
+#endif
+ ,
+ sparc_cpu_type,
+ sparc_fpu_type ,
+ romvec->pv_romvers,
+ prom_rev,
+ romvec->pv_printrev >> 16,
+ romvec->pv_printrev & 0xffff,
+ &cputypval[0],
+ ncpus_probed,
+ num_online_cpus()
+#ifndef CONFIG_SMP
+ , cpu_data(0).udelay_val/(500000/HZ),
+ (cpu_data(0).udelay_val/(5000/HZ)) % 100,
+ cpu_data(0).clock_tick
+#endif
+ );
+
+#ifdef CONFIG_SMP
+ smp_bogo(m);
+#endif
+ mmu_info(m);
+#ifdef CONFIG_SMP
+ smp_info(m);
+#endif
+ return 0;
+}
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
+unsigned int dcache_parity_tl1_occurred;
+unsigned int icache_parity_tl1_occurred;
+
+
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+ seq_printf(m,
+ "cpu\t\t: %s\n"
+ "fpu\t\t: %s\n"
+ "pmu\t\t: %s\n"
+ "prom\t\t: %s\n"
+ "type\t\t: %s\n"
+ "ncpus probed\t: %d\n"
+ "ncpus active\t: %d\n"
+ "D$ parity tl1\t: %u\n"
+ "I$ parity tl1\t: %u\n"
+#ifndef CONFIG_SMP
+ "Cpu0ClkTck\t: %016lx\n"
+#endif
+ ,
+ sparc_cpu_type,
+ sparc_fpu_type,
+ sparc_pmu_type,
+ prom_version,
+ ((tlb_type == hypervisor) ?
+ "sun4v" :
+ "sun4u"),
+ ncpus_probed,
+ num_online_cpus(),
+ dcache_parity_tl1_occurred,
+ icache_parity_tl1_occurred
+#ifndef CONFIG_SMP
+ , cpu_data(0).clock_tick
+#endif
+ );
+#ifdef CONFIG_SMP
+ smp_bogo(m);
+#endif
+ mmu_info(m);
+#ifdef CONFIG_SMP
+ smp_info(m);
+#endif
+ return 0;
+}
+#endif /* CONFIG_SPARC64 */
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+ /* The pointer we are returning is arbitrary,
+ * it just has to be non-NULL and not IS_ERR
+ * in the success case.
+ */
+ return *pos == 0 ? &c_start : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+ .start =c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
+};
+
+#ifdef CONFIG_SPARC32
+static int __init cpu_type_probe(void)
{
int psr_impl, psr_vers, fpu_vers;
int psr;
@@ -332,8 +453,12 @@ void __cpuinit cpu_probe(void)
put_psr(psr);
set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers);
+
+ return 0;
}
-#else
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
static void __init sun4v_cpu_probe(void)
{
switch (sun4v_chip_type) {
@@ -374,6 +499,6 @@ static int __init cpu_type_probe(void)
}
return 0;
}
+#endif /* CONFIG_SPARC64 */
early_initcall(cpu_type_probe);
-#endif
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index 8de64c8126b..d91fd782743 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -202,7 +202,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void)
new_tree->total_nodes = n;
memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));
- prev_cpu = cpu = first_cpu(cpu_online_map);
+ prev_cpu = cpu = cpumask_first(cpu_online_mask);
/* Initialize all levels in the tree with the first CPU */
for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
@@ -381,7 +381,7 @@ static int simple_map_to_cpu(unsigned int index)
}
/* Impossible, since num_online_cpus() <= num_possible_cpus() */
- return first_cpu(cpu_online_map);
+ return cpumask_first(cpu_online_mask);
}
static int _map_to_cpu(unsigned int index)
diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c
index d2eddd6647c..113c052c304 100644
--- a/arch/sparc/kernel/devices.c
+++ b/arch/sparc/kernel/devices.c
@@ -20,7 +20,6 @@
#include <asm/system.h>
#include <asm/cpudata.h>
-extern void cpu_probe(void);
extern void clock_stop_probe(void); /* tadpole.c */
extern void sun4c_probe_memerr_reg(void);
@@ -115,7 +114,7 @@ int cpu_get_hwmid(phandle prom_node)
void __init device_scan(void)
{
- prom_printf("Booting Linux...\n");
+ printk(KERN_NOTICE "Booting Linux...\n");
#ifndef CONFIG_SMP
{
@@ -133,7 +132,6 @@ void __init device_scan(void)
}
#endif /* !CONFIG_SMP */
- cpu_probe();
{
extern void auxio_probe(void);
extern void auxio_power_probe(void);
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index 3add4de8a1a..dd1342c0a3b 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -497,7 +497,7 @@ static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
tag->num_records = ncpus;
i = 0;
- for_each_cpu_mask(cpu, *mask) {
+ for_each_cpu(cpu, mask) {
ent[i].cpu = cpu;
ent[i].result = DR_CPU_RES_OK;
ent[i].stat = default_stat;
@@ -534,7 +534,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
int resp_len, ncpus, cpu;
unsigned long flags;
- ncpus = cpus_weight(*mask);
+ ncpus = cpumask_weight(mask);
resp_len = dr_cpu_size_response(ncpus);
resp = kzalloc(resp_len, GFP_KERNEL);
if (!resp)
@@ -547,7 +547,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
mdesc_populate_present_mask(mask);
mdesc_fill_in_cpu_data(mask);
- for_each_cpu_mask(cpu, *mask) {
+ for_each_cpu(cpu, mask) {
int err;
printk(KERN_INFO "ds-%llu: Starting cpu %d...\n",
@@ -593,7 +593,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp,
int resp_len, ncpus, cpu;
unsigned long flags;
- ncpus = cpus_weight(*mask);
+ ncpus = cpumask_weight(mask);
resp_len = dr_cpu_size_response(ncpus);
resp = kzalloc(resp_len, GFP_KERNEL);
if (!resp)
@@ -603,7 +603,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp,
resp_len, ncpus, mask,
DR_CPU_STAT_UNCONFIGURED);
- for_each_cpu_mask(cpu, *mask) {
+ for_each_cpu(cpu, mask) {
int err;
printk(KERN_INFO "ds-%llu: Shutting down cpu %d...\n",
@@ -649,13 +649,13 @@ static void __cpuinit dr_cpu_data(struct ds_info *dp,
purge_dups(cpu_list, tag->num_records);
- cpus_clear(mask);
+ cpumask_clear(&mask);
for (i = 0; i < tag->num_records; i++) {
if (cpu_list[i] == CPU_SENTINEL)
continue;
if (cpu_list[i] < nr_cpu_ids)
- cpu_set(cpu_list[i], mask);
+ cpumask_set_cpu(cpu_list[i], &mask);
}
if (tag->type == DR_CPU_CONFIGURE)
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 6da784a5612..8341963f4c8 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -269,19 +269,22 @@ smp4m_ticker:
/* Here is where we check for possible SMP IPI passed to us
* on some level other than 15 which is the NMI and only used
* for cross calls. That has a separate entry point below.
+ *
+ * IPIs are sent on Level 12, 13 and 14. See IRQ_IPI_*.
*/
maybe_smp4m_msg:
GET_PROCESSOR4M_ID(o3)
sethi %hi(sun4m_irq_percpu), %l5
sll %o3, 2, %o3
or %l5, %lo(sun4m_irq_percpu), %o5
- sethi %hi(0x40000000), %o2
+ sethi %hi(0x70000000), %o2 ! Check all soft-IRQs
ld [%o5 + %o3], %o1
ld [%o1 + 0x00], %o3 ! sun4m_irq_percpu[cpu]->pending
andcc %o3, %o2, %g0
be,a smp4m_ticker
cmp %l7, 14
- st %o2, [%o1 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x40000000
+ /* Soft-IRQ IPI */
+ st %o2, [%o1 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x70000000
WRITE_PAUSE
ld [%o1 + 0x00], %g0 ! sun4m_irq_percpu[cpu]->pending
WRITE_PAUSE
@@ -290,9 +293,27 @@ maybe_smp4m_msg:
WRITE_PAUSE
wr %l4, PSR_ET, %psr
WRITE_PAUSE
- call smp_reschedule_irq
+ sll %o2, 28, %o2 ! shift for simpler checks below
+maybe_smp4m_msg_check_single:
+ andcc %o2, 0x1, %g0
+ beq,a maybe_smp4m_msg_check_mask
+ andcc %o2, 0x2, %g0
+ call smp_call_function_single_interrupt
nop
-
+ andcc %o2, 0x2, %g0
+maybe_smp4m_msg_check_mask:
+ beq,a maybe_smp4m_msg_check_resched
+ andcc %o2, 0x4, %g0
+ call smp_call_function_interrupt
+ nop
+ andcc %o2, 0x4, %g0
+maybe_smp4m_msg_check_resched:
+ /* rescheduling is done in RESTORE_ALL regardless, but incr stats */
+ beq,a maybe_smp4m_msg_out
+ nop
+ call smp_resched_interrupt
+ nop
+maybe_smp4m_msg_out:
RESTORE_ALL
.align 4
@@ -401,18 +422,18 @@ linux_trap_ipi15_sun4d:
1: b,a 1b
#ifdef CONFIG_SPARC_LEON
-
- .globl smpleon_ticker
- /* SMP per-cpu ticker interrupts are handled specially. */
-smpleon_ticker:
+ .globl smpleon_ipi
+ .extern leon_ipi_interrupt
+ /* SMP per-cpu IPI interrupts are handled specially. */
+smpleon_ipi:
SAVE_ALL
or %l0, PSR_PIL, %g2
wr %g2, 0x0, %psr
WRITE_PAUSE
wr %g2, PSR_ET, %psr
WRITE_PAUSE
- call leon_percpu_timer_interrupt
- add %sp, STACKFRAME_SZ, %o0
+ call leonsmp_ipi_interrupt
+ add %sp, STACKFRAME_SZ, %o1 ! pt_regs
wr %l0, PSR_ET, %psr
WRITE_PAUSE
RESTORE_ALL
diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
index 59423491cef..58778575983 100644
--- a/arch/sparc/kernel/head_32.S
+++ b/arch/sparc/kernel/head_32.S
@@ -810,31 +810,25 @@ found_version:
got_prop:
#ifdef CONFIG_SPARC_LEON
/* no cpu-type check is needed, it is a SPARC-LEON */
-#ifdef CONFIG_SMP
- ba leon_smp_init
- nop
- .global leon_smp_init
-leon_smp_init:
- sethi %hi(boot_cpu_id), %g1 ! master always 0
- stb %g0, [%g1 + %lo(boot_cpu_id)]
- sethi %hi(boot_cpu_id4), %g1 ! master always 0
- stb %g0, [%g1 + %lo(boot_cpu_id4)]
+ sethi %hi(boot_cpu_id), %g2 ! boot-cpu index
- rd %asr17,%g1
- srl %g1,28,%g1
+#ifdef CONFIG_SMP
+ ldub [%g2 + %lo(boot_cpu_id)], %g1
+ cmp %g1, 0xff ! unset means first CPU
+ bne leon_smp_cpu_startup ! continue only with master
+ nop
+#endif
+ /* Get CPU-ID from most significant 4-bit of ASR17 */
+ rd %asr17, %g1
+ srl %g1, 28, %g1
- cmp %g0,%g1
- beq sun4c_continue_boot !continue with master
- nop
+ /* Update boot_cpu_id only on boot cpu */
+ stub %g1, [%g2 + %lo(boot_cpu_id)]
- ba leon_smp_cpu_startup
- nop
-#else
ba sun4c_continue_boot
nop
#endif
-#endif
set cputypval, %o2
ldub [%o2 + 0x4], %l1
@@ -893,9 +887,6 @@ sun4d_init:
sta %g4, [%g0] ASI_M_VIKING_TMP1
sethi %hi(boot_cpu_id), %g5
stb %g4, [%g5 + %lo(boot_cpu_id)]
- sll %g4, 2, %g4
- sethi %hi(boot_cpu_id4), %g5
- stb %g4, [%g5 + %lo(boot_cpu_id4)]
#endif
/* Fall through to sun4m_init */
@@ -1024,14 +1015,28 @@ sun4c_continue_boot:
bl 1b
add %o0, 0x1, %o0
+ /* If boot_cpu_id has not been setup by machine specific
+ * init-code above we default it to zero.
+ */
+ sethi %hi(boot_cpu_id), %g2
+ ldub [%g2 + %lo(boot_cpu_id)], %g3
+ cmp %g3, 0xff
+ bne 1f
+ nop
+ mov %g0, %g3
+ stub %g3, [%g2 + %lo(boot_cpu_id)]
+
+1: /* boot_cpu_id set. calculate boot_cpu_id4 = boot_cpu_id*4 */
+ sll %g3, 2, %g3
+ sethi %hi(boot_cpu_id4), %g2
+ stub %g3, [%g2 + %lo(boot_cpu_id4)]
+
/* Initialize the uwinmask value for init task just in case.
* But first make current_set[boot_cpu_id] point to something useful.
*/
set init_thread_union, %g6
set current_set, %g2
#ifdef CONFIG_SMP
- sethi %hi(boot_cpu_id4), %g3
- ldub [%g3 + %lo(boot_cpu_id4)], %g3
st %g6, [%g2]
add %g2, %g3, %g2
#endif
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index c6ce9a6a479..1c9c80a1a86 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -50,10 +50,15 @@
#include <asm/io-unit.h>
#include <asm/leon.h>
+/* This function must make sure that caches and memory are coherent after DMA
+ * On LEON systems without cache snooping it flushes the entire D-CACHE.
+ */
#ifndef CONFIG_SPARC_LEON
-#define mmu_inval_dma_area(p, l) /* Anton pulled it out for 2.4.0-xx */
+static inline void dma_make_coherent(unsigned long pa, unsigned long len)
+{
+}
#else
-static inline void mmu_inval_dma_area(void *va, unsigned long len)
+static inline void dma_make_coherent(unsigned long pa, unsigned long len)
{
if (!sparc_leon3_snooping_enabled())
leon_flush_dcache_all();
@@ -284,7 +289,6 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len,
printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total);
goto err_nova;
}
- mmu_inval_dma_area((void *)va, len_total);
// XXX The mmu_map_dma_area does this for us below, see comments.
// sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
@@ -336,7 +340,6 @@ static void sbus_free_coherent(struct device *dev, size_t n, void *p,
release_resource(res);
kfree(res);
- /* mmu_inval_dma_area(va, n); */ /* it's consistent, isn't it */
pgv = virt_to_page(p);
mmu_unmap_dma_area(dev, ba, n);
@@ -463,7 +466,6 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len,
printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total);
goto err_nova;
}
- mmu_inval_dma_area(va, len_total);
sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
*pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */
@@ -489,7 +491,6 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p,
dma_addr_t ba)
{
struct resource *res;
- void *pgp;
if ((res = _sparc_find_resource(&_sparc_dvma,
(unsigned long)p)) == NULL) {
@@ -509,14 +510,12 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p,
return;
}
- pgp = phys_to_virt(ba); /* bus_to_virt actually */
- mmu_inval_dma_area(pgp, n);
+ dma_make_coherent(ba, n);
sparc_unmapiorange((unsigned long)p, n);
release_resource(res);
kfree(res);
-
- free_pages((unsigned long)pgp, get_order(n));
+ free_pages((unsigned long)phys_to_virt(ba), get_order(n));
}
/*
@@ -535,7 +534,7 @@ static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size,
enum dma_data_direction dir, struct dma_attrs *attrs)
{
if (dir != PCI_DMA_TODEVICE)
- mmu_inval_dma_area(phys_to_virt(ba), PAGE_ALIGN(size));
+ dma_make_coherent(ba, PAGE_ALIGN(size));
}
/* Map a set of buffers described by scatterlist in streaming
@@ -562,8 +561,7 @@ static int pci32_map_sg(struct device *device, struct scatterlist *sgl,
/* IIep is write-through, not flushing. */
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg_page(sg)) == NULL);
- sg->dma_address = virt_to_phys(sg_virt(sg));
+ sg->dma_address = sg_phys(sg);
sg->dma_length = sg->length;
}
return nents;
@@ -582,9 +580,7 @@ static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl,
if (dir != PCI_DMA_TODEVICE) {
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg_page(sg)) == NULL);
- mmu_inval_dma_area(page_address(sg_page(sg)),
- PAGE_ALIGN(sg->length));
+ dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
}
}
}
@@ -603,8 +599,7 @@ static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba,
size_t size, enum dma_data_direction dir)
{
if (dir != PCI_DMA_TODEVICE) {
- mmu_inval_dma_area(phys_to_virt(ba),
- PAGE_ALIGN(size));
+ dma_make_coherent(ba, PAGE_ALIGN(size));
}
}
@@ -612,8 +607,7 @@ static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba,
size_t size, enum dma_data_direction dir)
{
if (dir != PCI_DMA_TODEVICE) {
- mmu_inval_dma_area(phys_to_virt(ba),
- PAGE_ALIGN(size));
+ dma_make_coherent(ba, PAGE_ALIGN(size));
}
}
@@ -631,9 +625,7 @@ static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
if (dir != PCI_DMA_TODEVICE) {
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg_page(sg)) == NULL);
- mmu_inval_dma_area(page_address(sg_page(sg)),
- PAGE_ALIGN(sg->length));
+ dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
}
}
}
@@ -646,9 +638,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *
if (dir != PCI_DMA_TODEVICE) {
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg_page(sg)) == NULL);
- mmu_inval_dma_area(page_address(sg_page(sg)),
- PAGE_ALIGN(sg->length));
+ dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
}
}
}
diff --git a/arch/sparc/kernel/irq.h b/arch/sparc/kernel/irq.h
index 008453b798e..100b9c204e7 100644
--- a/arch/sparc/kernel/irq.h
+++ b/arch/sparc/kernel/irq.h
@@ -2,6 +2,23 @@
#include <asm/btfixup.h>
+struct irq_bucket {
+ struct irq_bucket *next;
+ unsigned int real_irq;
+ unsigned int irq;
+ unsigned int pil;
+};
+
+#define SUN4D_MAX_BOARD 10
+#define SUN4D_MAX_IRQ ((SUN4D_MAX_BOARD + 2) << 5)
+
+/* Map between the irq identifier used in hw to the
+ * irq_bucket. The map is sufficient large to hold
+ * the sun4d hw identifiers.
+ */
+extern struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
+
+
/* sun4m specific type definitions */
/* This maps direct to CPU specific interrupt registers */
@@ -35,6 +52,10 @@ struct sparc_irq_config {
};
extern struct sparc_irq_config sparc_irq_config;
+unsigned int irq_alloc(unsigned int real_irq, unsigned int pil);
+void irq_link(unsigned int irq);
+void irq_unlink(unsigned int irq);
+void handler_irq(unsigned int pil, struct pt_regs *regs);
/* Dave Redman (djhr@tadpole.co.uk)
* changed these to function pointers.. it saves cycles and will allow
@@ -44,33 +65,9 @@ extern struct sparc_irq_config sparc_irq_config;
* Changed these to btfixup entities... It saves cycles :)
*/
-BTFIXUPDEF_CALL(void, disable_irq, unsigned int)
-BTFIXUPDEF_CALL(void, enable_irq, unsigned int)
-BTFIXUPDEF_CALL(void, disable_pil_irq, unsigned int)
-BTFIXUPDEF_CALL(void, enable_pil_irq, unsigned int)
BTFIXUPDEF_CALL(void, clear_clock_irq, void)
BTFIXUPDEF_CALL(void, load_profile_irq, int, unsigned int)
-static inline void __disable_irq(unsigned int irq)
-{
- BTFIXUP_CALL(disable_irq)(irq);
-}
-
-static inline void __enable_irq(unsigned int irq)
-{
- BTFIXUP_CALL(enable_irq)(irq);
-}
-
-static inline void disable_pil_irq(unsigned int irq)
-{
- BTFIXUP_CALL(disable_pil_irq)(irq);
-}
-
-static inline void enable_pil_irq(unsigned int irq)
-{
- BTFIXUP_CALL(enable_pil_irq)(irq);
-}
-
static inline void clear_clock_irq(void)
{
BTFIXUP_CALL(clear_clock_irq)();
@@ -89,4 +86,10 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int)
#define set_cpu_int(cpu,level) BTFIXUP_CALL(set_cpu_int)(cpu,level)
#define clear_cpu_int(cpu,level) BTFIXUP_CALL(clear_cpu_int)(cpu,level)
#define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu)
+
+/* All SUN4D IPIs are sent on this IRQ, may be shared with hard IRQs */
+#define SUN4D_IPI_IRQ 14
+
+extern void sun4d_ipi_interrupt(void);
+
#endif
diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c
index 7c93df4099c..9b89d842913 100644
--- a/arch/sparc/kernel/irq_32.c
+++ b/arch/sparc/kernel/irq_32.c
@@ -15,6 +15,7 @@
#include <linux/seq_file.h>
#include <asm/cacheflush.h>
+#include <asm/cpudata.h>
#include <asm/pcic.h>
#include <asm/leon.h>
@@ -101,284 +102,173 @@ EXPORT_SYMBOL(arch_local_irq_restore);
* directed CPU interrupts using the existing enable/disable irq code
* with tweaks.
*
+ * Sun4d complicates things even further. IRQ numbers are arbitrary
+ * 32-bit values in that case. Since this is similar to sparc64,
+ * we adopt a virtual IRQ numbering scheme as is done there.
+ * Virutal interrupt numbers are allocated by build_irq(). So NR_IRQS
+ * just becomes a limit of how many interrupt sources we can handle in
+ * a single system. Even fully loaded SS2000 machines top off at
+ * about 32 interrupt sources or so, therefore a NR_IRQS value of 64
+ * is more than enough.
+ *
+ * We keep a map of per-PIL enable interrupts. These get wired
+ * up via the irq_chip->startup() method which gets invoked by
+ * the generic IRQ layer during request_irq().
*/
+/* Table of allocated irqs. Unused entries has irq == 0 */
+static struct irq_bucket irq_table[NR_IRQS];
+/* Protect access to irq_table */
+static DEFINE_SPINLOCK(irq_table_lock);
-/*
- * Dave Redman (djhr@tadpole.co.uk)
- *
- * There used to be extern calls and hard coded values here.. very sucky!
- * instead, because some of the devices attach very early, I do something
- * equally sucky but at least we'll never try to free statically allocated
- * space or call kmalloc before kmalloc_init :(.
- *
- * In fact it's the timer10 that attaches first.. then timer14
- * then kmalloc_init is called.. then the tty interrupts attach.
- * hmmm....
- *
- */
-#define MAX_STATIC_ALLOC 4
-struct irqaction static_irqaction[MAX_STATIC_ALLOC];
-int static_irq_count;
-
-static struct {
- struct irqaction *action;
- int flags;
-} sparc_irq[NR_IRQS];
-#define SPARC_IRQ_INPROGRESS 1
-
-/* Used to protect the IRQ action lists */
-DEFINE_SPINLOCK(irq_action_lock);
+/* Map between the irq identifier used in hw to the irq_bucket. */
+struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
+/* Protect access to irq_map */
+static DEFINE_SPINLOCK(irq_map_lock);
-int show_interrupts(struct seq_file *p, void *v)
+/* Allocate a new irq from the irq_table */
+unsigned int irq_alloc(unsigned int real_irq, unsigned int pil)
{
- int i = *(loff_t *)v;
- struct irqaction *action;
unsigned long flags;
-#ifdef CONFIG_SMP
- int j;
-#endif
+ unsigned int i;
+
+ spin_lock_irqsave(&irq_table_lock, flags);
+ for (i = 1; i < NR_IRQS; i++) {
+ if (irq_table[i].real_irq == real_irq && irq_table[i].pil == pil)
+ goto found;
+ }
- if (sparc_cpu_model == sun4d)
- return show_sun4d_interrupts(p, v);
+ for (i = 1; i < NR_IRQS; i++) {
+ if (!irq_table[i].irq)
+ break;
+ }
- spin_lock_irqsave(&irq_action_lock, flags);
if (i < NR_IRQS) {
- action = sparc_irq[i].action;
- if (!action)
- goto out_unlock;
- seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(j) {
- seq_printf(p, "%10u ",
- kstat_cpu(j).irqs[i]);
- }
-#endif
- seq_printf(p, " %c %s",
- (action->flags & IRQF_DISABLED) ? '+' : ' ',
- action->name);
- for (action = action->next; action; action = action->next) {
- seq_printf(p, ",%s %s",
- (action->flags & IRQF_DISABLED) ? " +" : "",
- action->name);
- }
- seq_putc(p, '\n');
+ irq_table[i].real_irq = real_irq;
+ irq_table[i].irq = i;
+ irq_table[i].pil = pil;
+ } else {
+ printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
+ i = 0;
}
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
- return 0;
+found:
+ spin_unlock_irqrestore(&irq_table_lock, flags);
+
+ return i;
}
-void free_irq(unsigned int irq, void *dev_id)
+/* Based on a single pil handler_irq may need to call several
+ * interrupt handlers. Use irq_map as entry to irq_table,
+ * and let each entry in irq_table point to the next entry.
+ */
+void irq_link(unsigned int irq)
{
- struct irqaction *action;
- struct irqaction **actionp;
+ struct irq_bucket *p;
unsigned long flags;
- unsigned int cpu_irq;
-
- if (sparc_cpu_model == sun4d) {
- sun4d_free_irq(irq, dev_id);
- return;
- }
- cpu_irq = irq & (NR_IRQS - 1);
- if (cpu_irq > 14) { /* 14 irq levels on the sparc */
- printk(KERN_ERR "Trying to free bogus IRQ %d\n", irq);
- return;
- }
+ unsigned int pil;
- spin_lock_irqsave(&irq_action_lock, flags);
+ BUG_ON(irq >= NR_IRQS);
- actionp = &sparc_irq[cpu_irq].action;
- action = *actionp;
+ spin_lock_irqsave(&irq_map_lock, flags);
- if (!action->handler) {
- printk(KERN_ERR "Trying to free free IRQ%d\n", irq);
- goto out_unlock;
- }
- if (dev_id) {
- for (; action; action = action->next) {
- if (action->dev_id == dev_id)
- break;
- actionp = &action->next;
- }
- if (!action) {
- printk(KERN_ERR "Trying to free free shared IRQ%d\n",
- irq);
- goto out_unlock;
- }
- } else if (action->flags & IRQF_SHARED) {
- printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n",
- irq);
- goto out_unlock;
- }
- if (action->flags & SA_STATIC_ALLOC) {
- /*
- * This interrupt is marked as specially allocated
- * so it is a bad idea to free it.
- */
- printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n",
- irq, action->name);
- goto out_unlock;
- }
-
- *actionp = action->next;
+ p = &irq_table[irq];
+ pil = p->pil;
+ BUG_ON(pil > SUN4D_MAX_IRQ);
+ p->next = irq_map[pil];
+ irq_map[pil] = p;
- spin_unlock_irqrestore(&irq_action_lock, flags);
+ spin_unlock_irqrestore(&irq_map_lock, flags);
+}
- synchronize_irq(irq);
+void irq_unlink(unsigned int irq)
+{
+ struct irq_bucket *p, **pnext;
+ unsigned long flags;
- spin_lock_irqsave(&irq_action_lock, flags);
+ BUG_ON(irq >= NR_IRQS);
- kfree(action);
+ spin_lock_irqsave(&irq_map_lock, flags);
- if (!sparc_irq[cpu_irq].action)
- __disable_irq(irq);
+ p = &irq_table[irq];
+ BUG_ON(p->pil > SUN4D_MAX_IRQ);
+ pnext = &irq_map[p->pil];
+ while (*pnext != p)
+ pnext = &(*pnext)->next;
+ *pnext = p->next;
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
+ spin_unlock_irqrestore(&irq_map_lock, flags);
}
-EXPORT_SYMBOL(free_irq);
-
-/*
- * This is called when we want to synchronize with
- * interrupts. We may for example tell a device to
- * stop sending interrupts: but to make sure there
- * are no interrupts that are executing on another
- * CPU we need to call this function.
- */
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
- unsigned int cpu_irq;
- cpu_irq = irq & (NR_IRQS - 1);
- while (sparc_irq[cpu_irq].flags & SPARC_IRQ_INPROGRESS)
- cpu_relax();
-}
-EXPORT_SYMBOL(synchronize_irq);
-#endif /* SMP */
-void unexpected_irq(int irq, void *dev_id, struct pt_regs *regs)
+/* /proc/interrupts printing */
+int arch_show_interrupts(struct seq_file *p, int prec)
{
- int i;
- struct irqaction *action;
- unsigned int cpu_irq;
+ int j;
- cpu_irq = irq & (NR_IRQS - 1);
- action = sparc_irq[cpu_irq].action;
-
- printk(KERN_ERR "IO device interrupt, irq = %d\n", irq);
- printk(KERN_ERR "PC = %08lx NPC = %08lx FP=%08lx\n", regs->pc,
- regs->npc, regs->u_regs[14]);
- if (action) {
- printk(KERN_ERR "Expecting: ");
- for (i = 0; i < 16; i++)
- if (action->handler)
- printk(KERN_CONT "[%s:%d:0x%x] ", action->name,
- i, (unsigned int)action->handler);
- }
- printk(KERN_ERR "AIEEE\n");
- panic("bogus interrupt received");
+#ifdef CONFIG_SMP
+ seq_printf(p, "RES: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_data(j).irq_resched_count);
+ seq_printf(p, " IPI rescheduling interrupts\n");
+ seq_printf(p, "CAL: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_data(j).irq_call_count);
+ seq_printf(p, " IPI function call interrupts\n");
+#endif
+ seq_printf(p, "NMI: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_data(j).counter);
+ seq_printf(p, " Non-maskable interrupts\n");
+ return 0;
}
-void handler_irq(int pil, struct pt_regs *regs)
+void handler_irq(unsigned int pil, struct pt_regs *regs)
{
struct pt_regs *old_regs;
- struct irqaction *action;
- int cpu = smp_processor_id();
+ struct irq_bucket *p;
+ BUG_ON(pil > 15);
old_regs = set_irq_regs(regs);
irq_enter();
- disable_pil_irq(pil);
-#ifdef CONFIG_SMP
- /* Only rotate on lower priority IRQs (scsi, ethernet, etc.). */
- if ((sparc_cpu_model==sun4m) && (pil < 10))
- smp4m_irq_rotate(cpu);
-#endif
- action = sparc_irq[pil].action;
- sparc_irq[pil].flags |= SPARC_IRQ_INPROGRESS;
- kstat_cpu(cpu).irqs[pil]++;
- do {
- if (!action || !action->handler)
- unexpected_irq(pil, NULL, regs);
- action->handler(pil, action->dev_id);
- action = action->next;
- } while (action);
- sparc_irq[pil].flags &= ~SPARC_IRQ_INPROGRESS;
- enable_pil_irq(pil);
+
+ p = irq_map[pil];
+ while (p) {
+ struct irq_bucket *next = p->next;
+
+ generic_handle_irq(p->irq);
+ p = next;
+ }
irq_exit();
set_irq_regs(old_regs);
}
#if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE)
+static unsigned int floppy_irq;
-/*
- * Fast IRQs on the Sparc can only have one routine attached to them,
- * thus no sharing possible.
- */
-static int request_fast_irq(unsigned int irq,
- void (*handler)(void),
- unsigned long irqflags, const char *devname)
+int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler)
{
- struct irqaction *action;
- unsigned long flags;
unsigned int cpu_irq;
- int ret;
+ int err;
+
#if defined CONFIG_SMP && !defined CONFIG_SPARC_LEON
struct tt_entry *trap_table;
#endif
- cpu_irq = irq & (NR_IRQS - 1);
- if (cpu_irq > 14) {
- ret = -EINVAL;
- goto out;
- }
- if (!handler) {
- ret = -EINVAL;
- goto out;
- }
- spin_lock_irqsave(&irq_action_lock, flags);
+ err = request_irq(irq, irq_handler, 0, "floppy", NULL);
+ if (err)
+ return -1;
- action = sparc_irq[cpu_irq].action;
- if (action) {
- if (action->flags & IRQF_SHARED)
- panic("Trying to register fast irq when already shared.\n");
- if (irqflags & IRQF_SHARED)
- panic("Trying to register fast irq as shared.\n");
+ /* Save for later use in floppy interrupt handler */
+ floppy_irq = irq;
- /* Anyway, someone already owns it so cannot be made fast. */
- printk(KERN_ERR "request_fast_irq: Trying to register yet already owned.\n");
- ret = -EBUSY;
- goto out_unlock;
- }
-
- /*
- * If this is flagged as statically allocated then we use our
- * private struct which is never freed.
- */
- if (irqflags & SA_STATIC_ALLOC) {
- if (static_irq_count < MAX_STATIC_ALLOC)
- action = &static_irqaction[static_irq_count++];
- else
- printk(KERN_ERR "Fast IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
- irq, devname);
- }
-
- if (action == NULL)
- action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
- if (!action) {
- ret = -ENOMEM;
- goto out_unlock;
- }
+ cpu_irq = (irq & (NR_IRQS - 1));
/* Dork with trap table if we get this far. */
#define INSTANTIATE(table) \
table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_one = SPARC_RD_PSR_L0; \
table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two = \
- SPARC_BRANCH((unsigned long) handler, \
+ SPARC_BRANCH((unsigned long) floppy_hardint, \
(unsigned long) &table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two);\
table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_three = SPARC_RD_WIM_L3; \
table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_four = SPARC_NOP;
@@ -399,22 +289,9 @@ static int request_fast_irq(unsigned int irq,
* writing we have no CPU-neutral interface to fine-grained flushes.
*/
flush_cache_all();
-
- action->flags = irqflags;
- action->name = devname;
- action->dev_id = NULL;
- action->next = NULL;
-
- sparc_irq[cpu_irq].action = action;
-
- __enable_irq(irq);
-
- ret = 0;
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
- return ret;
+ return 0;
}
+EXPORT_SYMBOL(sparc_floppy_request_irq);
/*
* These variables are used to access state from the assembler
@@ -440,154 +317,23 @@ EXPORT_SYMBOL(pdma_base);
unsigned long pdma_areasize;
EXPORT_SYMBOL(pdma_areasize);
-static irq_handler_t floppy_irq_handler;
-
+/* Use the generic irq support to call floppy_interrupt
+ * which was setup using request_irq() in sparc_floppy_request_irq().
+ * We only have one floppy interrupt so we do not need to check
+ * for additional handlers being wired up by irq_link()
+ */
void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs)
{
struct pt_regs *old_regs;
- int cpu = smp_processor_id();
old_regs = set_irq_regs(regs);
- disable_pil_irq(irq);
irq_enter();
- kstat_cpu(cpu).irqs[irq]++;
- floppy_irq_handler(irq, dev_id);
+ generic_handle_irq(floppy_irq);
irq_exit();
- enable_pil_irq(irq);
set_irq_regs(old_regs);
- /*
- * XXX Eek, it's totally changed with preempt_count() and such
- * if (softirq_pending(cpu))
- * do_softirq();
- */
-}
-
-int sparc_floppy_request_irq(int irq, unsigned long flags,
- irq_handler_t irq_handler)
-{
- floppy_irq_handler = irq_handler;
- return request_fast_irq(irq, floppy_hardint, flags, "floppy");
}
-EXPORT_SYMBOL(sparc_floppy_request_irq);
-
#endif
-int request_irq(unsigned int irq,
- irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
-{
- struct irqaction *action, **actionp;
- unsigned long flags;
- unsigned int cpu_irq;
- int ret;
-
- if (sparc_cpu_model == sun4d)
- return sun4d_request_irq(irq, handler, irqflags, devname, dev_id);
-
- cpu_irq = irq & (NR_IRQS - 1);
- if (cpu_irq > 14) {
- ret = -EINVAL;
- goto out;
- }
- if (!handler) {
- ret = -EINVAL;
- goto out;
- }
-
- spin_lock_irqsave(&irq_action_lock, flags);
-
- actionp = &sparc_irq[cpu_irq].action;
- action = *actionp;
- if (action) {
- if (!(action->flags & IRQF_SHARED) || !(irqflags & IRQF_SHARED)) {
- ret = -EBUSY;
- goto out_unlock;
- }
- if ((action->flags & IRQF_DISABLED) != (irqflags & IRQF_DISABLED)) {
- printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n",
- irq);
- ret = -EBUSY;
- goto out_unlock;
- }
- for ( ; action; action = *actionp)
- actionp = &action->next;
- }
-
- /* If this is flagged as statically allocated then we use our
- * private struct which is never freed.
- */
- if (irqflags & SA_STATIC_ALLOC) {
- if (static_irq_count < MAX_STATIC_ALLOC)
- action = &static_irqaction[static_irq_count++];
- else
- printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
- irq, devname);
- }
- if (action == NULL)
- action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
- if (!action) {
- ret = -ENOMEM;
- goto out_unlock;
- }
-
- action->handler = handler;
- action->flags = irqflags;
- action->name = devname;
- action->next = NULL;
- action->dev_id = dev_id;
-
- *actionp = action;
-
- __enable_irq(irq);
-
- ret = 0;
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
- return ret;
-}
-EXPORT_SYMBOL(request_irq);
-
-void disable_irq_nosync(unsigned int irq)
-{
- __disable_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq_nosync);
-
-void disable_irq(unsigned int irq)
-{
- __disable_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq);
-
-void enable_irq(unsigned int irq)
-{
- __enable_irq(irq);
-}
-EXPORT_SYMBOL(enable_irq);
-
-/*
- * We really don't need these at all on the Sparc. We only have
- * stubs here because they are exported to modules.
- */
-unsigned long probe_irq_on(void)
-{
- return 0;
-}
-EXPORT_SYMBOL(probe_irq_on);
-
-int probe_irq_off(unsigned long mask)
-{
- return 0;
-}
-EXPORT_SYMBOL(probe_irq_off);
-
-static unsigned int build_device_irq(struct platform_device *op,
- unsigned int real_irq)
-{
- return real_irq;
-}
-
/* djhr
* This could probably be made indirect too and assigned in the CPU
* bits of the code. That would be much nicer I think and would also
@@ -598,8 +344,6 @@ static unsigned int build_device_irq(struct platform_device *op,
void __init init_IRQ(void)
{
- sparc_irq_config.build_device_irq = build_device_irq;
-
switch (sparc_cpu_model) {
case sun4c:
case sun4:
@@ -607,14 +351,11 @@ void __init init_IRQ(void)
break;
case sun4m:
-#ifdef CONFIG_PCI
pcic_probe();
- if (pcic_present()) {
+ if (pcic_present())
sun4m_pci_init_IRQ();
- break;
- }
-#endif
- sun4m_init_IRQ();
+ else
+ sun4m_init_IRQ();
break;
case sun4d:
@@ -632,9 +373,3 @@ void __init init_IRQ(void)
btfixup();
}
-#ifdef CONFIG_PROC_FS
-void init_irq_proc(void)
-{
- /* For now, nothing... */
-}
-#endif /* CONFIG_PROC_FS */
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index b1d275ce343..4e78862d12f 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -224,13 +224,13 @@ static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
int cpuid;
cpumask_copy(&mask, affinity);
- if (cpus_equal(mask, cpu_online_map)) {
+ if (cpumask_equal(&mask, cpu_online_mask)) {
cpuid = map_to_cpu(irq);
} else {
cpumask_t tmp;
- cpus_and(tmp, cpu_online_map, mask);
- cpuid = cpus_empty(tmp) ? map_to_cpu(irq) : first_cpu(tmp);
+ cpumask_and(&tmp, cpu_online_mask, &mask);
+ cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp);
}
return cpuid;
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index 24ad449886b..6f6544cfa0e 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -6,11 +6,9 @@
#include <asm/traps.h>
/* cpu.c */
-extern const char *sparc_cpu_type;
extern const char *sparc_pmu_type;
-extern const char *sparc_fpu_type;
-
extern unsigned int fsr_storage;
+extern int ncpus_probed;
#ifdef CONFIG_SPARC32
/* cpu.c */
@@ -37,6 +35,7 @@ extern void sun4c_init_IRQ(void);
extern unsigned int lvl14_resolution;
extern void sun4m_init_IRQ(void);
+extern void sun4m_unmask_profile_irq(void);
extern void sun4m_clear_profile_irq(int cpu);
/* sun4d_irq.c */
diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
index 2969f777fa1..2f538ac2e13 100644
--- a/arch/sparc/kernel/leon_kernel.c
+++ b/arch/sparc/kernel/leon_kernel.c
@@ -19,53 +19,70 @@
#include <asm/leon_amba.h>
#include <asm/traps.h>
#include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <asm/setup.h>
#include "prom.h"
#include "irq.h"
struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; /* interrupt controller base address */
struct leon3_gptimer_regs_map *leon3_gptimer_regs; /* timer controller base address */
-struct amba_apb_device leon_percpu_timer_dev[16];
int leondebug_irq_disable;
int leon_debug_irqout;
static int dummy_master_l10_counter;
unsigned long amba_system_id;
+static DEFINE_SPINLOCK(leon_irq_lock);
unsigned long leon3_gptimer_irq; /* interrupt controller irq number */
unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */
+int leon3_ticker_irq; /* Timer ticker IRQ */
unsigned int sparc_leon_eirq;
-#define LEON_IMASK ((&leon3_irqctrl_regs->mask[0]))
+#define LEON_IMASK(cpu) (&leon3_irqctrl_regs->mask[cpu])
+#define LEON_IACK (&leon3_irqctrl_regs->iclear)
+#define LEON_DO_ACK_HW 1
-/* Return the IRQ of the pending IRQ on the extended IRQ controller */
-int sparc_leon_eirq_get(int eirq, int cpu)
+/* Return the last ACKed IRQ by the Extended IRQ controller. It has already
+ * been (automatically) ACKed when the CPU takes the trap.
+ */
+static inline unsigned int leon_eirq_get(int cpu)
{
return LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->intid[cpu]) & 0x1f;
}
-irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id)
+/* Handle one or multiple IRQs from the extended interrupt controller */
+static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc)
{
- printk(KERN_ERR "sparc_leon_eirq_isr: ERROR EXTENDED IRQ\n");
- return IRQ_HANDLED;
+ unsigned int eirq;
+ int cpu = sparc_leon3_cpuid();
+
+ eirq = leon_eirq_get(cpu);
+ if ((eirq & 0x10) && irq_map[eirq]->irq) /* bit4 tells if IRQ happened */
+ generic_handle_irq(irq_map[eirq]->irq);
}
/* The extended IRQ controller has been found, this function registers it */
-void sparc_leon_eirq_register(int eirq)
+void leon_eirq_setup(unsigned int eirq)
{
- int irq;
+ unsigned long mask, oldmask;
+ unsigned int veirq;
- /* Register a "BAD" handler for this interrupt, it should never happen */
- irq = request_irq(eirq, sparc_leon_eirq_isr,
- (IRQF_DISABLED | SA_STATIC_ALLOC), "extirq", NULL);
-
- if (irq) {
- printk(KERN_ERR
- "sparc_leon_eirq_register: unable to attach IRQ%d\n",
- eirq);
- } else {
- sparc_leon_eirq = eirq;
+ if (eirq < 1 || eirq > 0xf) {
+ printk(KERN_ERR "LEON EXT IRQ NUMBER BAD: %d\n", eirq);
+ return;
}
+ veirq = leon_build_device_irq(eirq, leon_handle_ext_irq, "extirq", 0);
+
+ /*
+ * Unmask the Extended IRQ, the IRQs routed through the Ext-IRQ
+ * controller have a mask-bit of their own, so this is safe.
+ */
+ irq_link(veirq);
+ mask = 1 << eirq;
+ oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(boot_cpu_id));
+ LEON3_BYPASS_STORE_PA(LEON_IMASK(boot_cpu_id), (oldmask | mask));
+ sparc_leon_eirq = eirq;
}
static inline unsigned long get_irqmask(unsigned int irq)
@@ -83,35 +100,151 @@ static inline unsigned long get_irqmask(unsigned int irq)
return mask;
}
-static void leon_enable_irq(unsigned int irq_nr)
+#ifdef CONFIG_SMP
+static int irq_choose_cpu(const struct cpumask *affinity)
{
- unsigned long mask, flags;
- mask = get_irqmask(irq_nr);
- local_irq_save(flags);
- LEON3_BYPASS_STORE_PA(LEON_IMASK,
- (LEON3_BYPASS_LOAD_PA(LEON_IMASK) | (mask)));
- local_irq_restore(flags);
+ cpumask_t mask;
+
+ cpus_and(mask, cpu_online_map, *affinity);
+ if (cpus_equal(mask, cpu_online_map) || cpus_empty(mask))
+ return boot_cpu_id;
+ else
+ return first_cpu(mask);
}
+#else
+#define irq_choose_cpu(affinity) boot_cpu_id
+#endif
-static void leon_disable_irq(unsigned int irq_nr)
+static int leon_set_affinity(struct irq_data *data, const struct cpumask *dest,
+ bool force)
{
- unsigned long mask, flags;
- mask = get_irqmask(irq_nr);
- local_irq_save(flags);
- LEON3_BYPASS_STORE_PA(LEON_IMASK,
- (LEON3_BYPASS_LOAD_PA(LEON_IMASK) & ~(mask)));
- local_irq_restore(flags);
+ unsigned long mask, oldmask, flags;
+ int oldcpu, newcpu;
+
+ mask = (unsigned long)data->chip_data;
+ oldcpu = irq_choose_cpu(data->affinity);
+ newcpu = irq_choose_cpu(dest);
+
+ if (oldcpu == newcpu)
+ goto out;
+
+ /* unmask on old CPU first before enabling on the selected CPU */
+ spin_lock_irqsave(&leon_irq_lock, flags);
+ oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(oldcpu));
+ LEON3_BYPASS_STORE_PA(LEON_IMASK(oldcpu), (oldmask & ~mask));
+ oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(newcpu));
+ LEON3_BYPASS_STORE_PA(LEON_IMASK(newcpu), (oldmask | mask));
+ spin_unlock_irqrestore(&leon_irq_lock, flags);
+out:
+ return IRQ_SET_MASK_OK;
+}
+
+static void leon_unmask_irq(struct irq_data *data)
+{
+ unsigned long mask, oldmask, flags;
+ int cpu;
+
+ mask = (unsigned long)data->chip_data;
+ cpu = irq_choose_cpu(data->affinity);
+ spin_lock_irqsave(&leon_irq_lock, flags);
+ oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
+ LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask | mask));
+ spin_unlock_irqrestore(&leon_irq_lock, flags);
+}
+
+static void leon_mask_irq(struct irq_data *data)
+{
+ unsigned long mask, oldmask, flags;
+ int cpu;
+
+ mask = (unsigned long)data->chip_data;
+ cpu = irq_choose_cpu(data->affinity);
+ spin_lock_irqsave(&leon_irq_lock, flags);
+ oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
+ LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask & ~mask));
+ spin_unlock_irqrestore(&leon_irq_lock, flags);
+}
+
+static unsigned int leon_startup_irq(struct irq_data *data)
+{
+ irq_link(data->irq);
+ leon_unmask_irq(data);
+ return 0;
+}
+static void leon_shutdown_irq(struct irq_data *data)
+{
+ leon_mask_irq(data);
+ irq_unlink(data->irq);
+}
+
+/* Used by external level sensitive IRQ handlers on the LEON: ACK IRQ ctrl */
+static void leon_eoi_irq(struct irq_data *data)
+{
+ unsigned long mask = (unsigned long)data->chip_data;
+
+ if (mask & LEON_DO_ACK_HW)
+ LEON3_BYPASS_STORE_PA(LEON_IACK, mask & ~LEON_DO_ACK_HW);
+}
+
+static struct irq_chip leon_irq = {
+ .name = "leon",
+ .irq_startup = leon_startup_irq,
+ .irq_shutdown = leon_shutdown_irq,
+ .irq_mask = leon_mask_irq,
+ .irq_unmask = leon_unmask_irq,
+ .irq_eoi = leon_eoi_irq,
+ .irq_set_affinity = leon_set_affinity,
+};
+
+/*
+ * Build a LEON IRQ for the edge triggered LEON IRQ controller:
+ * Edge (normal) IRQ - handle_simple_irq, ack=DONT-CARE, never ack
+ * Level IRQ (PCI|Level-GPIO) - handle_fasteoi_irq, ack=1, ack after ISR
+ * Per-CPU Edge - handle_percpu_irq, ack=0
+ */
+unsigned int leon_build_device_irq(unsigned int real_irq,
+ irq_flow_handler_t flow_handler,
+ const char *name, int do_ack)
+{
+ unsigned int irq;
+ unsigned long mask;
+
+ irq = 0;
+ mask = get_irqmask(real_irq);
+ if (mask == 0)
+ goto out;
+
+ irq = irq_alloc(real_irq, real_irq);
+ if (irq == 0)
+ goto out;
+
+ if (do_ack)
+ mask |= LEON_DO_ACK_HW;
+
+ irq_set_chip_and_handler_name(irq, &leon_irq,
+ flow_handler, name);
+ irq_set_chip_data(irq, (void *)mask);
+
+out:
+ return irq;
+}
+
+static unsigned int _leon_build_device_irq(struct platform_device *op,
+ unsigned int real_irq)
+{
+ return leon_build_device_irq(real_irq, handle_simple_irq, "edge", 0);
}
void __init leon_init_timers(irq_handler_t counter_fn)
{
- int irq;
+ int irq, eirq;
struct device_node *rootnp, *np, *nnp;
struct property *pp;
int len;
- int cpu, icsel;
+ int icsel;
int ampopts;
+ int err;
leondebug_irq_disable = 0;
leon_debug_irqout = 0;
@@ -173,98 +306,85 @@ void __init leon_init_timers(irq_handler_t counter_fn)
leon3_gptimer_irq = *(unsigned int *)pp->value;
} while (0);
- if (leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq) {
- LEON3_BYPASS_STORE_PA(
- &leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0);
- LEON3_BYPASS_STORE_PA(
- &leon3_gptimer_regs->e[leon3_gptimer_idx].rld,
- (((1000000 / HZ) - 1)));
- LEON3_BYPASS_STORE_PA(
+ if (!(leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq))
+ goto bad;
+
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0);
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld,
+ (((1000000 / HZ) - 1)));
+ LEON3_BYPASS_STORE_PA(
&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, 0);
#ifdef CONFIG_SMP
- leon_percpu_timer_dev[0].start = (int)leon3_gptimer_regs;
- leon_percpu_timer_dev[0].irq = leon3_gptimer_irq + 1 +
- leon3_gptimer_idx;
-
- if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) &
- (1<<LEON3_GPTIMER_SEPIRQ))) {
- prom_printf("irq timer not configured with separate irqs\n");
- BUG();
- }
+ leon3_ticker_irq = leon3_gptimer_irq + 1 + leon3_gptimer_idx;
- LEON3_BYPASS_STORE_PA(
- &leon3_gptimer_regs->e[leon3_gptimer_idx+1].val, 0);
- LEON3_BYPASS_STORE_PA(
- &leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld,
- (((1000000/HZ) - 1)));
- LEON3_BYPASS_STORE_PA(
- &leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, 0);
-# endif
-
- /*
- * The IRQ controller may (if implemented) consist of multiple
- * IRQ controllers, each mapped on a 4Kb boundary.
- * Each CPU may be routed to different IRQCTRLs, however
- * we assume that all CPUs (in SMP system) is routed to the
- * same IRQ Controller, and for non-SMP only one IRQCTRL is
- * accessed anyway.
- * In AMP systems, Linux must run on CPU0 for the time being.
- */
- cpu = sparc_leon3_cpuid();
- icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[cpu/8]);
- icsel = (icsel >> ((7 - (cpu&0x7)) * 4)) & 0xf;
- leon3_irqctrl_regs += icsel;
- } else {
- goto bad;
+ if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) &
+ (1<<LEON3_GPTIMER_SEPIRQ))) {
+ printk(KERN_ERR "timer not configured with separate irqs\n");
+ BUG();
}
- irq = request_irq(leon3_gptimer_irq+leon3_gptimer_idx,
- counter_fn,
- (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].val,
+ 0);
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld,
+ (((1000000/HZ) - 1)));
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
+ 0);
+#endif
- if (irq) {
- printk(KERN_ERR "leon_time_init: unable to attach IRQ%d\n",
- LEON_INTERRUPT_TIMER1);
+ /*
+ * The IRQ controller may (if implemented) consist of multiple
+ * IRQ controllers, each mapped on a 4Kb boundary.
+ * Each CPU may be routed to different IRQCTRLs, however
+ * we assume that all CPUs (in SMP system) is routed to the
+ * same IRQ Controller, and for non-SMP only one IRQCTRL is
+ * accessed anyway.
+ * In AMP systems, Linux must run on CPU0 for the time being.
+ */
+ icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[boot_cpu_id/8]);
+ icsel = (icsel >> ((7 - (boot_cpu_id&0x7)) * 4)) & 0xf;
+ leon3_irqctrl_regs += icsel;
+
+ /* Mask all IRQs on boot-cpu IRQ controller */
+ LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[boot_cpu_id], 0);
+
+ /* Probe extended IRQ controller */
+ eirq = (LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->mpstatus)
+ >> 16) & 0xf;
+ if (eirq != 0)
+ leon_eirq_setup(eirq);
+
+ irq = _leon_build_device_irq(NULL, leon3_gptimer_irq+leon3_gptimer_idx);
+ err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
+ if (err) {
+ printk(KERN_ERR "unable to attach timer IRQ%d\n", irq);
prom_halt();
}
-# ifdef CONFIG_SMP
- {
- unsigned long flags;
- struct tt_entry *trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_percpu_timer_dev[0].irq - 1)];
-
- /* For SMP we use the level 14 ticker, however the bootup code
- * has copied the firmwares level 14 vector into boot cpu's
- * trap table, we must fix this now or we get squashed.
- */
- local_irq_save(flags);
-
- patchme_maybe_smp_msg[0] = 0x01000000; /* NOP out the branch */
-
- /* Adjust so that we jump directly to smpleon_ticker */
- trap_table->inst_three += smpleon_ticker - real_irq_entry;
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
+ LEON3_GPTIMER_EN |
+ LEON3_GPTIMER_RL |
+ LEON3_GPTIMER_LD |
+ LEON3_GPTIMER_IRQEN);
- local_flush_cache_all();
- local_irq_restore(flags);
+#ifdef CONFIG_SMP
+ /* Install per-cpu IRQ handler for broadcasted ticker */
+ irq = leon_build_device_irq(leon3_ticker_irq, handle_percpu_irq,
+ "per-cpu", 0);
+ err = request_irq(irq, leon_percpu_timer_interrupt,
+ IRQF_PERCPU | IRQF_TIMER, "ticker",
+ NULL);
+ if (err) {
+ printk(KERN_ERR "unable to attach ticker IRQ%d\n", irq);
+ prom_halt();
}
-# endif
-
- if (leon3_gptimer_regs) {
- LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
- LEON3_GPTIMER_EN |
- LEON3_GPTIMER_RL |
- LEON3_GPTIMER_LD | LEON3_GPTIMER_IRQEN);
-#ifdef CONFIG_SMP
- LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
- LEON3_GPTIMER_EN |
- LEON3_GPTIMER_RL |
- LEON3_GPTIMER_LD |
- LEON3_GPTIMER_IRQEN);
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
+ LEON3_GPTIMER_EN |
+ LEON3_GPTIMER_RL |
+ LEON3_GPTIMER_LD |
+ LEON3_GPTIMER_IRQEN);
#endif
-
- }
return;
bad:
printk(KERN_ERR "No Timer/irqctrl found\n");
@@ -281,9 +401,6 @@ void leon_load_profile_irq(int cpu, unsigned int limit)
BUG();
}
-
-
-
void __init leon_trans_init(struct device_node *dp)
{
if (strcmp(dp->type, "cpu") == 0 && strcmp(dp->name, "<NULL>") == 0) {
@@ -337,22 +454,18 @@ void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu)
{
unsigned long mask, flags, *addr;
mask = get_irqmask(irq_nr);
- local_irq_save(flags);
- addr = (unsigned long *)&(leon3_irqctrl_regs->mask[cpu]);
- LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | (mask)));
- local_irq_restore(flags);
+ spin_lock_irqsave(&leon_irq_lock, flags);
+ addr = (unsigned long *)LEON_IMASK(cpu);
+ LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | mask));
+ spin_unlock_irqrestore(&leon_irq_lock, flags);
}
#endif
void __init leon_init_IRQ(void)
{
- sparc_irq_config.init_timers = leon_init_timers;
-
- BTFIXUPSET_CALL(enable_irq, leon_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_irq, leon_disable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(enable_pil_irq, leon_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_pil_irq, leon_disable_irq, BTFIXUPCALL_NORM);
+ sparc_irq_config.init_timers = leon_init_timers;
+ sparc_irq_config.build_device_irq = _leon_build_device_irq;
BTFIXUPSET_CALL(clear_clock_irq, leon_clear_clock_irq,
BTFIXUPCALL_NORM);
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
index 8f5de4aa3c0..fe8fb44c609 100644
--- a/arch/sparc/kernel/leon_smp.c
+++ b/arch/sparc/kernel/leon_smp.c
@@ -14,6 +14,7 @@
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/of.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
@@ -29,6 +30,7 @@
#include <asm/ptrace.h>
#include <asm/atomic.h>
#include <asm/irq_regs.h>
+#include <asm/traps.h>
#include <asm/delay.h>
#include <asm/irq.h>
@@ -50,9 +52,12 @@
extern ctxd_t *srmmu_ctx_table_phys;
static int smp_processors_ready;
extern volatile unsigned long cpu_callin_map[NR_CPUS];
-extern unsigned char boot_cpu_id;
extern cpumask_t smp_commenced_mask;
void __init leon_configure_cache_smp(void);
+static void leon_ipi_init(void);
+
+/* IRQ number of LEON IPIs */
+int leon_ipi_irq = LEON3_IRQ_IPI_DEFAULT;
static inline unsigned long do_swap(volatile unsigned long *ptr,
unsigned long val)
@@ -94,8 +99,6 @@ void __cpuinit leon_callin(void)
local_flush_cache_all();
local_flush_tlb_all();
- cpu_probe();
-
/* Fix idle thread fields. */
__asm__ __volatile__("ld [%0], %%g6\n\t" : : "r"(&current_set[cpuid])
: "memory" /* paranoid */);
@@ -104,11 +107,11 @@ void __cpuinit leon_callin(void)
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
- while (!cpu_isset(cpuid, smp_commenced_mask))
+ while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
mb();
local_irq_enable();
- cpu_set(cpuid, cpu_online_map);
+ set_cpu_online(cpuid, true);
}
/*
@@ -179,13 +182,16 @@ void __init leon_boot_cpus(void)
int nrcpu = leon_smp_nrcpus();
int me = smp_processor_id();
+ /* Setup IPI */
+ leon_ipi_init();
+
printk(KERN_INFO "%d:(%d:%d) cpus mpirq at 0x%x\n", (unsigned int)me,
(unsigned int)nrcpu, (unsigned int)NR_CPUS,
(unsigned int)&(leon3_irqctrl_regs->mpstatus));
leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, me);
leon_enable_irq_cpu(LEON3_IRQ_TICKER, me);
- leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, me);
+ leon_enable_irq_cpu(leon_ipi_irq, me);
leon_smp_setbroadcast(1 << LEON3_IRQ_TICKER);
@@ -220,6 +226,10 @@ int __cpuinit leon_boot_one_cpu(int i)
(unsigned int)&leon3_irqctrl_regs->mpstatus);
local_flush_cache_all();
+ /* Make sure all IRQs are of from the start for this new CPU */
+ LEON_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[i], 0);
+
+ /* Wake one CPU */
LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpstatus), 1 << i);
/* wheee... it's going... */
@@ -236,7 +246,7 @@ int __cpuinit leon_boot_one_cpu(int i)
} else {
leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, i);
leon_enable_irq_cpu(LEON3_IRQ_TICKER, i);
- leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, i);
+ leon_enable_irq_cpu(leon_ipi_irq, i);
}
local_flush_cache_all();
@@ -262,21 +272,21 @@ void __init leon_smp_done(void)
local_flush_cache_all();
/* Free unneeded trap tables */
- if (!cpu_isset(1, cpu_present_map)) {
+ if (!cpu_present(1)) {
ClearPageReserved(virt_to_page(&trapbase_cpu1));
init_page_count(virt_to_page(&trapbase_cpu1));
free_page((unsigned long)&trapbase_cpu1);
totalram_pages++;
num_physpages++;
}
- if (!cpu_isset(2, cpu_present_map)) {
+ if (!cpu_present(2)) {
ClearPageReserved(virt_to_page(&trapbase_cpu2));
init_page_count(virt_to_page(&trapbase_cpu2));
free_page((unsigned long)&trapbase_cpu2);
totalram_pages++;
num_physpages++;
}
- if (!cpu_isset(3, cpu_present_map)) {
+ if (!cpu_present(3)) {
ClearPageReserved(virt_to_page(&trapbase_cpu3));
init_page_count(virt_to_page(&trapbase_cpu3));
free_page((unsigned long)&trapbase_cpu3);
@@ -292,6 +302,99 @@ void leon_irq_rotate(int cpu)
{
}
+struct leon_ipi_work {
+ int single;
+ int msk;
+ int resched;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct leon_ipi_work, leon_ipi_work);
+
+/* Initialize IPIs on the LEON, in order to save IRQ resources only one IRQ
+ * is used for all three types of IPIs.
+ */
+static void __init leon_ipi_init(void)
+{
+ int cpu, len;
+ struct leon_ipi_work *work;
+ struct property *pp;
+ struct device_node *rootnp;
+ struct tt_entry *trap_table;
+ unsigned long flags;
+
+ /* Find IPI IRQ or stick with default value */
+ rootnp = of_find_node_by_path("/ambapp0");
+ if (rootnp) {
+ pp = of_find_property(rootnp, "ipi_num", &len);
+ if (pp && (*(int *)pp->value))
+ leon_ipi_irq = *(int *)pp->value;
+ }
+ printk(KERN_INFO "leon: SMP IPIs at IRQ %d\n", leon_ipi_irq);
+
+ /* Adjust so that we jump directly to smpleon_ipi */
+ local_irq_save(flags);
+ trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_ipi_irq - 1)];
+ trap_table->inst_three += smpleon_ipi - real_irq_entry;
+ local_flush_cache_all();
+ local_irq_restore(flags);
+
+ for_each_possible_cpu(cpu) {
+ work = &per_cpu(leon_ipi_work, cpu);
+ work->single = work->msk = work->resched = 0;
+ }
+}
+
+static void leon_ipi_single(int cpu)
+{
+ struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+ /* Mark work */
+ work->single = 1;
+
+ /* Generate IRQ on the CPU */
+ set_cpu_int(cpu, leon_ipi_irq);
+}
+
+static void leon_ipi_mask_one(int cpu)
+{
+ struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+ /* Mark work */
+ work->msk = 1;
+
+ /* Generate IRQ on the CPU */
+ set_cpu_int(cpu, leon_ipi_irq);
+}
+
+static void leon_ipi_resched(int cpu)
+{
+ struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+ /* Mark work */
+ work->resched = 1;
+
+ /* Generate IRQ on the CPU (any IRQ will cause resched) */
+ set_cpu_int(cpu, leon_ipi_irq);
+}
+
+void leonsmp_ipi_interrupt(void)
+{
+ struct leon_ipi_work *work = &__get_cpu_var(leon_ipi_work);
+
+ if (work->single) {
+ work->single = 0;
+ smp_call_function_single_interrupt();
+ }
+ if (work->msk) {
+ work->msk = 0;
+ smp_call_function_interrupt();
+ }
+ if (work->resched) {
+ work->resched = 0;
+ smp_resched_interrupt();
+ }
+}
+
static struct smp_funcall {
smpfunc_t func;
unsigned long arg1;
@@ -337,10 +440,10 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
{
register int i;
- cpu_clear(smp_processor_id(), mask);
- cpus_and(mask, cpu_online_map, mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+ cpumask_and(&mask, cpu_online_mask, &mask);
for (i = 0; i <= high; i++) {
- if (cpu_isset(i, mask)) {
+ if (cpumask_test_cpu(i, &mask)) {
ccall_info.processors_in[i] = 0;
ccall_info.processors_out[i] = 0;
set_cpu_int(i, LEON3_IRQ_CROSS_CALL);
@@ -354,7 +457,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_in[i])
@@ -363,7 +466,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_out[i])
@@ -386,27 +489,23 @@ void leon_cross_call_irq(void)
ccall_info.processors_out[i] = 1;
}
-void leon_percpu_timer_interrupt(struct pt_regs *regs)
+irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused)
{
- struct pt_regs *old_regs;
int cpu = smp_processor_id();
- old_regs = set_irq_regs(regs);
-
leon_clear_profile_irq(cpu);
profile_tick(CPU_PROFILING);
if (!--prof_counter(cpu)) {
- int user = user_mode(regs);
+ int user = user_mode(get_irq_regs());
- irq_enter();
update_process_times(user);
- irq_exit();
prof_counter(cpu) = prof_multiplier(cpu);
}
- set_irq_regs(old_regs);
+
+ return IRQ_HANDLED;
}
static void __init smp_setup_percpu_timer(void)
@@ -449,6 +548,9 @@ void __init leon_init_smp(void)
BTFIXUPSET_CALL(smp_cross_call, leon_cross_call, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(__hard_smp_processor_id, __leon_processor_id,
BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_resched, leon_ipi_resched, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_single, leon_ipi_single, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_mask_one, leon_ipi_mask_one, BTFIXUPCALL_NORM);
}
#endif /* CONFIG_SPARC_LEON */
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 56db06432ce..42f28c7420e 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -768,7 +768,7 @@ static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handl
cpuid, NR_CPUS);
continue;
}
- if (!cpu_isset(cpuid, *mask))
+ if (!cpumask_test_cpu(cpuid, mask))
continue;
#endif
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index 5c149689bb2..3bb2eace58c 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -622,8 +622,9 @@ static unsigned int __init build_one_device_irq(struct platform_device *op,
out:
nid = of_node_to_nid(dp);
if (nid != -1) {
- cpumask_t numa_mask = *cpumask_of_node(nid);
+ cpumask_t numa_mask;
+ cpumask_copy(&numa_mask, cpumask_of_node(nid));
irq_set_affinity(irq, &numa_mask);
}
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index 30982e9ab62..580651af73f 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -284,8 +284,9 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
nid = pbm->numa_node;
if (nid != -1) {
- cpumask_t numa_mask = *cpumask_of_node(nid);
+ cpumask_t numa_mask;
+ cpumask_copy(&numa_mask, cpumask_of_node(nid));
irq_set_affinity(irq, &numa_mask);
}
err = request_irq(irq, sparc64_msiq_interrupt, 0,
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 2cdc131b50a..948601a066f 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -164,6 +164,9 @@ void __iomem *pcic_regs;
volatile int pcic_speculative;
volatile int pcic_trapped;
+/* forward */
+unsigned int pcic_build_device_irq(struct platform_device *op,
+ unsigned int real_irq);
#define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (((unsigned int)bus) << 16) | (((unsigned int)device_fn) << 8) | (where & ~3))
@@ -523,6 +526,7 @@ static void
pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
{
struct pcic_ca2irq *p;
+ unsigned int real_irq;
int i, ivec;
char namebuf[64];
@@ -551,26 +555,25 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
i = p->pin;
if (i >= 0 && i < 4) {
ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_LO);
- dev->irq = ivec >> (i << 2) & 0xF;
+ real_irq = ivec >> (i << 2) & 0xF;
} else if (i >= 4 && i < 8) {
ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI);
- dev->irq = ivec >> ((i-4) << 2) & 0xF;
+ real_irq = ivec >> ((i-4) << 2) & 0xF;
} else { /* Corrupted map */
printk("PCIC: BAD PIN %d\n", i); for (;;) {}
}
/* P3 */ /* printk("PCIC: device %s pin %d ivec 0x%x irq %x\n", namebuf, i, ivec, dev->irq); */
- /*
- * dev->irq=0 means PROM did not bother to program the upper
+ /* real_irq means PROM did not bother to program the upper
* half of PCIC. This happens on JS-E with PROM 3.11, for instance.
*/
- if (dev->irq == 0 || p->force) {
+ if (real_irq == 0 || p->force) {
if (p->irq == 0 || p->irq >= 15) { /* Corrupted map */
printk("PCIC: BAD IRQ %d\n", p->irq); for (;;) {}
}
printk("PCIC: setting irq %d at pin %d for device %02x:%02x\n",
p->irq, p->pin, dev->bus->number, dev->devfn);
- dev->irq = p->irq;
+ real_irq = p->irq;
i = p->pin;
if (i >= 4) {
@@ -584,7 +587,8 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
ivec |= p->irq << (i << 2);
writew(ivec, pcic->pcic_regs+PCI_INT_SELECT_LO);
}
- }
+ }
+ dev->irq = pcic_build_device_irq(NULL, real_irq);
}
/*
@@ -729,6 +733,7 @@ void __init pci_time_init(void)
struct linux_pcic *pcic = &pcic0;
unsigned long v;
int timer_irq, irq;
+ int err;
do_arch_gettimeoffset = pci_gettimeoffset;
@@ -740,9 +745,10 @@ void __init pci_time_init(void)
timer_irq = PCI_COUNTER_IRQ_SYS(v);
writel (PCI_COUNTER_IRQ_SET(timer_irq, 0),
pcic->pcic_regs+PCI_COUNTER_IRQ);
- irq = request_irq(timer_irq, pcic_timer_handler,
- (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
- if (irq) {
+ irq = pcic_build_device_irq(NULL, timer_irq);
+ err = request_irq(irq, pcic_timer_handler,
+ IRQF_TIMER, "timer", NULL);
+ if (err) {
prom_printf("time_init: unable to attach IRQ%d\n", timer_irq);
prom_halt();
}
@@ -803,50 +809,73 @@ static inline unsigned long get_irqmask(int irq_nr)
return 1 << irq_nr;
}
-static void pcic_disable_irq(unsigned int irq_nr)
+static void pcic_mask_irq(struct irq_data *data)
{
unsigned long mask, flags;
- mask = get_irqmask(irq_nr);
+ mask = (unsigned long)data->chip_data;
local_irq_save(flags);
writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET);
local_irq_restore(flags);
}
-static void pcic_enable_irq(unsigned int irq_nr)
+static void pcic_unmask_irq(struct irq_data *data)
{
unsigned long mask, flags;
- mask = get_irqmask(irq_nr);
+ mask = (unsigned long)data->chip_data;
local_irq_save(flags);
writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR);
local_irq_restore(flags);
}
-static void pcic_load_profile_irq(int cpu, unsigned int limit)
+static unsigned int pcic_startup_irq(struct irq_data *data)
{
- printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__);
+ irq_link(data->irq);
+ pcic_unmask_irq(data);
+ return 0;
}
-/* We assume the caller has disabled local interrupts when these are called,
- * or else very bizarre behavior will result.
- */
-static void pcic_disable_pil_irq(unsigned int pil)
+static struct irq_chip pcic_irq = {
+ .name = "pcic",
+ .irq_startup = pcic_startup_irq,
+ .irq_mask = pcic_mask_irq,
+ .irq_unmask = pcic_unmask_irq,
+};
+
+unsigned int pcic_build_device_irq(struct platform_device *op,
+ unsigned int real_irq)
{
- writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET);
+ unsigned int irq;
+ unsigned long mask;
+
+ irq = 0;
+ mask = get_irqmask(real_irq);
+ if (mask == 0)
+ goto out;
+
+ irq = irq_alloc(real_irq, real_irq);
+ if (irq == 0)
+ goto out;
+
+ irq_set_chip_and_handler_name(irq, &pcic_irq,
+ handle_level_irq, "PCIC");
+ irq_set_chip_data(irq, (void *)mask);
+
+out:
+ return irq;
}
-static void pcic_enable_pil_irq(unsigned int pil)
+
+static void pcic_load_profile_irq(int cpu, unsigned int limit)
{
- writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR);
+ printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__);
}
void __init sun4m_pci_init_IRQ(void)
{
- BTFIXUPSET_CALL(enable_irq, pcic_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_irq, pcic_disable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(enable_pil_irq, pcic_enable_pil_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_pil_irq, pcic_disable_pil_irq, BTFIXUPCALL_NORM);
+ sparc_irq_config.build_device_irq = pcic_build_device_irq;
+
BTFIXUPSET_CALL(clear_clock_irq, pcic_clear_clock_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(load_profile_irq, pcic_load_profile_irq, BTFIXUPCALL_NORM);
}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index ee8426ede7c..2cb0e1c001e 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -26,6 +26,7 @@
#include <asm/nmi.h>
#include <asm/pcr.h>
+#include "kernel.h"
#include "kstack.h"
/* Sparc64 chips have two performance counters, 32-bits each, with
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 17529298c50..c8cc461ff75 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -128,8 +128,16 @@ void cpu_idle(void)
set_thread_flag(TIF_POLLING_NRFLAG);
/* endless idle loop with no priority at all */
while(1) {
- while (!need_resched())
- cpu_relax();
+#ifdef CONFIG_SPARC_LEON
+ if (pm_idle) {
+ while (!need_resched())
+ (*pm_idle)();
+ } else
+#endif
+ {
+ while (!need_resched())
+ cpu_relax();
+ }
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index 05fb2533058..5ce3d15a99b 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -326,7 +326,6 @@ void __init of_console_init(void)
of_console_options = NULL;
}
- prom_printf(msg, of_console_path);
printk(msg, of_console_path);
}
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index 7b8b76c9557..3609bdee9ed 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -103,16 +103,20 @@ static unsigned int boot_flags __initdata = 0;
/* Exported for mm/init.c:paging_init. */
unsigned long cmdline_memory_size __initdata = 0;
+/* which CPU booted us (0xff = not set) */
+unsigned char boot_cpu_id = 0xff; /* 0xff will make it into DATA section... */
+unsigned char boot_cpu_id4; /* boot_cpu_id << 2 */
+
static void
prom_console_write(struct console *con, const char *s, unsigned n)
{
prom_write(s, n);
}
-static struct console prom_debug_console = {
- .name = "debug",
+static struct console prom_early_console = {
+ .name = "earlyprom",
.write = prom_console_write,
- .flags = CON_PRINTBUFFER,
+ .flags = CON_PRINTBUFFER | CON_BOOT,
.index = -1,
};
@@ -133,8 +137,7 @@ static void __init process_switch(char c)
prom_halt();
break;
case 'p':
- /* Use PROM debug console. */
- register_console(&prom_debug_console);
+ /* Just ignore, this behavior is now the default. */
break;
default:
printk("Unknown boot switch (-%c)\n", c);
@@ -215,6 +218,10 @@ void __init setup_arch(char **cmdline_p)
strcpy(boot_command_line, *cmdline_p);
parse_early_param();
+ boot_flags_init(*cmdline_p);
+
+ register_console(&prom_early_console);
+
/* Set sparc_cpu_model */
sparc_cpu_model = sun_unknown;
if (!strcmp(&cputypval[0], "sun4 "))
@@ -265,7 +272,6 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
#endif
- boot_flags_init(*cmdline_p);
idprom_init();
if (ARCH_SUN4C)
@@ -311,75 +317,6 @@ void __init setup_arch(char **cmdline_p)
smp_setup_cpu_possible_map();
}
-static int ncpus_probed;
-
-static int show_cpuinfo(struct seq_file *m, void *__unused)
-{
- seq_printf(m,
- "cpu\t\t: %s\n"
- "fpu\t\t: %s\n"
- "promlib\t\t: Version %d Revision %d\n"
- "prom\t\t: %d.%d\n"
- "type\t\t: %s\n"
- "ncpus probed\t: %d\n"
- "ncpus active\t: %d\n"
-#ifndef CONFIG_SMP
- "CPU0Bogo\t: %lu.%02lu\n"
- "CPU0ClkTck\t: %ld\n"
-#endif
- ,
- sparc_cpu_type,
- sparc_fpu_type ,
- romvec->pv_romvers,
- prom_rev,
- romvec->pv_printrev >> 16,
- romvec->pv_printrev & 0xffff,
- &cputypval[0],
- ncpus_probed,
- num_online_cpus()
-#ifndef CONFIG_SMP
- , cpu_data(0).udelay_val/(500000/HZ),
- (cpu_data(0).udelay_val/(5000/HZ)) % 100,
- cpu_data(0).clock_tick
-#endif
- );
-
-#ifdef CONFIG_SMP
- smp_bogo(m);
-#endif
- mmu_info(m);
-#ifdef CONFIG_SMP
- smp_info(m);
-#endif
- return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
- /* The pointer we are returning is arbitrary,
- * it just has to be non-NULL and not IS_ERR
- * in the success case.
- */
- return *pos == 0 ? &c_start : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
- .start =c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
-};
-
extern int stop_a_enabled;
void sun_do_break(void)
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 29bafe051bb..f3b6850cc8d 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -339,84 +339,6 @@ void __init setup_arch(char **cmdline_p)
paging_init();
}
-/* BUFFER is PAGE_SIZE bytes long. */
-
-extern void smp_info(struct seq_file *);
-extern void smp_bogo(struct seq_file *);
-extern void mmu_info(struct seq_file *);
-
-unsigned int dcache_parity_tl1_occurred;
-unsigned int icache_parity_tl1_occurred;
-
-int ncpus_probed;
-
-static int show_cpuinfo(struct seq_file *m, void *__unused)
-{
- seq_printf(m,
- "cpu\t\t: %s\n"
- "fpu\t\t: %s\n"
- "pmu\t\t: %s\n"
- "prom\t\t: %s\n"
- "type\t\t: %s\n"
- "ncpus probed\t: %d\n"
- "ncpus active\t: %d\n"
- "D$ parity tl1\t: %u\n"
- "I$ parity tl1\t: %u\n"
-#ifndef CONFIG_SMP
- "Cpu0ClkTck\t: %016lx\n"
-#endif
- ,
- sparc_cpu_type,
- sparc_fpu_type,
- sparc_pmu_type,
- prom_version,
- ((tlb_type == hypervisor) ?
- "sun4v" :
- "sun4u"),
- ncpus_probed,
- num_online_cpus(),
- dcache_parity_tl1_occurred,
- icache_parity_tl1_occurred
-#ifndef CONFIG_SMP
- , cpu_data(0).clock_tick
-#endif
- );
-#ifdef CONFIG_SMP
- smp_bogo(m);
-#endif
- mmu_info(m);
-#ifdef CONFIG_SMP
- smp_info(m);
-#endif
- return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
- /* The pointer we are returning is arbitrary,
- * it just has to be non-NULL and not IS_ERR
- * in the success case.
- */
- return *pos == 0 ? &c_start : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
- .start =c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
-};
-
extern int stop_a_enabled;
void sun_do_break(void)
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 442286d8343..d5b3958be0b 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -37,8 +37,6 @@
#include "irq.h"
volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,};
-unsigned char boot_cpu_id = 0;
-unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
cpumask_t smp_commenced_mask = CPU_MASK_NONE;
@@ -130,14 +128,57 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 };
void smp_send_reschedule(int cpu)
{
/*
- * XXX missing reschedule IPI, see scheduler_ipi()
+ * CPU model dependent way of implementing IPI generation targeting
+ * a single CPU. The trap handler needs only to do trap entry/return
+ * to call schedule.
*/
+ BTFIXUP_CALL(smp_ipi_resched)(cpu);
}
void smp_send_stop(void)
{
}
+void arch_send_call_function_single_ipi(int cpu)
+{
+ /* trigger one IPI single call on one CPU */
+ BTFIXUP_CALL(smp_ipi_single)(cpu);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ int cpu;
+
+ /* trigger IPI mask call on each CPU */
+ for_each_cpu(cpu, mask)
+ BTFIXUP_CALL(smp_ipi_mask_one)(cpu);
+}
+
+void smp_resched_interrupt(void)
+{
+ irq_enter();
+ scheduler_ipi();
+ local_cpu_data().irq_resched_count++;
+ irq_exit();
+ /* re-schedule routine called by interrupt return code. */
+}
+
+void smp_call_function_single_interrupt(void)
+{
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+ local_cpu_data().irq_call_count++;
+ irq_exit();
+}
+
+void smp_call_function_interrupt(void)
+{
+ irq_enter();
+ generic_smp_call_function_interrupt();
+ local_cpu_data().irq_call_count++;
+ irq_exit();
+}
+
void smp_flush_cache_all(void)
{
xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all));
@@ -153,9 +194,10 @@ void smp_flush_tlb_all(void)
void smp_flush_cache_mm(struct mm_struct *mm)
{
if(mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm);
local_flush_cache_mm(mm);
}
@@ -164,9 +206,10 @@ void smp_flush_cache_mm(struct mm_struct *mm)
void smp_flush_tlb_mm(struct mm_struct *mm)
{
if(mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask)) {
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask)) {
xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm);
if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
cpumask_copy(mm_cpumask(mm),
@@ -182,9 +225,10 @@ void smp_flush_cache_range(struct vm_area_struct *vma, unsigned long start,
struct mm_struct *mm = vma->vm_mm;
if (mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end);
local_flush_cache_range(vma, start, end);
}
@@ -196,9 +240,10 @@ void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
struct mm_struct *mm = vma->vm_mm;
if (mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end);
local_flush_tlb_range(vma, start, end);
}
@@ -209,9 +254,10 @@ void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
struct mm_struct *mm = vma->vm_mm;
if(mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page);
local_flush_cache_page(vma, page);
}
@@ -222,19 +268,15 @@ void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
struct mm_struct *mm = vma->vm_mm;
if(mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page);
local_flush_tlb_page(vma, page);
}
}
-void smp_reschedule_irq(void)
-{
- set_need_resched();
-}
-
void smp_flush_page_to_ram(unsigned long page)
{
/* Current theory is that those who call this are the one's
@@ -251,9 +293,10 @@ void smp_flush_page_to_ram(unsigned long page)
void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
{
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr);
local_flush_sig_insns(mm, insn_addr);
}
@@ -407,7 +450,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
};
if (!ret) {
- cpu_set(cpu, smp_commenced_mask);
+ cpumask_set_cpu(cpu, &smp_commenced_mask);
while (!cpu_online(cpu))
mb();
}
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 9478da7fdb3..99cb17251bb 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -121,11 +121,11 @@ void __cpuinit smp_callin(void)
/* inform the notifiers about the new cpu */
notify_cpu_starting(cpuid);
- while (!cpu_isset(cpuid, smp_commenced_mask))
+ while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
rmb();
ipi_call_lock_irq();
- cpu_set(cpuid, cpu_online_map);
+ set_cpu_online(cpuid, true);
ipi_call_unlock_irq();
/* idle thread is expected to have preempt disabled */
@@ -785,7 +785,7 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask
/* Send cross call to all processors mentioned in MASK_P
* except self. Really, there are only two cases currently,
- * "&cpu_online_map" and "&mm->cpu_vm_mask".
+ * "cpu_online_mask" and "mm_cpumask(mm)".
*/
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
{
@@ -797,7 +797,7 @@ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 d
/* Send cross call to all processors except self. */
static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
{
- smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
+ smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);
}
extern unsigned long xcall_sync_tick;
@@ -805,7 +805,7 @@ extern unsigned long xcall_sync_tick;
static void smp_start_sync_tick_client(int cpu)
{
xcall_deliver((u64) &xcall_sync_tick, 0, 0,
- &cpumask_of_cpu(cpu));
+ cpumask_of(cpu));
}
extern unsigned long xcall_call_function;
@@ -820,7 +820,7 @@ extern unsigned long xcall_call_function_single;
void arch_send_call_function_single_ipi(int cpu)
{
xcall_deliver((u64) &xcall_call_function_single, 0, 0,
- &cpumask_of_cpu(cpu));
+ cpumask_of(cpu));
}
void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
@@ -918,7 +918,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
}
if (data0) {
xcall_deliver(data0, __pa(pg_addr),
- (u64) pg_addr, &cpumask_of_cpu(cpu));
+ (u64) pg_addr, cpumask_of(cpu));
#ifdef CONFIG_DEBUG_DCFLUSH
atomic_inc(&dcpage_flushes_xcall);
#endif
@@ -954,7 +954,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
}
if (data0) {
xcall_deliver(data0, __pa(pg_addr),
- (u64) pg_addr, &cpu_online_map);
+ (u64) pg_addr, cpu_online_mask);
#ifdef CONFIG_DEBUG_DCFLUSH
atomic_inc(&dcpage_flushes_xcall);
#endif
@@ -1197,32 +1197,32 @@ void __devinit smp_fill_in_sib_core_maps(void)
for_each_present_cpu(i) {
unsigned int j;
- cpus_clear(cpu_core_map[i]);
+ cpumask_clear(&cpu_core_map[i]);
if (cpu_data(i).core_id == 0) {
- cpu_set(i, cpu_core_map[i]);
+ cpumask_set_cpu(i, &cpu_core_map[i]);
continue;
}
for_each_present_cpu(j) {
if (cpu_data(i).core_id ==
cpu_data(j).core_id)
- cpu_set(j, cpu_core_map[i]);
+ cpumask_set_cpu(j, &cpu_core_map[i]);
}
}
for_each_present_cpu(i) {
unsigned int j;
- cpus_clear(per_cpu(cpu_sibling_map, i));
+ cpumask_clear(&per_cpu(cpu_sibling_map, i));
if (cpu_data(i).proc_id == -1) {
- cpu_set(i, per_cpu(cpu_sibling_map, i));
+ cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
continue;
}
for_each_present_cpu(j) {
if (cpu_data(i).proc_id ==
cpu_data(j).proc_id)
- cpu_set(j, per_cpu(cpu_sibling_map, i));
+ cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i));
}
}
}
@@ -1232,10 +1232,10 @@ int __cpuinit __cpu_up(unsigned int cpu)
int ret = smp_boot_one_cpu(cpu);
if (!ret) {
- cpu_set(cpu, smp_commenced_mask);
- while (!cpu_isset(cpu, cpu_online_map))
+ cpumask_set_cpu(cpu, &smp_commenced_mask);
+ while (!cpu_online(cpu))
mb();
- if (!cpu_isset(cpu, cpu_online_map)) {
+ if (!cpu_online(cpu)) {
ret = -ENODEV;
} else {
/* On SUN4V, writes to %tick and %stick are
@@ -1269,7 +1269,7 @@ void cpu_play_dead(void)
tb->nonresum_mondo_pa, 0);
}
- cpu_clear(cpu, smp_commenced_mask);
+ cpumask_clear_cpu(cpu, &smp_commenced_mask);
membar_safe("#Sync");
local_irq_disable();
@@ -1290,13 +1290,13 @@ int __cpu_disable(void)
cpuinfo_sparc *c;
int i;
- for_each_cpu_mask(i, cpu_core_map[cpu])
- cpu_clear(cpu, cpu_core_map[i]);
- cpus_clear(cpu_core_map[cpu]);
+ for_each_cpu(i, &cpu_core_map[cpu])
+ cpumask_clear_cpu(cpu, &cpu_core_map[i]);
+ cpumask_clear(&cpu_core_map[cpu]);
- for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
- cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
+ for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
+ cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
+ cpumask_clear(&per_cpu(cpu_sibling_map, cpu));
c = &cpu_data(cpu);
@@ -1313,7 +1313,7 @@ int __cpu_disable(void)
local_irq_disable();
ipi_call_lock();
- cpu_clear(cpu, cpu_online_map);
+ set_cpu_online(cpu, false);
ipi_call_unlock();
cpu_map_rebuild();
@@ -1327,11 +1327,11 @@ void __cpu_die(unsigned int cpu)
for (i = 0; i < 100; i++) {
smp_rmb();
- if (!cpu_isset(cpu, smp_commenced_mask))
+ if (!cpumask_test_cpu(cpu, &smp_commenced_mask))
break;
msleep(100);
}
- if (cpu_isset(cpu, smp_commenced_mask)) {
+ if (cpumask_test_cpu(cpu, &smp_commenced_mask)) {
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
} else {
#if defined(CONFIG_SUN_LDOMS)
@@ -1341,7 +1341,7 @@ void __cpu_die(unsigned int cpu)
do {
hv_err = sun4v_cpu_stop(cpu);
if (hv_err == HV_EOK) {
- cpu_clear(cpu, cpu_present_map);
+ set_cpu_present(cpu, false);
break;
}
} while (--limit > 0);
@@ -1362,7 +1362,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
void smp_send_reschedule(int cpu)
{
xcall_deliver((u64) &xcall_receive_signal, 0, 0,
- &cpumask_of_cpu(cpu));
+ cpumask_of(cpu));
}
void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
diff --git a/arch/sparc/kernel/sun4c_irq.c b/arch/sparc/kernel/sun4c_irq.c
index 90eea38ad66..f6bf25a2ff8 100644
--- a/arch/sparc/kernel/sun4c_irq.c
+++ b/arch/sparc/kernel/sun4c_irq.c
@@ -65,62 +65,94 @@
*/
unsigned char __iomem *interrupt_enable;
-static void sun4c_disable_irq(unsigned int irq_nr)
+static void sun4c_mask_irq(struct irq_data *data)
{
- unsigned long flags;
- unsigned char current_mask, new_mask;
-
- local_irq_save(flags);
- irq_nr &= (NR_IRQS - 1);
- current_mask = sbus_readb(interrupt_enable);
- switch (irq_nr) {
- case 1:
- new_mask = ((current_mask) & (~(SUN4C_INT_E1)));
- break;
- case 8:
- new_mask = ((current_mask) & (~(SUN4C_INT_E8)));
- break;
- case 10:
- new_mask = ((current_mask) & (~(SUN4C_INT_E10)));
- break;
- case 14:
- new_mask = ((current_mask) & (~(SUN4C_INT_E14)));
- break;
- default:
+ unsigned long mask = (unsigned long)data->chip_data;
+
+ if (mask) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mask = sbus_readb(interrupt_enable) & ~mask;
+ sbus_writeb(mask, interrupt_enable);
local_irq_restore(flags);
- return;
}
- sbus_writeb(new_mask, interrupt_enable);
- local_irq_restore(flags);
}
-static void sun4c_enable_irq(unsigned int irq_nr)
+static void sun4c_unmask_irq(struct irq_data *data)
{
- unsigned long flags;
- unsigned char current_mask, new_mask;
-
- local_irq_save(flags);
- irq_nr &= (NR_IRQS - 1);
- current_mask = sbus_readb(interrupt_enable);
- switch (irq_nr) {
- case 1:
- new_mask = ((current_mask) | SUN4C_INT_E1);
- break;
- case 8:
- new_mask = ((current_mask) | SUN4C_INT_E8);
- break;
- case 10:
- new_mask = ((current_mask) | SUN4C_INT_E10);
- break;
- case 14:
- new_mask = ((current_mask) | SUN4C_INT_E14);
- break;
- default:
+ unsigned long mask = (unsigned long)data->chip_data;
+
+ if (mask) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mask = sbus_readb(interrupt_enable) | mask;
+ sbus_writeb(mask, interrupt_enable);
local_irq_restore(flags);
- return;
}
- sbus_writeb(new_mask, interrupt_enable);
- local_irq_restore(flags);
+}
+
+static unsigned int sun4c_startup_irq(struct irq_data *data)
+{
+ irq_link(data->irq);
+ sun4c_unmask_irq(data);
+
+ return 0;
+}
+
+static void sun4c_shutdown_irq(struct irq_data *data)
+{
+ sun4c_mask_irq(data);
+ irq_unlink(data->irq);
+}
+
+static struct irq_chip sun4c_irq = {
+ .name = "sun4c",
+ .irq_startup = sun4c_startup_irq,
+ .irq_shutdown = sun4c_shutdown_irq,
+ .irq_mask = sun4c_mask_irq,
+ .irq_unmask = sun4c_unmask_irq,
+};
+
+static unsigned int sun4c_build_device_irq(struct platform_device *op,
+ unsigned int real_irq)
+{
+ unsigned int irq;
+
+ if (real_irq >= 16) {
+ prom_printf("Bogus sun4c IRQ %u\n", real_irq);
+ prom_halt();
+ }
+
+ irq = irq_alloc(real_irq, real_irq);
+ if (irq) {
+ unsigned long mask = 0UL;
+
+ switch (real_irq) {
+ case 1:
+ mask = SUN4C_INT_E1;
+ break;
+ case 8:
+ mask = SUN4C_INT_E8;
+ break;
+ case 10:
+ mask = SUN4C_INT_E10;
+ break;
+ case 14:
+ mask = SUN4C_INT_E14;
+ break;
+ default:
+ /* All the rest are either always enabled,
+ * or are for signalling software interrupts.
+ */
+ break;
+ }
+ irq_set_chip_and_handler_name(irq, &sun4c_irq,
+ handle_level_irq, "level");
+ irq_set_chip_data(irq, (void *)mask);
+ }
+ return irq;
}
struct sun4c_timer_info {
@@ -144,8 +176,9 @@ static void sun4c_load_profile_irq(int cpu, unsigned int limit)
static void __init sun4c_init_timers(irq_handler_t counter_fn)
{
- const struct linux_prom_irqs *irq;
+ const struct linux_prom_irqs *prom_irqs;
struct device_node *dp;
+ unsigned int irq;
const u32 *addr;
int err;
@@ -163,9 +196,9 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn)
sun4c_timers = (void __iomem *) (unsigned long) addr[0];
- irq = of_get_property(dp, "intr", NULL);
+ prom_irqs = of_get_property(dp, "intr", NULL);
of_node_put(dp);
- if (!irq) {
+ if (!prom_irqs) {
prom_printf("sun4c_init_timers: No intr property\n");
prom_halt();
}
@@ -178,15 +211,15 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn)
master_l10_counter = &sun4c_timers->l10_count;
- err = request_irq(irq[0].pri, counter_fn,
- (IRQF_DISABLED | SA_STATIC_ALLOC),
- "timer", NULL);
+ irq = sun4c_build_device_irq(NULL, prom_irqs[0].pri);
+ err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
if (err) {
prom_printf("sun4c_init_timers: request_irq() fails with %d\n", err);
prom_halt();
}
- sun4c_disable_irq(irq[1].pri);
+ /* disable timer interrupt */
+ sun4c_mask_irq(irq_get_irq_data(irq));
}
#ifdef CONFIG_SMP
@@ -215,14 +248,11 @@ void __init sun4c_init_IRQ(void)
interrupt_enable = (void __iomem *) (unsigned long) addr[0];
- BTFIXUPSET_CALL(enable_irq, sun4c_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_irq, sun4c_disable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(enable_pil_irq, sun4c_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_pil_irq, sun4c_disable_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(clear_clock_irq, sun4c_clear_clock_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(load_profile_irq, sun4c_load_profile_irq, BTFIXUPCALL_NOP);
- sparc_irq_config.init_timers = sun4c_init_timers;
+ sparc_irq_config.init_timers = sun4c_init_timers;
+ sparc_irq_config.build_device_irq = sun4c_build_device_irq;
#ifdef CONFIG_SMP
BTFIXUPSET_CALL(set_cpu_int, sun4c_nop, BTFIXUPCALL_NOP);
diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c
index 77b4a899271..a9ea60eb2c1 100644
--- a/arch/sparc/kernel/sun4d_irq.c
+++ b/arch/sparc/kernel/sun4d_irq.c
@@ -14,6 +14,7 @@
#include <asm/io.h>
#include <asm/sbi.h>
#include <asm/cacheflush.h>
+#include <asm/setup.h>
#include "kernel.h"
#include "irq.h"
@@ -22,22 +23,20 @@
* cpu local. CPU local interrupts cover the timer interrupts
* and whatnot, and we encode those as normal PILs between
* 0 and 15.
- *
- * SBUS interrupts are encoded integers including the board number
- * (plus one), the SBUS level, and the SBUS slot number. Sun4D
- * IRQ dispatch is done by:
- *
- * 1) Reading the BW local interrupt table in order to get the bus
- * interrupt mask.
- *
- * This table is indexed by SBUS interrupt level which can be
- * derived from the PIL we got interrupted on.
- *
- * 2) For each bus showing interrupt pending from #1, read the
- * SBI interrupt state register. This will indicate which slots
- * have interrupts pending for that SBUS interrupt level.
+ * SBUS interrupts are encodes as a combination of board, level and slot.
*/
+struct sun4d_handler_data {
+ unsigned int cpuid; /* target cpu */
+ unsigned int real_irq; /* interrupt level */
+};
+
+
+static unsigned int sun4d_encode_irq(int board, int lvl, int slot)
+{
+ return (board + 1) << 5 | (lvl << 2) | slot;
+}
+
struct sun4d_timer_regs {
u32 l10_timer_limit;
u32 l10_cur_countx;
@@ -48,17 +47,12 @@ struct sun4d_timer_regs {
static struct sun4d_timer_regs __iomem *sun4d_timers;
-#define TIMER_IRQ 10
-
-#define MAX_STATIC_ALLOC 4
-static unsigned char sbus_tid[32];
-
-static struct irqaction *irq_action[NR_IRQS];
+#define SUN4D_TIMER_IRQ 10
-static struct sbus_action {
- struct irqaction *action;
- /* For SMP this needs to be extended */
-} *sbus_actions;
+/* Specify which cpu handle interrupts from which board.
+ * Index is board - value is cpu.
+ */
+static unsigned char board_to_cpu[32];
static int pil_to_sbus[] = {
0,
@@ -79,152 +73,81 @@ static int pil_to_sbus[] = {
0,
};
-static int sbus_to_pil[] = {
- 0,
- 2,
- 3,
- 5,
- 7,
- 9,
- 11,
- 13,
-};
-
-static int nsbi;
-
/* Exported for sun4d_smp.c */
DEFINE_SPINLOCK(sun4d_imsk_lock);
-int show_sun4d_interrupts(struct seq_file *p, void *v)
+/* SBUS interrupts are encoded integers including the board number
+ * (plus one), the SBUS level, and the SBUS slot number. Sun4D
+ * IRQ dispatch is done by:
+ *
+ * 1) Reading the BW local interrupt table in order to get the bus
+ * interrupt mask.
+ *
+ * This table is indexed by SBUS interrupt level which can be
+ * derived from the PIL we got interrupted on.
+ *
+ * 2) For each bus showing interrupt pending from #1, read the
+ * SBI interrupt state register. This will indicate which slots
+ * have interrupts pending for that SBUS interrupt level.
+ *
+ * 3) Call the genreric IRQ support.
+ */
+static void sun4d_sbus_handler_irq(int sbusl)
{
- int i = *(loff_t *) v, j = 0, k = 0, sbusl;
- struct irqaction *action;
- unsigned long flags;
-#ifdef CONFIG_SMP
- int x;
-#endif
-
- spin_lock_irqsave(&irq_action_lock, flags);
- if (i < NR_IRQS) {
- sbusl = pil_to_sbus[i];
- if (!sbusl) {
- action = *(i + irq_action);
- if (!action)
- goto out_unlock;
- } else {
- for (j = 0; j < nsbi; j++) {
- for (k = 0; k < 4; k++)
- action = sbus_actions[(j << 5) + (sbusl << 2) + k].action;
- if (action)
- goto found_it;
- }
- goto out_unlock;
- }
-found_it: seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(x)
- seq_printf(p, "%10u ",
- kstat_cpu(cpu_logical_map(x)).irqs[i]);
-#endif
- seq_printf(p, "%c %s",
- (action->flags & IRQF_DISABLED) ? '+' : ' ',
- action->name);
- action = action->next;
- for (;;) {
- for (; action; action = action->next) {
- seq_printf(p, ",%s %s",
- (action->flags & IRQF_DISABLED) ? " +" : "",
- action->name);
- }
- if (!sbusl)
- break;
- k++;
- if (k < 4) {
- action = sbus_actions[(j << 5) + (sbusl << 2) + k].action;
- } else {
- j++;
- if (j == nsbi)
- break;
- k = 0;
- action = sbus_actions[(j << 5) + (sbusl << 2)].action;
+ unsigned int bus_mask;
+ unsigned int sbino, slot;
+ unsigned int sbil;
+
+ bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff;
+ bw_clear_intr_mask(sbusl, bus_mask);
+
+ sbil = (sbusl << 2);
+ /* Loop for each pending SBI */
+ for (sbino = 0; bus_mask; sbino++) {
+ unsigned int idx, mask;
+
+ bus_mask >>= 1;
+ if (!(bus_mask & 1))
+ continue;
+ /* XXX This seems to ACK the irq twice. acquire_sbi()
+ * XXX uses swap, therefore this writes 0xf << sbil,
+ * XXX then later release_sbi() will write the individual
+ * XXX bits which were set again.
+ */
+ mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil);
+ mask &= (0xf << sbil);
+
+ /* Loop for each pending SBI slot */
+ idx = 0;
+ slot = (1 << sbil);
+ while (mask != 0) {
+ unsigned int pil;
+ struct irq_bucket *p;
+
+ idx++;
+ slot <<= 1;
+ if (!(mask & slot))
+ continue;
+
+ mask &= ~slot;
+ pil = sun4d_encode_irq(sbino, sbil, idx);
+
+ p = irq_map[pil];
+ while (p) {
+ struct irq_bucket *next;
+
+ next = p->next;
+ generic_handle_irq(p->irq);
+ p = next;
}
+ release_sbi(SBI2DEVID(sbino), slot);
}
- seq_putc(p, '\n');
}
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
- return 0;
-}
-
-void sun4d_free_irq(unsigned int irq, void *dev_id)
-{
- struct irqaction *action, **actionp;
- struct irqaction *tmp = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_action_lock, flags);
- if (irq < 15)
- actionp = irq + irq_action;
- else
- actionp = &(sbus_actions[irq - (1 << 5)].action);
- action = *actionp;
- if (!action) {
- printk(KERN_ERR "Trying to free free IRQ%d\n", irq);
- goto out_unlock;
- }
- if (dev_id) {
- for (; action; action = action->next) {
- if (action->dev_id == dev_id)
- break;
- tmp = action;
- }
- if (!action) {
- printk(KERN_ERR "Trying to free free shared IRQ%d\n",
- irq);
- goto out_unlock;
- }
- } else if (action->flags & IRQF_SHARED) {
- printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n",
- irq);
- goto out_unlock;
- }
- if (action->flags & SA_STATIC_ALLOC) {
- /*
- * This interrupt is marked as specially allocated
- * so it is a bad idea to free it.
- */
- printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n",
- irq, action->name);
- goto out_unlock;
- }
-
- if (tmp)
- tmp->next = action->next;
- else
- *actionp = action->next;
-
- spin_unlock_irqrestore(&irq_action_lock, flags);
-
- synchronize_irq(irq);
-
- spin_lock_irqsave(&irq_action_lock, flags);
-
- kfree(action);
-
- if (!(*actionp))
- __disable_irq(irq);
-
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
}
void sun4d_handler_irq(int pil, struct pt_regs *regs)
{
struct pt_regs *old_regs;
- struct irqaction *action;
- int cpu = smp_processor_id();
/* SBUS IRQ level (1 - 7) */
int sbusl = pil_to_sbus[pil];
@@ -233,160 +156,96 @@ void sun4d_handler_irq(int pil, struct pt_regs *regs)
cc_set_iclr(1 << pil);
+#ifdef CONFIG_SMP
+ /*
+ * Check IPI data structures after IRQ has been cleared. Hard and Soft
+ * IRQ can happen at the same time, so both cases are always handled.
+ */
+ if (pil == SUN4D_IPI_IRQ)
+ sun4d_ipi_interrupt();
+#endif
+
old_regs = set_irq_regs(regs);
irq_enter();
- kstat_cpu(cpu).irqs[pil]++;
- if (!sbusl) {
- action = *(pil + irq_action);
- if (!action)
- unexpected_irq(pil, NULL, regs);
- do {
- action->handler(pil, action->dev_id);
- action = action->next;
- } while (action);
+ if (sbusl == 0) {
+ /* cpu interrupt */
+ struct irq_bucket *p;
+
+ p = irq_map[pil];
+ while (p) {
+ struct irq_bucket *next;
+
+ next = p->next;
+ generic_handle_irq(p->irq);
+ p = next;
+ }
} else {
- int bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff;
- int sbino;
- struct sbus_action *actionp;
- unsigned mask, slot;
- int sbil = (sbusl << 2);
-
- bw_clear_intr_mask(sbusl, bus_mask);
-
- /* Loop for each pending SBI */
- for (sbino = 0; bus_mask; sbino++, bus_mask >>= 1)
- if (bus_mask & 1) {
- mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil);
- mask &= (0xf << sbil);
- actionp = sbus_actions + (sbino << 5) + (sbil);
- /* Loop for each pending SBI slot */
- for (slot = (1 << sbil); mask; slot <<= 1, actionp++)
- if (mask & slot) {
- mask &= ~slot;
- action = actionp->action;
-
- if (!action)
- unexpected_irq(pil, NULL, regs);
- do {
- action->handler(pil, action->dev_id);
- action = action->next;
- } while (action);
- release_sbi(SBI2DEVID(sbino), slot);
- }
- }
+ /* SBUS interrupt */
+ sun4d_sbus_handler_irq(sbusl);
}
irq_exit();
set_irq_regs(old_regs);
}
-int sun4d_request_irq(unsigned int irq,
- irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
+
+static void sun4d_mask_irq(struct irq_data *data)
{
- struct irqaction *action, *tmp = NULL, **actionp;
+ struct sun4d_handler_data *handler_data = data->handler_data;
+ unsigned int real_irq;
+#ifdef CONFIG_SMP
+ int cpuid = handler_data->cpuid;
unsigned long flags;
- int ret;
-
- if (irq > 14 && irq < (1 << 5)) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!handler) {
- ret = -EINVAL;
- goto out;
- }
-
- spin_lock_irqsave(&irq_action_lock, flags);
-
- if (irq >= (1 << 5))
- actionp = &(sbus_actions[irq - (1 << 5)].action);
- else
- actionp = irq + irq_action;
- action = *actionp;
-
- if (action) {
- if ((action->flags & IRQF_SHARED) && (irqflags & IRQF_SHARED)) {
- for (tmp = action; tmp->next; tmp = tmp->next)
- /* find last entry - tmp used below */;
- } else {
- ret = -EBUSY;
- goto out_unlock;
- }
- if ((action->flags & IRQF_DISABLED) ^ (irqflags & IRQF_DISABLED)) {
- printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n",
- irq);
- ret = -EBUSY;
- goto out_unlock;
- }
- action = NULL; /* Or else! */
- }
-
- /* If this is flagged as statically allocated then we use our
- * private struct which is never freed.
- */
- if (irqflags & SA_STATIC_ALLOC) {
- if (static_irq_count < MAX_STATIC_ALLOC)
- action = &static_irqaction[static_irq_count++];
- else
- printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
- irq, devname);
- }
-
- if (action == NULL)
- action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
-
- if (!action) {
- ret = -ENOMEM;
- goto out_unlock;
- }
-
- action->handler = handler;
- action->flags = irqflags;
- action->name = devname;
- action->next = NULL;
- action->dev_id = dev_id;
-
- if (tmp)
- tmp->next = action;
- else
- *actionp = action;
-
- __enable_irq(irq);
-
- ret = 0;
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
- return ret;
+#endif
+ real_irq = handler_data->real_irq;
+#ifdef CONFIG_SMP
+ spin_lock_irqsave(&sun4d_imsk_lock, flags);
+ cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | (1 << real_irq));
+ spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+#else
+ cc_set_imsk(cc_get_imsk() | (1 << real_irq));
+#endif
}
-static void sun4d_disable_irq(unsigned int irq)
+static void sun4d_unmask_irq(struct irq_data *data)
{
- int tid = sbus_tid[(irq >> 5) - 1];
+ struct sun4d_handler_data *handler_data = data->handler_data;
+ unsigned int real_irq;
+#ifdef CONFIG_SMP
+ int cpuid = handler_data->cpuid;
unsigned long flags;
+#endif
+ real_irq = handler_data->real_irq;
- if (irq < NR_IRQS)
- return;
-
+#ifdef CONFIG_SMP
spin_lock_irqsave(&sun4d_imsk_lock, flags);
- cc_set_imsk_other(tid, cc_get_imsk_other(tid) | (1 << sbus_to_pil[(irq >> 2) & 7]));
+ cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | ~(1 << real_irq));
spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+#else
+ cc_set_imsk(cc_get_imsk() | ~(1 << real_irq));
+#endif
}
-static void sun4d_enable_irq(unsigned int irq)
+static unsigned int sun4d_startup_irq(struct irq_data *data)
{
- int tid = sbus_tid[(irq >> 5) - 1];
- unsigned long flags;
-
- if (irq < NR_IRQS)
- return;
+ irq_link(data->irq);
+ sun4d_unmask_irq(data);
+ return 0;
+}
- spin_lock_irqsave(&sun4d_imsk_lock, flags);
- cc_set_imsk_other(tid, cc_get_imsk_other(tid) & ~(1 << sbus_to_pil[(irq >> 2) & 7]));
- spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+static void sun4d_shutdown_irq(struct irq_data *data)
+{
+ sun4d_mask_irq(data);
+ irq_unlink(data->irq);
}
+struct irq_chip sun4d_irq = {
+ .name = "sun4d",
+ .irq_startup = sun4d_startup_irq,
+ .irq_shutdown = sun4d_shutdown_irq,
+ .irq_unmask = sun4d_unmask_irq,
+ .irq_mask = sun4d_mask_irq,
+};
+
#ifdef CONFIG_SMP
static void sun4d_set_cpu_int(int cpu, int level)
{
@@ -413,7 +272,7 @@ void __init sun4d_distribute_irqs(void)
for_each_node_by_name(dp, "sbi") {
int devid = of_getintprop_default(dp, "device-id", 0);
int board = of_getintprop_default(dp, "board#", 0);
- sbus_tid[board] = cpuid;
+ board_to_cpu[board] = cpuid;
set_sbi_tid(devid, cpuid << 3);
}
printk(KERN_ERR "All sbus IRQs directed to CPU%d\n", cpuid);
@@ -443,15 +302,16 @@ static void __init sun4d_load_profile_irqs(void)
unsigned int sun4d_build_device_irq(struct platform_device *op,
unsigned int real_irq)
{
- static int pil_to_sbus[] = {
- 0, 0, 1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 0,
- };
struct device_node *dp = op->dev.of_node;
struct device_node *io_unit, *sbi = dp->parent;
const struct linux_prom_registers *regs;
+ struct sun4d_handler_data *handler_data;
+ unsigned int pil;
+ unsigned int irq;
int board, slot;
int sbusl;
+ irq = 0;
while (sbi) {
if (!strcmp(sbi->name, "sbi"))
break;
@@ -484,7 +344,28 @@ unsigned int sun4d_build_device_irq(struct platform_device *op,
sbusl = pil_to_sbus[real_irq];
if (sbusl)
- return (((board + 1) << 5) + (sbusl << 2) + slot);
+ pil = sun4d_encode_irq(board, sbusl, slot);
+ else
+ pil = real_irq;
+
+ irq = irq_alloc(real_irq, pil);
+ if (irq == 0)
+ goto err_out;
+
+ handler_data = irq_get_handler_data(irq);
+ if (unlikely(handler_data))
+ goto err_out;
+
+ handler_data = kzalloc(sizeof(struct sun4d_handler_data), GFP_ATOMIC);
+ if (unlikely(!handler_data)) {
+ prom_printf("IRQ: kzalloc(sun4d_handler_data) failed.\n");
+ prom_halt();
+ }
+ handler_data->cpuid = board_to_cpu[board];
+ handler_data->real_irq = real_irq;
+ irq_set_chip_and_handler_name(irq, &sun4d_irq,
+ handle_level_irq, "level");
+ irq_set_handler_data(irq, handler_data);
err_out:
return real_irq;
@@ -518,6 +399,7 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
{
struct device_node *dp;
struct resource res;
+ unsigned int irq;
const u32 *reg;
int err;
@@ -552,9 +434,8 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
master_l10_counter = &sun4d_timers->l10_cur_count;
- err = request_irq(TIMER_IRQ, counter_fn,
- (IRQF_DISABLED | SA_STATIC_ALLOC),
- "timer", NULL);
+ irq = sun4d_build_device_irq(NULL, SUN4D_TIMER_IRQ);
+ err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
if (err) {
prom_printf("sun4d_init_timers: request_irq() failed with %d\n",
err);
@@ -567,27 +448,16 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
void __init sun4d_init_sbi_irq(void)
{
struct device_node *dp;
- int target_cpu = 0;
+ int target_cpu;
-#ifdef CONFIG_SMP
target_cpu = boot_cpu_id;
-#endif
-
- nsbi = 0;
- for_each_node_by_name(dp, "sbi")
- nsbi++;
- sbus_actions = kzalloc(nsbi * 8 * 4 * sizeof(struct sbus_action), GFP_ATOMIC);
- if (!sbus_actions) {
- prom_printf("SUN4D: Cannot allocate sbus_actions, halting.\n");
- prom_halt();
- }
for_each_node_by_name(dp, "sbi") {
int devid = of_getintprop_default(dp, "device-id", 0);
int board = of_getintprop_default(dp, "board#", 0);
unsigned int mask;
set_sbi_tid(devid, target_cpu << 3);
- sbus_tid[board] = target_cpu;
+ board_to_cpu[board] = target_cpu;
/* Get rid of pending irqs from PROM */
mask = acquire_sbi(devid, 0xffffffff);
@@ -603,12 +473,10 @@ void __init sun4d_init_IRQ(void)
{
local_irq_disable();
- BTFIXUPSET_CALL(enable_irq, sun4d_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_irq, sun4d_disable_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(clear_clock_irq, sun4d_clear_clock_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(load_profile_irq, sun4d_load_profile_irq, BTFIXUPCALL_NORM);
- sparc_irq_config.init_timers = sun4d_init_timers;
+ sparc_irq_config.init_timers = sun4d_init_timers;
sparc_irq_config.build_device_irq = sun4d_build_device_irq;
#ifdef CONFIG_SMP
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
index 475d50b96cd..133387980b5 100644
--- a/arch/sparc/kernel/sun4d_smp.c
+++ b/arch/sparc/kernel/sun4d_smp.c
@@ -32,6 +32,7 @@ static inline unsigned long sun4d_swap(volatile unsigned long *ptr, unsigned lon
return val;
}
+static void smp4d_ipi_init(void);
static void smp_setup_percpu_timer(void);
static unsigned char cpu_leds[32];
@@ -80,8 +81,6 @@ void __cpuinit smp4d_callin(void)
local_flush_cache_all();
local_flush_tlb_all();
- cpu_probe();
-
while ((unsigned long)current_set[cpuid] < PAGE_OFFSET)
barrier();
@@ -105,7 +104,7 @@ void __cpuinit smp4d_callin(void)
local_irq_enable(); /* We don't allow PIL 14 yet */
- while (!cpu_isset(cpuid, smp_commenced_mask))
+ while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
barrier();
spin_lock_irqsave(&sun4d_imsk_lock, flags);
@@ -120,6 +119,7 @@ void __cpuinit smp4d_callin(void)
*/
void __init smp4d_boot_cpus(void)
{
+ smp4d_ipi_init();
if (boot_cpu_id)
current_set[0] = NULL;
smp_setup_percpu_timer();
@@ -191,6 +191,80 @@ void __init smp4d_smp_done(void)
sun4d_distribute_irqs();
}
+/* Memory structure giving interrupt handler information about IPI generated */
+struct sun4d_ipi_work {
+ int single;
+ int msk;
+ int resched;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct sun4d_ipi_work, sun4d_ipi_work);
+
+/* Initialize IPIs on the SUN4D SMP machine */
+static void __init smp4d_ipi_init(void)
+{
+ int cpu;
+ struct sun4d_ipi_work *work;
+
+ printk(KERN_INFO "smp4d: setup IPI at IRQ %d\n", SUN4D_IPI_IRQ);
+
+ for_each_possible_cpu(cpu) {
+ work = &per_cpu(sun4d_ipi_work, cpu);
+ work->single = work->msk = work->resched = 0;
+ }
+}
+
+void sun4d_ipi_interrupt(void)
+{
+ struct sun4d_ipi_work *work = &__get_cpu_var(sun4d_ipi_work);
+
+ if (work->single) {
+ work->single = 0;
+ smp_call_function_single_interrupt();
+ }
+ if (work->msk) {
+ work->msk = 0;
+ smp_call_function_interrupt();
+ }
+ if (work->resched) {
+ work->resched = 0;
+ smp_resched_interrupt();
+ }
+}
+
+static void smp4d_ipi_single(int cpu)
+{
+ struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+ /* Mark work */
+ work->single = 1;
+
+ /* Generate IRQ on the CPU */
+ sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
+static void smp4d_ipi_mask_one(int cpu)
+{
+ struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+ /* Mark work */
+ work->msk = 1;
+
+ /* Generate IRQ on the CPU */
+ sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
+static void smp4d_ipi_resched(int cpu)
+{
+ struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+ /* Mark work */
+ work->resched = 1;
+
+ /* Generate IRQ on the CPU (any IRQ will cause resched) */
+ sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
static struct smp_funcall {
smpfunc_t func;
unsigned long arg1;
@@ -239,10 +313,10 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
{
register int i;
- cpu_clear(smp_processor_id(), mask);
- cpus_and(mask, cpu_online_map, mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+ cpumask_and(&mask, cpu_online_mask, &mask);
for (i = 0; i <= high; i++) {
- if (cpu_isset(i, mask)) {
+ if (cpumask_test_cpu(i, &mask)) {
ccall_info.processors_in[i] = 0;
ccall_info.processors_out[i] = 0;
sun4d_send_ipi(i, IRQ_CROSS_CALL);
@@ -255,7 +329,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_in[i])
barrier();
@@ -263,7 +337,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_out[i])
barrier();
@@ -356,6 +430,9 @@ void __init sun4d_init_smp(void)
BTFIXUPSET_BLACKBOX(load_current, smp4d_blackbox_current);
BTFIXUPSET_CALL(smp_cross_call, smp4d_cross_call, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4d_processor_id, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_resched, smp4d_ipi_resched, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_single, smp4d_ipi_single, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_mask_one, smp4d_ipi_mask_one, BTFIXUPCALL_NORM);
for (i = 0; i < NR_CPUS; i++) {
ccall_info.processors_in[i] = 1;
diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c
index 69df6257a32..422c16dad1f 100644
--- a/arch/sparc/kernel/sun4m_irq.c
+++ b/arch/sparc/kernel/sun4m_irq.c
@@ -100,6 +100,11 @@
struct sun4m_irq_percpu __iomem *sun4m_irq_percpu[SUN4M_NCPUS];
struct sun4m_irq_global __iomem *sun4m_irq_global;
+struct sun4m_handler_data {
+ bool percpu;
+ long mask;
+};
+
/* Dave Redman (djhr@tadpole.co.uk)
* The sun4m interrupt registers.
*/
@@ -142,9 +147,9 @@ struct sun4m_irq_global __iomem *sun4m_irq_global;
#define OBP_INT_LEVEL_VME 0x40
#define SUN4M_TIMER_IRQ (OBP_INT_LEVEL_ONBOARD | 10)
-#define SUM4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14)
+#define SUN4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14)
-static unsigned long irq_mask[0x50] = {
+static unsigned long sun4m_imask[0x50] = {
/* 0x00 - SMP */
0, SUN4M_SOFT_INT(1),
SUN4M_SOFT_INT(2), SUN4M_SOFT_INT(3),
@@ -169,7 +174,7 @@ static unsigned long irq_mask[0x50] = {
SUN4M_INT_VIDEO, SUN4M_INT_MODULE,
SUN4M_INT_REALTIME, SUN4M_INT_FLOPPY,
(SUN4M_INT_SERIAL | SUN4M_INT_KBDMS),
- SUN4M_INT_AUDIO, 0, SUN4M_INT_MODULE_ERR,
+ SUN4M_INT_AUDIO, SUN4M_INT_E14, SUN4M_INT_MODULE_ERR,
/* 0x30 - sbus */
0, 0, SUN4M_INT_SBUS(0), SUN4M_INT_SBUS(1),
0, SUN4M_INT_SBUS(2), 0, SUN4M_INT_SBUS(3),
@@ -182,105 +187,110 @@ static unsigned long irq_mask[0x50] = {
0, SUN4M_INT_VME(6), 0, 0
};
-static unsigned long sun4m_get_irqmask(unsigned int irq)
+static void sun4m_mask_irq(struct irq_data *data)
{
- unsigned long mask;
-
- if (irq < 0x50)
- mask = irq_mask[irq];
- else
- mask = 0;
+ struct sun4m_handler_data *handler_data = data->handler_data;
+ int cpu = smp_processor_id();
- if (!mask)
- printk(KERN_ERR "sun4m_get_irqmask: IRQ%d has no valid mask!\n",
- irq);
+ if (handler_data->mask) {
+ unsigned long flags;
- return mask;
+ local_irq_save(flags);
+ if (handler_data->percpu) {
+ sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->set);
+ } else {
+ sbus_writel(handler_data->mask, &sun4m_irq_global->mask_set);
+ }
+ local_irq_restore(flags);
+ }
}
-static void sun4m_disable_irq(unsigned int irq_nr)
+static void sun4m_unmask_irq(struct irq_data *data)
{
- unsigned long mask, flags;
+ struct sun4m_handler_data *handler_data = data->handler_data;
int cpu = smp_processor_id();
- mask = sun4m_get_irqmask(irq_nr);
- local_irq_save(flags);
- if (irq_nr > 15)
- sbus_writel(mask, &sun4m_irq_global->mask_set);
- else
- sbus_writel(mask, &sun4m_irq_percpu[cpu]->set);
- local_irq_restore(flags);
-}
-
-static void sun4m_enable_irq(unsigned int irq_nr)
-{
- unsigned long mask, flags;
- int cpu = smp_processor_id();
+ if (handler_data->mask) {
+ unsigned long flags;
- /* Dreadful floppy hack. When we use 0x2b instead of
- * 0x0b the system blows (it starts to whistle!).
- * So we continue to use 0x0b. Fixme ASAP. --P3
- */
- if (irq_nr != 0x0b) {
- mask = sun4m_get_irqmask(irq_nr);
- local_irq_save(flags);
- if (irq_nr > 15)
- sbus_writel(mask, &sun4m_irq_global->mask_clear);
- else
- sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear);
- local_irq_restore(flags);
- } else {
local_irq_save(flags);
- sbus_writel(SUN4M_INT_FLOPPY, &sun4m_irq_global->mask_clear);
+ if (handler_data->percpu) {
+ sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->clear);
+ } else {
+ sbus_writel(handler_data->mask, &sun4m_irq_global->mask_clear);
+ }
local_irq_restore(flags);
}
}
-static unsigned long cpu_pil_to_imask[16] = {
-/*0*/ 0x00000000,
-/*1*/ 0x00000000,
-/*2*/ SUN4M_INT_SBUS(0) | SUN4M_INT_VME(0),
-/*3*/ SUN4M_INT_SBUS(1) | SUN4M_INT_VME(1),
-/*4*/ SUN4M_INT_SCSI,
-/*5*/ SUN4M_INT_SBUS(2) | SUN4M_INT_VME(2),
-/*6*/ SUN4M_INT_ETHERNET,
-/*7*/ SUN4M_INT_SBUS(3) | SUN4M_INT_VME(3),
-/*8*/ SUN4M_INT_VIDEO,
-/*9*/ SUN4M_INT_SBUS(4) | SUN4M_INT_VME(4) | SUN4M_INT_MODULE_ERR,
-/*10*/ SUN4M_INT_REALTIME,
-/*11*/ SUN4M_INT_SBUS(5) | SUN4M_INT_VME(5) | SUN4M_INT_FLOPPY,
-/*12*/ SUN4M_INT_SERIAL | SUN4M_INT_KBDMS,
-/*13*/ SUN4M_INT_SBUS(6) | SUN4M_INT_VME(6) | SUN4M_INT_AUDIO,
-/*14*/ SUN4M_INT_E14,
-/*15*/ SUN4M_INT_ERROR,
-};
+static unsigned int sun4m_startup_irq(struct irq_data *data)
+{
+ irq_link(data->irq);
+ sun4m_unmask_irq(data);
+ return 0;
+}
-/* We assume the caller has disabled local interrupts when these are called,
- * or else very bizarre behavior will result.
- */
-static void sun4m_disable_pil_irq(unsigned int pil)
+static void sun4m_shutdown_irq(struct irq_data *data)
{
- sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_set);
+ sun4m_mask_irq(data);
+ irq_unlink(data->irq);
}
-static void sun4m_enable_pil_irq(unsigned int pil)
+static struct irq_chip sun4m_irq = {
+ .name = "sun4m",
+ .irq_startup = sun4m_startup_irq,
+ .irq_shutdown = sun4m_shutdown_irq,
+ .irq_mask = sun4m_mask_irq,
+ .irq_unmask = sun4m_unmask_irq,
+};
+
+
+static unsigned int sun4m_build_device_irq(struct platform_device *op,
+ unsigned int real_irq)
{
- sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_clear);
+ struct sun4m_handler_data *handler_data;
+ unsigned int irq;
+ unsigned int pil;
+
+ if (real_irq >= OBP_INT_LEVEL_VME) {
+ prom_printf("Bogus sun4m IRQ %u\n", real_irq);
+ prom_halt();
+ }
+ pil = (real_irq & 0xf);
+ irq = irq_alloc(real_irq, pil);
+
+ if (irq == 0)
+ goto out;
+
+ handler_data = irq_get_handler_data(irq);
+ if (unlikely(handler_data))
+ goto out;
+
+ handler_data = kzalloc(sizeof(struct sun4m_handler_data), GFP_ATOMIC);
+ if (unlikely(!handler_data)) {
+ prom_printf("IRQ: kzalloc(sun4m_handler_data) failed.\n");
+ prom_halt();
+ }
+
+ handler_data->mask = sun4m_imask[real_irq];
+ handler_data->percpu = real_irq < OBP_INT_LEVEL_ONBOARD;
+ irq_set_chip_and_handler_name(irq, &sun4m_irq,
+ handle_level_irq, "level");
+ irq_set_handler_data(irq, handler_data);
+
+out:
+ return irq;
}
#ifdef CONFIG_SMP
static void sun4m_send_ipi(int cpu, int level)
{
- unsigned long mask = sun4m_get_irqmask(level);
-
- sbus_writel(mask, &sun4m_irq_percpu[cpu]->set);
+ sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->set);
}
static void sun4m_clear_ipi(int cpu, int level)
{
- unsigned long mask = sun4m_get_irqmask(level);
-
- sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear);
+ sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->clear);
}
static void sun4m_set_udt(int cpu)
@@ -343,7 +353,15 @@ void sun4m_nmi(struct pt_regs *regs)
prom_halt();
}
-/* Exported for sun4m_smp.c */
+void sun4m_unmask_profile_irq(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ sbus_writel(sun4m_imask[SUN4M_PROFILE_IRQ], &sun4m_irq_global->mask_clear);
+ local_irq_restore(flags);
+}
+
void sun4m_clear_profile_irq(int cpu)
{
sbus_readl(&timers_percpu[cpu]->l14_limit);
@@ -358,6 +376,7 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn)
{
struct device_node *dp = of_find_node_by_name(NULL, "counter");
int i, err, len, num_cpu_timers;
+ unsigned int irq;
const u32 *addr;
if (!dp) {
@@ -384,8 +403,9 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn)
master_l10_counter = &timers_global->l10_count;
- err = request_irq(SUN4M_TIMER_IRQ, counter_fn,
- (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
+ irq = sun4m_build_device_irq(NULL, SUN4M_TIMER_IRQ);
+
+ err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
if (err) {
printk(KERN_ERR "sun4m_init_timers: Register IRQ error %d.\n",
err);
@@ -452,14 +472,11 @@ void __init sun4m_init_IRQ(void)
if (num_cpu_iregs == 4)
sbus_writel(0, &sun4m_irq_global->interrupt_target);
- BTFIXUPSET_CALL(enable_irq, sun4m_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_irq, sun4m_disable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(enable_pil_irq, sun4m_enable_pil_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_pil_irq, sun4m_disable_pil_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(clear_clock_irq, sun4m_clear_clock_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(load_profile_irq, sun4m_load_profile_irq, BTFIXUPCALL_NORM);
sparc_irq_config.init_timers = sun4m_init_timers;
+ sparc_irq_config.build_device_irq = sun4m_build_device_irq;
#ifdef CONFIG_SMP
BTFIXUPSET_CALL(set_cpu_int, sun4m_send_ipi, BTFIXUPCALL_NORM);
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index 5cc7dc51de3..59476868652 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -15,6 +15,9 @@
#include "irq.h"
#include "kernel.h"
+#define IRQ_IPI_SINGLE 12
+#define IRQ_IPI_MASK 13
+#define IRQ_IPI_RESCHED 14
#define IRQ_CROSS_CALL 15
static inline unsigned long
@@ -26,6 +29,7 @@ swap_ulong(volatile unsigned long *ptr, unsigned long val)
return val;
}
+static void smp4m_ipi_init(void);
static void smp_setup_percpu_timer(void);
void __cpuinit smp4m_callin(void)
@@ -59,8 +63,6 @@ void __cpuinit smp4m_callin(void)
local_flush_cache_all();
local_flush_tlb_all();
- cpu_probe();
-
/* Fix idle thread fields. */
__asm__ __volatile__("ld [%0], %%g6\n\t"
: : "r" (&current_set[cpuid])
@@ -70,7 +72,7 @@ void __cpuinit smp4m_callin(void)
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
- while (!cpu_isset(cpuid, smp_commenced_mask))
+ while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
mb();
local_irq_enable();
@@ -83,6 +85,7 @@ void __cpuinit smp4m_callin(void)
*/
void __init smp4m_boot_cpus(void)
{
+ smp4m_ipi_init();
smp_setup_percpu_timer();
local_flush_cache_all();
}
@@ -150,18 +153,25 @@ void __init smp4m_smp_done(void)
/* Ok, they are spinning and ready to go. */
}
-/* At each hardware IRQ, we get this called to forward IRQ reception
- * to the next processor. The caller must disable the IRQ level being
- * serviced globally so that there are no double interrupts received.
- *
- * XXX See sparc64 irq.c.
- */
-void smp4m_irq_rotate(int cpu)
+
+/* Initialize IPIs on the SUN4M SMP machine */
+static void __init smp4m_ipi_init(void)
+{
+}
+
+static void smp4m_ipi_resched(int cpu)
+{
+ set_cpu_int(cpu, IRQ_IPI_RESCHED);
+}
+
+static void smp4m_ipi_single(int cpu)
{
- int next = cpu_data(cpu).next;
+ set_cpu_int(cpu, IRQ_IPI_SINGLE);
+}
- if (next != cpu)
- set_irq_udt(next);
+static void smp4m_ipi_mask_one(int cpu)
+{
+ set_cpu_int(cpu, IRQ_IPI_MASK);
}
static struct smp_funcall {
@@ -199,10 +209,10 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
{
register int i;
- cpu_clear(smp_processor_id(), mask);
- cpus_and(mask, cpu_online_map, mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+ cpumask_and(&mask, cpu_online_mask, &mask);
for (i = 0; i < ncpus; i++) {
- if (cpu_isset(i, mask)) {
+ if (cpumask_test_cpu(i, &mask)) {
ccall_info.processors_in[i] = 0;
ccall_info.processors_out[i] = 0;
set_cpu_int(i, IRQ_CROSS_CALL);
@@ -218,7 +228,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_in[i])
barrier();
@@ -226,7 +236,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_out[i])
barrier();
@@ -277,7 +287,7 @@ static void __cpuinit smp_setup_percpu_timer(void)
load_profile_irq(cpu, lvl14_resolution);
if (cpu == boot_cpu_id)
- enable_pil_irq(14);
+ sun4m_unmask_profile_irq();
}
static void __init smp4m_blackbox_id(unsigned *addr)
@@ -306,4 +316,7 @@ void __init sun4m_init_smp(void)
BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current);
BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_resched, smp4m_ipi_resched, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_single, smp4m_ipi_single, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_mask_one, smp4m_ipi_mask_one, BTFIXUPCALL_NORM);
}
diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c
index 1eb8b00aed7..7408201d7ef 100644
--- a/arch/sparc/kernel/sysfs.c
+++ b/arch/sparc/kernel/sysfs.c
@@ -103,9 +103,10 @@ static unsigned long run_on_cpu(unsigned long cpu,
unsigned long (*func)(unsigned long),
unsigned long arg)
{
- cpumask_t old_affinity = current->cpus_allowed;
+ cpumask_t old_affinity;
unsigned long ret;
+ cpumask_copy(&old_affinity, tsk_cpus_allowed(current));
/* should return -EINVAL to userspace */
if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
return 0;
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 47ac73c32e8..332c83ff770 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -84,4 +84,4 @@ sys_call_table:
/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
/*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-/*335*/ .long sys_syncfs
+/*335*/ .long sys_syncfs, sys_sendmmsg
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 4f3170c1ef4..43887ca0be0 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -85,7 +85,7 @@ sys_call_table32:
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
/*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
- .word sys_syncfs
+ .word sys_syncfs, compat_sys_sendmmsg
#endif /* CONFIG_COMPAT */
@@ -162,4 +162,4 @@ sys_call_table:
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
/*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
- .word sys_syncfs
+ .word sys_syncfs, sys_sendmmsg
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 96046a4024c..1060e0672a4 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -228,14 +228,10 @@ static void __init sbus_time_init(void)
void __init time_init(void)
{
-#ifdef CONFIG_PCI
- extern void pci_time_init(void);
- if (pcic_present()) {
+ if (pcic_present())
pci_time_init();
- return;
- }
-#endif
- sbus_time_init();
+ else
+ sbus_time_init();
}
diff --git a/arch/sparc/kernel/us2e_cpufreq.c b/arch/sparc/kernel/us2e_cpufreq.c
index 8f982b76c71..531d54fc982 100644
--- a/arch/sparc/kernel/us2e_cpufreq.c
+++ b/arch/sparc/kernel/us2e_cpufreq.c
@@ -237,7 +237,7 @@ static unsigned int us2e_freq_get(unsigned int cpu)
if (!cpu_online(cpu))
return 0;
- cpus_allowed = current->cpus_allowed;
+ cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
set_cpus_allowed_ptr(current, cpumask_of(cpu));
clock_tick = sparc64_get_clock_tick(cpu) / 1000;
@@ -258,7 +258,7 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index)
if (!cpu_online(cpu))
return;
- cpus_allowed = current->cpus_allowed;
+ cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
set_cpus_allowed_ptr(current, cpumask_of(cpu));
new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000;
diff --git a/arch/sparc/kernel/us3_cpufreq.c b/arch/sparc/kernel/us3_cpufreq.c
index f35d1e79454..9a8ceb70083 100644
--- a/arch/sparc/kernel/us3_cpufreq.c
+++ b/arch/sparc/kernel/us3_cpufreq.c
@@ -85,7 +85,7 @@ static unsigned int us3_freq_get(unsigned int cpu)
if (!cpu_online(cpu))
return 0;
- cpus_allowed = current->cpus_allowed;
+ cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
set_cpus_allowed_ptr(current, cpumask_of(cpu));
reg = read_safari_cfg();
@@ -105,7 +105,7 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index)
if (!cpu_online(cpu))
return;
- cpus_allowed = current->cpus_allowed;
+ cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
set_cpus_allowed_ptr(current, cpumask_of(cpu));
new_freq = sparc64_get_clock_tick(cpu) / 1000;
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 846d1c4374e..7f01b8fce8b 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -15,7 +15,6 @@ lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o
lib-$(CONFIG_SPARC32) += copy_user.o locks.o
lib-y += atomic_$(BITS).o
lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
-lib-$(CONFIG_SPARC32) += rwsem_32.o
lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
diff --git a/arch/sparc/lib/rwsem_32.S b/arch/sparc/lib/rwsem_32.S
deleted file mode 100644
index 9675268e7fd..00000000000
--- a/arch/sparc/lib/rwsem_32.S
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Assembly part of rw semaphores.
- *
- * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com)
- */
-
-#include <asm/ptrace.h>
-#include <asm/psr.h>
-
- .section .sched.text, "ax"
- .align 4
-
- .globl ___down_read
-___down_read:
- rd %psr, %g3
- nop
- nop
- nop
- or %g3, PSR_PIL, %g7
- wr %g7, 0, %psr
- nop
- nop
- nop
-#ifdef CONFIG_SMP
-1: ldstub [%g1 + 4], %g7
- tst %g7
- bne 1b
- ld [%g1], %g7
- sub %g7, 1, %g7
- st %g7, [%g1]
- stb %g0, [%g1 + 4]
-#else
- ld [%g1], %g7
- sub %g7, 1, %g7
- st %g7, [%g1]
-#endif
- wr %g3, 0, %psr
- add %g7, 1, %g7
- nop
- nop
- subcc %g7, 1, %g7
- bneg 3f
- nop
-2: jmpl %o7, %g0
- mov %g4, %o7
-3: save %sp, -64, %sp
- mov %g1, %l1
- mov %g4, %l4
- bcs 4f
- mov %g5, %l5
- call down_read_failed
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba ___down_read
- restore %l5, %g0, %g5
-4: call down_read_failed_biased
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba 2b
- restore %l5, %g0, %g5
-
- .globl ___down_write
-___down_write:
- rd %psr, %g3
- nop
- nop
- nop
- or %g3, PSR_PIL, %g7
- wr %g7, 0, %psr
- sethi %hi(0x01000000), %g2
- nop
- nop
-#ifdef CONFIG_SMP
-1: ldstub [%g1 + 4], %g7
- tst %g7
- bne 1b
- ld [%g1], %g7
- sub %g7, %g2, %g7
- st %g7, [%g1]
- stb %g0, [%g1 + 4]
-#else
- ld [%g1], %g7
- sub %g7, %g2, %g7
- st %g7, [%g1]
-#endif
- wr %g3, 0, %psr
- add %g7, %g2, %g7
- nop
- nop
- subcc %g7, %g2, %g7
- bne 3f
- nop
-2: jmpl %o7, %g0
- mov %g4, %o7
-3: save %sp, -64, %sp
- mov %g1, %l1
- mov %g4, %l4
- bcs 4f
- mov %g5, %l5
- call down_write_failed
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba ___down_write
- restore %l5, %g0, %g5
-4: call down_write_failed_biased
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba 2b
- restore %l5, %g0, %g5
-
- .text
- .globl ___up_read
-___up_read:
- rd %psr, %g3
- nop
- nop
- nop
- or %g3, PSR_PIL, %g7
- wr %g7, 0, %psr
- nop
- nop
- nop
-#ifdef CONFIG_SMP
-1: ldstub [%g1 + 4], %g7
- tst %g7
- bne 1b
- ld [%g1], %g7
- add %g7, 1, %g7
- st %g7, [%g1]
- stb %g0, [%g1 + 4]
-#else
- ld [%g1], %g7
- add %g7, 1, %g7
- st %g7, [%g1]
-#endif
- wr %g3, 0, %psr
- nop
- nop
- nop
- cmp %g7, 0
- be 3f
- nop
-2: jmpl %o7, %g0
- mov %g4, %o7
-3: save %sp, -64, %sp
- mov %g1, %l1
- mov %g4, %l4
- mov %g5, %l5
- clr %o1
- call __rwsem_wake
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba 2b
- restore %l5, %g0, %g5
-
- .globl ___up_write
-___up_write:
- rd %psr, %g3
- nop
- nop
- nop
- or %g3, PSR_PIL, %g7
- wr %g7, 0, %psr
- sethi %hi(0x01000000), %g2
- nop
- nop
-#ifdef CONFIG_SMP
-1: ldstub [%g1 + 4], %g7
- tst %g7
- bne 1b
- ld [%g1], %g7
- add %g7, %g2, %g7
- st %g7, [%g1]
- stb %g0, [%g1 + 4]
-#else
- ld [%g1], %g7
- add %g7, %g2, %g7
- st %g7, [%g1]
-#endif
- wr %g3, 0, %psr
- sub %g7, %g2, %g7
- nop
- nop
- addcc %g7, %g2, %g7
- bcs 3f
- nop
-2: jmpl %o7, %g0
- mov %g4, %o7
-3: save %sp, -64, %sp
- mov %g1, %l1
- mov %g4, %l4
- mov %g5, %l5
- mov %g7, %o1
- call __rwsem_wake
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba 2b
- restore %l5, %g0, %g5
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 2f6ae1d1fb6..e10cd03fab8 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -862,7 +862,7 @@ static void init_node_masks_nonnuma(void)
for (i = 0; i < NR_CPUS; i++)
numa_cpu_lookup_table[i] = 0;
- numa_cpumask_lookup_table[0] = CPU_MASK_ALL;
+ cpumask_setall(&numa_cpumask_lookup_table[0]);
}
#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -1080,7 +1080,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
{
u64 arc;
- cpus_clear(*mask);
+ cpumask_clear(mask);
mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
u64 target = mdesc_arc_target(md, arc);
@@ -1091,7 +1091,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
continue;
id = mdesc_get_property(md, target, "id", NULL);
if (*id < nr_cpu_ids)
- cpu_set(*id, *mask);
+ cpumask_set_cpu(*id, mask);
}
}
@@ -1153,13 +1153,13 @@ static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
numa_parse_mdesc_group_cpus(md, grp, &mask);
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu(cpu, &mask)
numa_cpu_lookup_table[cpu] = index;
- numa_cpumask_lookup_table[index] = mask;
+ cpumask_copy(&numa_cpumask_lookup_table[index], &mask);
if (numa_debug) {
printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu(cpu, &mask)
printk("%d ", cpu);
printk("]\n");
}
@@ -1218,7 +1218,7 @@ static int __init numa_parse_jbus(void)
index = 0;
for_each_present_cpu(cpu) {
numa_cpu_lookup_table[cpu] = index;
- numa_cpumask_lookup_table[index] = cpumask_of_cpu(cpu);
+ cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu));
node_masks[index].mask = ~((1UL << 36UL) - 1UL);
node_masks[index].val = cpu << 36UL;
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 0e103236b75..0e9dec6cadd 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -15,3 +15,4 @@ obj-y += vdso/
obj-$(CONFIG_IA32_EMULATION) += ia32/
obj-y += platform/
+obj-y += net/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4168e5d8632..880fcb6c86f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -72,6 +72,7 @@ config X86
select GENERIC_IRQ_SHOW
select IRQ_FORCED_THREADING
select USE_GENERIC_SMP_HELPERS if SMP
+ select HAVE_BPF_JIT if (X86_64 && NET)
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 1a58ad89fdf..c04f1b7a913 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,8 +2,6 @@
# Arch-specific CryptoAPI modules.
#
-obj-$(CONFIG_CRYPTO_FPU) += fpu.o
-
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -24,6 +22,6 @@ aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 2577613fb32..feee8ff1d05 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -94,6 +94,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
+
+int crypto_fpu_init(void);
+void crypto_fpu_exit(void);
+
#ifdef CONFIG_X86_64
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
@@ -1257,6 +1261,8 @@ static int __init aesni_init(void)
return -ENODEV;
}
+ if ((err = crypto_fpu_init()))
+ goto fpu_err;
if ((err = crypto_register_alg(&aesni_alg)))
goto aes_err;
if ((err = crypto_register_alg(&__aesni_alg)))
@@ -1334,6 +1340,7 @@ blk_ecb_err:
__aes_err:
crypto_unregister_alg(&aesni_alg);
aes_err:
+fpu_err:
return err;
}
@@ -1363,6 +1370,8 @@ static void __exit aesni_exit(void)
crypto_unregister_alg(&blk_ecb_alg);
crypto_unregister_alg(&__aesni_alg);
crypto_unregister_alg(&aesni_alg);
+
+ crypto_fpu_exit();
}
module_init(aesni_init);
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index 1a8f8649c03..98d7a188f46 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -150,18 +150,12 @@ static struct crypto_template crypto_fpu_tmpl = {
.module = THIS_MODULE,
};
-static int __init crypto_fpu_module_init(void)
+int __init crypto_fpu_init(void)
{
return crypto_register_template(&crypto_fpu_tmpl);
}
-static void __exit crypto_fpu_module_exit(void)
+void __exit crypto_fpu_exit(void)
{
crypto_unregister_template(&crypto_fpu_tmpl);
}
-
-module_init(crypto_fpu_module_init);
-module_exit(crypto_fpu_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("FPU block cipher wrapper");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 849a9d23c71..95f5826be45 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -848,4 +848,5 @@ ia32_sys_call_table:
.quad compat_sys_open_by_handle_at
.quad compat_sys_clock_adjtime
.quad sys_syncfs
+ .quad compat_sys_sendmmsg /* 345 */
ia32_syscall_end:
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0f521356432..0049211959c 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -14,6 +14,8 @@
#include <asm/desc_defs.h>
struct x86_emulate_ctxt;
+enum x86_intercept;
+enum x86_intercept_stage;
struct x86_exception {
u8 vector;
@@ -24,6 +26,24 @@ struct x86_exception {
};
/*
+ * This struct is used to carry enough information from the instruction
+ * decoder to main KVM so that a decision can be made whether the
+ * instruction needs to be intercepted or not.
+ */
+struct x86_instruction_info {
+ u8 intercept; /* which intercept */
+ u8 rep_prefix; /* rep prefix? */
+ u8 modrm_mod; /* mod part of modrm */
+ u8 modrm_reg; /* index of register used */
+ u8 modrm_rm; /* rm part of modrm */
+ u64 src_val; /* value of source operand */
+ u8 src_bytes; /* size of source operand */
+ u8 dst_bytes; /* size of destination operand */
+ u8 ad_bytes; /* size of src/dst address */
+ u64 next_rip; /* rip following the instruction */
+};
+
+/*
* x86_emulate_ops:
*
* These operations represent the instruction emulator's interface to memory.
@@ -62,6 +82,7 @@ struct x86_exception {
#define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */
#define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */
#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */
+#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */
struct x86_emulate_ops {
/*
@@ -71,8 +92,9 @@ struct x86_emulate_ops {
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
- int (*read_std)(unsigned long addr, void *val,
- unsigned int bytes, struct kvm_vcpu *vcpu,
+ int (*read_std)(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr, void *val,
+ unsigned int bytes,
struct x86_exception *fault);
/*
@@ -82,8 +104,8 @@ struct x86_emulate_ops {
* @val: [OUT] Value write to memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to write to memory.
*/
- int (*write_std)(unsigned long addr, void *val,
- unsigned int bytes, struct kvm_vcpu *vcpu,
+ int (*write_std)(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr, void *val, unsigned int bytes,
struct x86_exception *fault);
/*
* fetch: Read bytes of standard (non-emulated/special) memory.
@@ -92,8 +114,8 @@ struct x86_emulate_ops {
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
- int (*fetch)(unsigned long addr, void *val,
- unsigned int bytes, struct kvm_vcpu *vcpu,
+ int (*fetch)(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr, void *val, unsigned int bytes,
struct x86_exception *fault);
/*
@@ -102,11 +124,9 @@ struct x86_emulate_ops {
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
- int (*read_emulated)(unsigned long addr,
- void *val,
- unsigned int bytes,
- struct x86_exception *fault,
- struct kvm_vcpu *vcpu);
+ int (*read_emulated)(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr, void *val, unsigned int bytes,
+ struct x86_exception *fault);
/*
* write_emulated: Write bytes to emulated/special memory area.
@@ -115,11 +135,10 @@ struct x86_emulate_ops {
* required).
* @bytes: [IN ] Number of bytes to write to memory.
*/
- int (*write_emulated)(unsigned long addr,
- const void *val,
+ int (*write_emulated)(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr, const void *val,
unsigned int bytes,
- struct x86_exception *fault,
- struct kvm_vcpu *vcpu);
+ struct x86_exception *fault);
/*
* cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@@ -129,40 +148,54 @@ struct x86_emulate_ops {
* @new: [IN ] Value to write to @addr.
* @bytes: [IN ] Number of bytes to access using CMPXCHG.
*/
- int (*cmpxchg_emulated)(unsigned long addr,
+ int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr,
const void *old,
const void *new,
unsigned int bytes,
- struct x86_exception *fault,
- struct kvm_vcpu *vcpu);
-
- int (*pio_in_emulated)(int size, unsigned short port, void *val,
- unsigned int count, struct kvm_vcpu *vcpu);
-
- int (*pio_out_emulated)(int size, unsigned short port, const void *val,
- unsigned int count, struct kvm_vcpu *vcpu);
-
- bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3,
- int seg, struct kvm_vcpu *vcpu);
- void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3,
- int seg, struct kvm_vcpu *vcpu);
- u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu);
- void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu);
- unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu);
- void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
- void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
- ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu);
- int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu);
- int (*cpl)(struct kvm_vcpu *vcpu);
- int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu);
- int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu);
- int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
- int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+ struct x86_exception *fault);
+ void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr);
+
+ int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt,
+ int size, unsigned short port, void *val,
+ unsigned int count);
+
+ int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt,
+ int size, unsigned short port, const void *val,
+ unsigned int count);
+
+ bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector,
+ struct desc_struct *desc, u32 *base3, int seg);
+ void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector,
+ struct desc_struct *desc, u32 base3, int seg);
+ unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt,
+ int seg);
+ void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+ void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+ void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+ void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+ ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
+ int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
+ int (*cpl)(struct x86_emulate_ctxt *ctxt);
+ int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
+ int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
+ int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
+ int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
+ void (*halt)(struct x86_emulate_ctxt *ctxt);
+ void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
+ int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
+ void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
+ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
+ int (*intercept)(struct x86_emulate_ctxt *ctxt,
+ struct x86_instruction_info *info,
+ enum x86_intercept_stage stage);
};
+typedef u32 __attribute__((vector_size(16))) sse128_t;
+
/* Type, address-of, and value of an instruction's operand. */
struct operand {
- enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
+ enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
unsigned int bytes;
union {
unsigned long orig_val;
@@ -174,11 +207,13 @@ struct operand {
ulong ea;
unsigned seg;
} mem;
+ unsigned xmm;
} addr;
union {
unsigned long val;
u64 val64;
char valptr[sizeof(unsigned long) + 2];
+ sse128_t vec_val;
};
};
@@ -197,6 +232,7 @@ struct read_cache {
struct decode_cache {
u8 twobyte;
u8 b;
+ u8 intercept;
u8 lock_prefix;
u8 rep_prefix;
u8 op_bytes;
@@ -209,6 +245,7 @@ struct decode_cache {
u8 seg_override;
unsigned int d;
int (*execute)(struct x86_emulate_ctxt *ctxt);
+ int (*check_perm)(struct x86_emulate_ctxt *ctxt);
unsigned long regs[NR_VCPU_REGS];
unsigned long eip;
/* modrm */
@@ -227,17 +264,15 @@ struct x86_emulate_ctxt {
struct x86_emulate_ops *ops;
/* Register state before/after emulation. */
- struct kvm_vcpu *vcpu;
-
unsigned long eflags;
unsigned long eip; /* eip before instruction emulation */
/* Emulated execution mode, represented by an X86EMUL_MODE value. */
int mode;
- u32 cs_base;
/* interruptibility state, as a result of execution of STI or MOV SS */
int interruptibility;
+ bool guest_mode; /* guest running a nested guest */
bool perm_ok; /* do not check permissions if true */
bool only_vendor_specific_insn;
@@ -249,8 +284,8 @@ struct x86_emulate_ctxt {
};
/* Repeat String Operation Prefix */
-#define REPE_PREFIX 1
-#define REPNE_PREFIX 2
+#define REPE_PREFIX 0xf3
+#define REPNE_PREFIX 0xf2
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
@@ -259,6 +294,69 @@ struct x86_emulate_ctxt {
#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */
#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
+/* any protected mode */
+#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
+ X86EMUL_MODE_PROT64)
+
+enum x86_intercept_stage {
+ X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */
+ X86_ICPT_PRE_EXCEPT,
+ X86_ICPT_POST_EXCEPT,
+ X86_ICPT_POST_MEMACCESS,
+};
+
+enum x86_intercept {
+ x86_intercept_none,
+ x86_intercept_cr_read,
+ x86_intercept_cr_write,
+ x86_intercept_clts,
+ x86_intercept_lmsw,
+ x86_intercept_smsw,
+ x86_intercept_dr_read,
+ x86_intercept_dr_write,
+ x86_intercept_lidt,
+ x86_intercept_sidt,
+ x86_intercept_lgdt,
+ x86_intercept_sgdt,
+ x86_intercept_lldt,
+ x86_intercept_sldt,
+ x86_intercept_ltr,
+ x86_intercept_str,
+ x86_intercept_rdtsc,
+ x86_intercept_rdpmc,
+ x86_intercept_pushf,
+ x86_intercept_popf,
+ x86_intercept_cpuid,
+ x86_intercept_rsm,
+ x86_intercept_iret,
+ x86_intercept_intn,
+ x86_intercept_invd,
+ x86_intercept_pause,
+ x86_intercept_hlt,
+ x86_intercept_invlpg,
+ x86_intercept_invlpga,
+ x86_intercept_vmrun,
+ x86_intercept_vmload,
+ x86_intercept_vmsave,
+ x86_intercept_vmmcall,
+ x86_intercept_stgi,
+ x86_intercept_clgi,
+ x86_intercept_skinit,
+ x86_intercept_rdtscp,
+ x86_intercept_icebp,
+ x86_intercept_wbinvd,
+ x86_intercept_monitor,
+ x86_intercept_mwait,
+ x86_intercept_rdmsr,
+ x86_intercept_wrmsr,
+ x86_intercept_in,
+ x86_intercept_ins,
+ x86_intercept_out,
+ x86_intercept_outs,
+
+ nr_x86_intercepts
+};
+
/* Host execution mode. */
#if defined(CONFIG_X86_32)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
@@ -270,6 +368,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
#define EMULATION_FAILED -1
#define EMULATION_OK 0
#define EMULATION_RESTART 1
+#define EMULATION_INTERCEPTED 2
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, int reason,
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c8af0991fdf..d2ac8e2ee89 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -30,14 +30,30 @@
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MMIO_SIZE 16
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define CR0_RESERVED_BITS \
+ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
+ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
+ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
+
#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
0xFFFFFF0000000000ULL)
+#define CR4_RESERVED_BITS \
+ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
+ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
+ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
+ | X86_CR4_OSXSAVE \
+ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+
+#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
+
+
#define INVALID_PAGE (~(hpa_t)0)
#define VALID_PAGE(x) ((x) != INVALID_PAGE)
@@ -118,6 +134,9 @@ enum kvm_reg {
enum kvm_reg_ex {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
VCPU_EXREG_CR3,
+ VCPU_EXREG_RFLAGS,
+ VCPU_EXREG_CPL,
+ VCPU_EXREG_SEGMENTS,
};
enum {
@@ -256,7 +275,7 @@ struct kvm_mmu {
struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- u64 *spte, const void *pte, unsigned long mmu_seq);
+ u64 *spte, const void *pte);
hpa_t root_hpa;
int root_level;
int shadow_root_level;
@@ -340,7 +359,6 @@ struct kvm_vcpu_arch {
struct fpu guest_fpu;
u64 xcr0;
- gva_t mmio_fault_cr2;
struct kvm_pio_request pio;
void *pio_data;
@@ -367,18 +385,22 @@ struct kvm_vcpu_arch {
/* emulate context */
struct x86_emulate_ctxt emulate_ctxt;
+ bool emulate_regs_need_sync_to_vcpu;
+ bool emulate_regs_need_sync_from_vcpu;
gpa_t time;
struct pvclock_vcpu_time_info hv_clock;
unsigned int hw_tsc_khz;
unsigned int time_offset;
struct page *time_page;
- u64 last_host_tsc;
u64 last_guest_tsc;
u64 last_kernel_ns;
u64 last_tsc_nsec;
u64 last_tsc_write;
+ u32 virtual_tsc_khz;
bool tsc_catchup;
+ u32 tsc_catchup_mult;
+ s8 tsc_catchup_shift;
bool nmi_pending;
bool nmi_injected;
@@ -448,9 +470,6 @@ struct kvm_arch {
u64 last_tsc_nsec;
u64 last_tsc_offset;
u64 last_tsc_write;
- u32 virtual_tsc_khz;
- u32 virtual_tsc_mult;
- s8 virtual_tsc_shift;
struct kvm_xen_hvm_config xen_hvm_config;
@@ -502,6 +521,8 @@ struct kvm_vcpu_stat {
u32 nmi_injections;
};
+struct x86_instruction_info;
+
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
@@ -586,9 +607,17 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);
+ void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+ u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
+
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
+
+ int (*check_intercept)(struct kvm_vcpu *vcpu,
+ struct x86_instruction_info *info,
+ enum x86_intercept_stage stage);
+
const struct trace_print_flags *exit_reasons_str;
};
@@ -627,6 +656,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
extern bool tdp_enabled;
+/* control of guest tsc rate supported? */
+extern bool kvm_has_tsc_control;
+/* minimum supported tsc_khz for guests */
+extern u32 kvm_min_guest_tsc_khz;
+/* maximum supported tsc_khz for guests */
+extern u32 kvm_max_guest_tsc_khz;
+
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* kvm_run filled with mmio request */
@@ -645,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
}
-void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
-void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
-
void kvm_enable_efer_bits(u64);
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
@@ -657,8 +690,6 @@ struct x86_emulate_ctxt;
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
-int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
-int emulate_clts(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -721,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
-int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
-
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3cce71413d0..485b4f1f079 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -118,6 +118,7 @@
complete list. */
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
+#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 99f0ad753f3..99ddd148a76 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -6,7 +6,6 @@
#include <linux/errno.h>
#include <linux/compiler.h>
#include <linux/thread_info.h>
-#include <linux/prefetch.h>
#include <linux/string.h>
#include <asm/asm.h>
#include <asm/page.h>
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 088d09fb161..566e803cc60 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -6,7 +6,6 @@
*/
#include <linux/errno.h>
#include <linux/thread_info.h>
-#include <linux/prefetch.h>
#include <linux/string.h>
#include <asm/asm.h>
#include <asm/page.h>
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 316708d5af9..1c66d30971a 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -6,7 +6,6 @@
*/
#include <linux/compiler.h>
#include <linux/errno.h>
-#include <linux/prefetch.h>
#include <linux/lockdep.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index a755ef5e597..fb6a625c99b 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -350,10 +350,11 @@
#define __NR_open_by_handle_at 342
#define __NR_clock_adjtime 343
#define __NR_syncfs 344
+#define __NR_sendmmsg 345
#ifdef __KERNEL__
-#define NR_syscalls 345
+#define NR_syscalls 346
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 160fa76bd57..79f90eb15aa 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
#define __NR_syncfs 306
__SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_sendmmsg 307
+__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4fd173cd8e5..40a24932a8a 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -601,10 +601,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
goto badframe;
sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
@@ -682,6 +679,7 @@ static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs *regs)
{
+ sigset_t blocked;
int ret;
/* Are we from a system call? */
@@ -741,12 +739,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
*/
regs->flags &= ~X86_EFLAGS_TF;
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+ sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked, sig);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ sigaddset(&blocked, sig);
+ set_current_blocked(&blocked);
tracehook_signal_handler(sig, info, ka, regs,
test_thread_flag(TIF_SINGLESTEP));
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index abce34d5c79..32cbffb0c49 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -344,3 +344,4 @@ ENTRY(sys_call_table)
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_sendmmsg /* 345 */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0ad47b819a8..d6e2477feb1 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -73,9 +73,14 @@
#define MemAbs (1<<11) /* Memory operand is absolute displacement */
#define String (1<<12) /* String instruction (rep capable) */
#define Stack (1<<13) /* Stack instruction (push/pop) */
+#define GroupMask (7<<14) /* Opcode uses one of the group mechanisms */
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
-#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
+#define GroupDual (2<<14) /* Alternate decoding of mod == 3 */
+#define Prefix (3<<14) /* Instruction varies with 66/f2/f3 prefix */
+#define RMExt (4<<14) /* Opcode extension in ModRM r/m if mod == 3 */
+#define Sse (1<<17) /* SSE Vector instruction */
/* Misc flags */
+#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
#define VendorSpecific (1<<22) /* Vendor specific instruction */
#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
@@ -102,11 +107,14 @@
struct opcode {
u32 flags;
+ u8 intercept;
union {
int (*execute)(struct x86_emulate_ctxt *ctxt);
struct opcode *group;
struct group_dual *gdual;
+ struct gprefix *gprefix;
} u;
+ int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};
struct group_dual {
@@ -114,6 +122,13 @@ struct group_dual {
struct opcode mod3[8];
};
+struct gprefix {
+ struct opcode pfx_no;
+ struct opcode pfx_66;
+ struct opcode pfx_f2;
+ struct opcode pfx_f3;
+};
+
/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
@@ -248,42 +263,42 @@ struct group_dual {
"w", "r", _LO32, "r", "", "r")
/* Instruction has three operands and one operand is stored in ECX register */
-#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
- do { \
- unsigned long _tmp; \
- _type _clv = (_cl).val; \
- _type _srcv = (_src).val; \
- _type _dstv = (_dst).val; \
- \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "5", "2") \
- _op _suffix " %4,%1 \n" \
- _POST_EFLAGS("0", "5", "2") \
- : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
- : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
- ); \
- \
- (_cl).val = (unsigned long) _clv; \
- (_src).val = (unsigned long) _srcv; \
- (_dst).val = (unsigned long) _dstv; \
+#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
+ do { \
+ unsigned long _tmp; \
+ _type _clv = (_cl).val; \
+ _type _srcv = (_src).val; \
+ _type _dstv = (_dst).val; \
+ \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "5", "2") \
+ _op _suffix " %4,%1 \n" \
+ _POST_EFLAGS("0", "5", "2") \
+ : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
+ : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
+ ); \
+ \
+ (_cl).val = (unsigned long) _clv; \
+ (_src).val = (unsigned long) _srcv; \
+ (_dst).val = (unsigned long) _dstv; \
} while (0)
-#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
- do { \
- switch ((_dst).bytes) { \
- case 2: \
- __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
- "w", unsigned short); \
- break; \
- case 4: \
- __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
- "l", unsigned int); \
- break; \
- case 8: \
- ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
- "q", unsigned long)); \
- break; \
- } \
+#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
+ do { \
+ switch ((_dst).bytes) { \
+ case 2: \
+ __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
+ "w", unsigned short); \
+ break; \
+ case 4: \
+ __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
+ "l", unsigned int); \
+ break; \
+ case 8: \
+ ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
+ "q", unsigned long)); \
+ break; \
+ } \
} while (0)
#define __emulate_1op(_op, _dst, _eflags, _suffix) \
@@ -346,13 +361,25 @@ struct group_dual {
} while (0)
/* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */
-#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \
- do { \
- switch((_src).bytes) { \
- case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \
- case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "w"); break; \
- case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \
- case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \
+#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \
+ do { \
+ switch((_src).bytes) { \
+ case 1: \
+ __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \
+ _eflags, "b"); \
+ break; \
+ case 2: \
+ __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \
+ _eflags, "w"); \
+ break; \
+ case 4: \
+ __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \
+ _eflags, "l"); \
+ break; \
+ case 8: \
+ ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \
+ _eflags, "q")); \
+ break; \
} \
} while (0)
@@ -388,13 +415,33 @@ struct group_dual {
(_type)_x; \
})
-#define insn_fetch_arr(_arr, _size, _eip) \
+#define insn_fetch_arr(_arr, _size, _eip) \
({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \
if (rc != X86EMUL_CONTINUE) \
goto done; \
(_eip) += (_size); \
})
+static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
+ enum x86_intercept intercept,
+ enum x86_intercept_stage stage)
+{
+ struct x86_instruction_info info = {
+ .intercept = intercept,
+ .rep_prefix = ctxt->decode.rep_prefix,
+ .modrm_mod = ctxt->decode.modrm_mod,
+ .modrm_reg = ctxt->decode.modrm_reg,
+ .modrm_rm = ctxt->decode.modrm_rm,
+ .src_val = ctxt->decode.src.val64,
+ .src_bytes = ctxt->decode.src.bytes,
+ .dst_bytes = ctxt->decode.dst.bytes,
+ .ad_bytes = ctxt->decode.ad_bytes,
+ .next_rip = ctxt->eip,
+ };
+
+ return ctxt->ops->intercept(ctxt, &info, stage);
+}
+
static inline unsigned long ad_mask(struct decode_cache *c)
{
return (1UL << (c->ad_bytes << 3)) - 1;
@@ -430,6 +477,13 @@ static inline void jmp_rel(struct decode_cache *c, int rel)
register_address_increment(c, &c->eip, rel);
}
+static u32 desc_limit_scaled(struct desc_struct *desc)
+{
+ u32 limit = get_desc_limit(desc);
+
+ return desc->g ? (limit << 12) | 0xfff : limit;
+}
+
static void set_seg_override(struct decode_cache *c, int seg)
{
c->has_seg_override = true;
@@ -442,11 +496,10 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt,
if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
return 0;
- return ops->get_cached_segment_base(seg, ctxt->vcpu);
+ return ops->get_cached_segment_base(ctxt, seg);
}
static unsigned seg_override(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops,
struct decode_cache *c)
{
if (!c->has_seg_override)
@@ -455,18 +508,6 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt,
return c->seg_override;
}
-static ulong linear(struct x86_emulate_ctxt *ctxt,
- struct segmented_address addr)
-{
- struct decode_cache *c = &ctxt->decode;
- ulong la;
-
- la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea;
- if (c->ad_bytes != 8)
- la &= (u32)-1;
- return la;
-}
-
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
u32 error, bool valid)
{
@@ -476,11 +517,21 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
return X86EMUL_PROPAGATE_FAULT;
}
+static int emulate_db(struct x86_emulate_ctxt *ctxt)
+{
+ return emulate_exception(ctxt, DB_VECTOR, 0, false);
+}
+
static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
return emulate_exception(ctxt, GP_VECTOR, err, true);
}
+static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
+{
+ return emulate_exception(ctxt, SS_VECTOR, err, true);
+}
+
static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
return emulate_exception(ctxt, UD_VECTOR, 0, false);
@@ -496,6 +547,128 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt)
return emulate_exception(ctxt, DE_VECTOR, 0, false);
}
+static int emulate_nm(struct x86_emulate_ctxt *ctxt)
+{
+ return emulate_exception(ctxt, NM_VECTOR, 0, false);
+}
+
+static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
+{
+ u16 selector;
+ struct desc_struct desc;
+
+ ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
+ return selector;
+}
+
+static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
+ unsigned seg)
+{
+ u16 dummy;
+ u32 base3;
+ struct desc_struct desc;
+
+ ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
+}
+
+static int __linearize(struct x86_emulate_ctxt *ctxt,
+ struct segmented_address addr,
+ unsigned size, bool write, bool fetch,
+ ulong *linear)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct desc_struct desc;
+ bool usable;
+ ulong la;
+ u32 lim;
+ u16 sel;
+ unsigned cpl, rpl;
+
+ la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea;
+ switch (ctxt->mode) {
+ case X86EMUL_MODE_REAL:
+ break;
+ case X86EMUL_MODE_PROT64:
+ if (((signed long)la << 16) >> 16 != la)
+ return emulate_gp(ctxt, 0);
+ break;
+ default:
+ usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
+ addr.seg);
+ if (!usable)
+ goto bad;
+ /* code segment or read-only data segment */
+ if (((desc.type & 8) || !(desc.type & 2)) && write)
+ goto bad;
+ /* unreadable code segment */
+ if (!fetch && (desc.type & 8) && !(desc.type & 2))
+ goto bad;
+ lim = desc_limit_scaled(&desc);
+ if ((desc.type & 8) || !(desc.type & 4)) {
+ /* expand-up segment */
+ if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
+ goto bad;
+ } else {
+ /* exapand-down segment */
+ if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
+ goto bad;
+ lim = desc.d ? 0xffffffff : 0xffff;
+ if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
+ goto bad;
+ }
+ cpl = ctxt->ops->cpl(ctxt);
+ rpl = sel & 3;
+ cpl = max(cpl, rpl);
+ if (!(desc.type & 8)) {
+ /* data segment */
+ if (cpl > desc.dpl)
+ goto bad;
+ } else if ((desc.type & 8) && !(desc.type & 4)) {
+ /* nonconforming code segment */
+ if (cpl != desc.dpl)
+ goto bad;
+ } else if ((desc.type & 8) && (desc.type & 4)) {
+ /* conforming code segment */
+ if (cpl < desc.dpl)
+ goto bad;
+ }
+ break;
+ }
+ if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8)
+ la &= (u32)-1;
+ *linear = la;
+ return X86EMUL_CONTINUE;
+bad:
+ if (addr.seg == VCPU_SREG_SS)
+ return emulate_ss(ctxt, addr.seg);
+ else
+ return emulate_gp(ctxt, addr.seg);
+}
+
+static int linearize(struct x86_emulate_ctxt *ctxt,
+ struct segmented_address addr,
+ unsigned size, bool write,
+ ulong *linear)
+{
+ return __linearize(ctxt, addr, size, write, false, linear);
+}
+
+
+static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
+ struct segmented_address addr,
+ void *data,
+ unsigned size)
+{
+ int rc;
+ ulong linear;
+
+ rc = linearize(ctxt, addr, size, false, &linear);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
+}
+
static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
unsigned long eip, u8 *dest)
@@ -505,10 +678,15 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
int size, cur_size;
if (eip == fc->end) {
+ unsigned long linear;
+ struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip};
cur_size = fc->end - fc->start;
size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
- rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
- size, ctxt->vcpu, &ctxt->exception);
+ rc = __linearize(ctxt, addr, size, false, true, &linear);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ rc = ops->fetch(ctxt, linear, fc->data + cur_size,
+ size, &ctxt->exception);
if (rc != X86EMUL_CONTINUE)
return rc;
fc->end += size;
@@ -551,7 +729,6 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs,
}
static int read_descriptor(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops,
struct segmented_address addr,
u16 *size, unsigned long *address, int op_bytes)
{
@@ -560,13 +737,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
if (op_bytes == 2)
op_bytes = 3;
*address = 0;
- rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2,
- ctxt->vcpu, &ctxt->exception);
+ rc = segmented_read_std(ctxt, addr, size, 2);
if (rc != X86EMUL_CONTINUE)
return rc;
addr.ea += 2;
- rc = ops->read_std(linear(ctxt, addr), address, op_bytes,
- ctxt->vcpu, &ctxt->exception);
+ rc = segmented_read_std(ctxt, addr, address, op_bytes);
return rc;
}
@@ -623,7 +798,63 @@ static void fetch_register_operand(struct operand *op)
}
}
-static void decode_register_operand(struct operand *op,
+static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
+{
+ ctxt->ops->get_fpu(ctxt);
+ switch (reg) {
+ case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
+ case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
+ case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
+ case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
+ case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
+ case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
+ case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
+ case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
+#ifdef CONFIG_X86_64
+ case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
+ case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
+ case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
+ case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
+ case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
+ case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
+ case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
+ case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
+#endif
+ default: BUG();
+ }
+ ctxt->ops->put_fpu(ctxt);
+}
+
+static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
+ int reg)
+{
+ ctxt->ops->get_fpu(ctxt);
+ switch (reg) {
+ case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
+ case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
+ case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
+ case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
+ case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
+ case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
+ case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
+ case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
+#ifdef CONFIG_X86_64
+ case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
+ case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
+ case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
+ case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
+ case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
+ case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
+ case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
+ case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
+#endif
+ default: BUG();
+ }
+ ctxt->ops->put_fpu(ctxt);
+}
+
+static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
+ struct operand *op,
struct decode_cache *c,
int inhibit_bytereg)
{
@@ -632,6 +863,15 @@ static void decode_register_operand(struct operand *op,
if (!(c->d & ModRM))
reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
+
+ if (c->d & Sse) {
+ op->type = OP_XMM;
+ op->bytes = 16;
+ op->addr.xmm = reg;
+ read_sse_reg(ctxt, &op->vec_val, reg);
+ return;
+ }
+
op->type = OP_REG;
if ((c->d & ByteOp) && !inhibit_bytereg) {
op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
@@ -671,6 +911,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
op->addr.reg = decode_register(c->modrm_rm,
c->regs, c->d & ByteOp);
+ if (c->d & Sse) {
+ op->type = OP_XMM;
+ op->bytes = 16;
+ op->addr.xmm = c->modrm_rm;
+ read_sse_reg(ctxt, &op->vec_val, c->modrm_rm);
+ return rc;
+ }
fetch_register_operand(op);
return rc;
}
@@ -819,8 +1066,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
if (mc->pos < mc->end)
goto read_cached;
- rc = ops->read_emulated(addr, mc->data + mc->end, n,
- &ctxt->exception, ctxt->vcpu);
+ rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n,
+ &ctxt->exception);
if (rc != X86EMUL_CONTINUE)
return rc;
mc->end += n;
@@ -834,6 +1081,50 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
return X86EMUL_CONTINUE;
}
+static int segmented_read(struct x86_emulate_ctxt *ctxt,
+ struct segmented_address addr,
+ void *data,
+ unsigned size)
+{
+ int rc;
+ ulong linear;
+
+ rc = linearize(ctxt, addr, size, false, &linear);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ return read_emulated(ctxt, ctxt->ops, linear, data, size);
+}
+
+static int segmented_write(struct x86_emulate_ctxt *ctxt,
+ struct segmented_address addr,
+ const void *data,
+ unsigned size)
+{
+ int rc;
+ ulong linear;
+
+ rc = linearize(ctxt, addr, size, true, &linear);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ return ctxt->ops->write_emulated(ctxt, linear, data, size,
+ &ctxt->exception);
+}
+
+static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
+ struct segmented_address addr,
+ const void *orig_data, const void *data,
+ unsigned size)
+{
+ int rc;
+ ulong linear;
+
+ rc = linearize(ctxt, addr, size, true, &linear);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
+ size, &ctxt->exception);
+}
+
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
unsigned int size, unsigned short port,
@@ -854,7 +1145,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
if (n == 0)
n = 1;
rc->pos = rc->end = 0;
- if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))
+ if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n))
return 0;
rc->end = n * size;
}
@@ -864,28 +1155,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
return 1;
}
-static u32 desc_limit_scaled(struct desc_struct *desc)
-{
- u32 limit = get_desc_limit(desc);
-
- return desc->g ? (limit << 12) | 0xfff : limit;
-}
-
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
u16 selector, struct desc_ptr *dt)
{
if (selector & 1 << 2) {
struct desc_struct desc;
+ u16 sel;
+
memset (dt, 0, sizeof *dt);
- if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR,
- ctxt->vcpu))
+ if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR))
return;
dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
dt->address = get_desc_base(&desc);
} else
- ops->get_gdt(dt, ctxt->vcpu);
+ ops->get_gdt(ctxt, dt);
}
/* allowed just for 8 bytes segments */
@@ -903,8 +1188,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
if (dt.size < index * 8 + 7)
return emulate_gp(ctxt, selector & 0xfffc);
addr = dt.address + index * 8;
- ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu,
- &ctxt->exception);
+ ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception);
return ret;
}
@@ -925,8 +1209,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
return emulate_gp(ctxt, selector & 0xfffc);
addr = dt.address + index * 8;
- ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu,
- &ctxt->exception);
+ ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception);
return ret;
}
@@ -986,7 +1269,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
rpl = selector & 3;
dpl = seg_desc.dpl;
- cpl = ops->cpl(ctxt->vcpu);
+ cpl = ops->cpl(ctxt);
switch (seg) {
case VCPU_SREG_SS:
@@ -1042,8 +1325,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
return ret;
}
load:
- ops->set_segment_selector(selector, seg, ctxt->vcpu);
- ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu);
+ ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
return X86EMUL_CONTINUE;
exception:
emulate_exception(ctxt, err_vec, err_code, true);
@@ -1069,8 +1351,7 @@ static void write_register_operand(struct operand *op)
}
}
-static inline int writeback(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+static int writeback(struct x86_emulate_ctxt *ctxt)
{
int rc;
struct decode_cache *c = &ctxt->decode;
@@ -1081,23 +1362,22 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
break;
case OP_MEM:
if (c->lock_prefix)
- rc = ops->cmpxchg_emulated(
- linear(ctxt, c->dst.addr.mem),
- &c->dst.orig_val,
- &c->dst.val,
- c->dst.bytes,
- &ctxt->exception,
- ctxt->vcpu);
+ rc = segmented_cmpxchg(ctxt,
+ c->dst.addr.mem,
+ &c->dst.orig_val,
+ &c->dst.val,
+ c->dst.bytes);
else
- rc = ops->write_emulated(
- linear(ctxt, c->dst.addr.mem),
- &c->dst.val,
- c->dst.bytes,
- &ctxt->exception,
- ctxt->vcpu);
+ rc = segmented_write(ctxt,
+ c->dst.addr.mem,
+ &c->dst.val,
+ c->dst.bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
break;
+ case OP_XMM:
+ write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm);
+ break;
case OP_NONE:
/* no writeback */
break;
@@ -1107,21 +1387,21 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
return X86EMUL_CONTINUE;
}
-static inline void emulate_push(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+static int em_push(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
+ struct segmented_address addr;
- c->dst.type = OP_MEM;
- c->dst.bytes = c->op_bytes;
- c->dst.val = c->src.val;
register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
- c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
- c->dst.addr.mem.seg = VCPU_SREG_SS;
+ addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
+ addr.seg = VCPU_SREG_SS;
+
+ /* Disable writeback. */
+ c->dst.type = OP_NONE;
+ return segmented_write(ctxt, addr, &c->src.val, c->op_bytes);
}
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops,
void *dest, int len)
{
struct decode_cache *c = &ctxt->decode;
@@ -1130,7 +1410,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
addr.seg = VCPU_SREG_SS;
- rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len);
+ rc = segmented_read(ctxt, addr, dest, len);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -1138,6 +1418,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
return rc;
}
+static int em_pop(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ return emulate_pop(ctxt, &c->dst.val, c->op_bytes);
+}
+
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
void *dest, int len)
@@ -1145,9 +1432,9 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
int rc;
unsigned long val, change_mask;
int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
- int cpl = ops->cpl(ctxt->vcpu);
+ int cpl = ops->cpl(ctxt);
- rc = emulate_pop(ctxt, ops, &val, len);
+ rc = emulate_pop(ctxt, &val, len);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -1179,14 +1466,24 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
return rc;
}
-static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops, int seg)
+static int em_popf(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
- c->src.val = ops->get_segment_selector(seg, ctxt->vcpu);
+ c->dst.type = OP_REG;
+ c->dst.addr.reg = &ctxt->eflags;
+ c->dst.bytes = c->op_bytes;
+ return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes);
+}
- emulate_push(ctxt, ops);
+static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops, int seg)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ c->src.val = get_segment_selector(ctxt, seg);
+
+ return em_push(ctxt);
}
static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
@@ -1196,7 +1493,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
unsigned long selector;
int rc;
- rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
+ rc = emulate_pop(ctxt, &selector, c->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -1204,8 +1501,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
return rc;
}
-static int emulate_pusha(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
unsigned long old_esp = c->regs[VCPU_REGS_RSP];
@@ -1216,23 +1512,25 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt,
(reg == VCPU_REGS_RSP) ?
(c->src.val = old_esp) : (c->src.val = c->regs[reg]);
- emulate_push(ctxt, ops);
-
- rc = writeback(ctxt, ops);
+ rc = em_push(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
++reg;
}
- /* Disable writeback. */
- c->dst.type = OP_NONE;
-
return rc;
}
-static int emulate_popa(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+static int em_pushf(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ c->src.val = (unsigned long)ctxt->eflags;
+ return em_push(ctxt);
+}
+
+static int em_popa(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
int rc = X86EMUL_CONTINUE;
@@ -1245,7 +1543,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt,
--reg;
}
- rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
+ rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes);
if (rc != X86EMUL_CONTINUE)
break;
--reg;
@@ -1265,37 +1563,32 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt,
/* TODO: Add limit checks */
c->src.val = ctxt->eflags;
- emulate_push(ctxt, ops);
- rc = writeback(ctxt, ops);
+ rc = em_push(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
- c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
- emulate_push(ctxt, ops);
- rc = writeback(ctxt, ops);
+ c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
+ rc = em_push(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
c->src.val = c->eip;
- emulate_push(ctxt, ops);
- rc = writeback(ctxt, ops);
+ rc = em_push(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
- c->dst.type = OP_NONE;
-
- ops->get_idt(&dt, ctxt->vcpu);
+ ops->get_idt(ctxt, &dt);
eip_addr = dt.address + (irq << 2);
cs_addr = dt.address + (irq << 2) + 2;
- rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception);
+ rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception);
+ rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -1339,7 +1632,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
/* TODO: Add stack limit check */
- rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes);
+ rc = emulate_pop(ctxt, &temp_eip, c->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -1347,12 +1640,12 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
if (temp_eip & ~0xffff)
return emulate_gp(ctxt, 0);
- rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+ rc = emulate_pop(ctxt, &cs, c->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes);
+ rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -1394,15 +1687,31 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
}
}
-static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc;
+ unsigned short sel;
+
+ memcpy(&sel, c->src.valptr + c->op_bytes, 2);
+
+ rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ c->eip = 0;
+ memcpy(&c->eip, c->src.valptr, c->op_bytes);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_grp1a(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
- return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
+ return emulate_pop(ctxt, &c->dst.val, c->dst.bytes);
}
-static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
+static int em_grp2(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
switch (c->modrm_reg) {
@@ -1429,10 +1738,10 @@ static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
break;
}
+ return X86EMUL_CONTINUE;
}
-static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+static int em_grp3(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
unsigned long *rax = &c->regs[VCPU_REGS_RAX];
@@ -1471,10 +1780,10 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
return X86EMUL_CONTINUE;
}
-static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+static int em_grp45(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
+ int rc = X86EMUL_CONTINUE;
switch (c->modrm_reg) {
case 0: /* inc */
@@ -1488,21 +1797,23 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
old_eip = c->eip;
c->eip = c->src.val;
c->src.val = old_eip;
- emulate_push(ctxt, ops);
+ rc = em_push(ctxt);
break;
}
case 4: /* jmp abs */
c->eip = c->src.val;
break;
+ case 5: /* jmp far */
+ rc = em_jmp_far(ctxt);
+ break;
case 6: /* push */
- emulate_push(ctxt, ops);
+ rc = em_push(ctxt);
break;
}
- return X86EMUL_CONTINUE;
+ return rc;
}
-static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+static int em_grp9(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
u64 old = c->dst.orig_val64;
@@ -1528,12 +1839,12 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
int rc;
unsigned long cs;
- rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
+ rc = emulate_pop(ctxt, &c->eip, c->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
if (c->op_bytes == 4)
c->eip = (u32)c->eip;
- rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+ rc = emulate_pop(ctxt, &cs, c->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
@@ -1562,8 +1873,10 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops, struct desc_struct *cs,
struct desc_struct *ss)
{
+ u16 selector;
+
memset(cs, 0, sizeof(struct desc_struct));
- ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu);
+ ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS);
memset(ss, 0, sizeof(struct desc_struct));
cs->l = 0; /* will be adjusted later */
@@ -1593,44 +1906,44 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
struct desc_struct cs, ss;
u64 msr_data;
u16 cs_sel, ss_sel;
+ u64 efer = 0;
/* syscall is not available in real mode */
if (ctxt->mode == X86EMUL_MODE_REAL ||
ctxt->mode == X86EMUL_MODE_VM86)
return emulate_ud(ctxt);
+ ops->get_msr(ctxt, MSR_EFER, &efer);
setup_syscalls_segments(ctxt, ops, &cs, &ss);
- ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+ ops->get_msr(ctxt, MSR_STAR, &msr_data);
msr_data >>= 32;
cs_sel = (u16)(msr_data & 0xfffc);
ss_sel = (u16)(msr_data + 8);
- if (is_long_mode(ctxt->vcpu)) {
+ if (efer & EFER_LMA) {
cs.d = 0;
cs.l = 1;
}
- ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu);
- ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
- ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
- ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
+ ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
+ ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
c->regs[VCPU_REGS_RCX] = c->eip;
- if (is_long_mode(ctxt->vcpu)) {
+ if (efer & EFER_LMA) {
#ifdef CONFIG_X86_64
c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
- ops->get_msr(ctxt->vcpu,
+ ops->get_msr(ctxt,
ctxt->mode == X86EMUL_MODE_PROT64 ?
MSR_LSTAR : MSR_CSTAR, &msr_data);
c->eip = msr_data;
- ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
+ ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
ctxt->eflags &= ~(msr_data | EFLG_RF);
#endif
} else {
/* legacy mode */
- ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+ ops->get_msr(ctxt, MSR_STAR, &msr_data);
c->eip = (u32)msr_data;
ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
@@ -1646,7 +1959,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
struct desc_struct cs, ss;
u64 msr_data;
u16 cs_sel, ss_sel;
+ u64 efer = 0;
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
/* inject #GP if in real mode */
if (ctxt->mode == X86EMUL_MODE_REAL)
return emulate_gp(ctxt, 0);
@@ -1659,7 +1974,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
setup_syscalls_segments(ctxt, ops, &cs, &ss);
- ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+ ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
switch (ctxt->mode) {
case X86EMUL_MODE_PROT32:
if ((msr_data & 0xfffc) == 0x0)
@@ -1676,21 +1991,18 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
cs_sel &= ~SELECTOR_RPL_MASK;
ss_sel = cs_sel + 8;
ss_sel &= ~SELECTOR_RPL_MASK;
- if (ctxt->mode == X86EMUL_MODE_PROT64
- || is_long_mode(ctxt->vcpu)) {
+ if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) {
cs.d = 0;
cs.l = 1;
}
- ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu);
- ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
- ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
- ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
+ ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
+ ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
- ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
+ ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
c->eip = msr_data;
- ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
+ ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
c->regs[VCPU_REGS_RSP] = msr_data;
return X86EMUL_CONTINUE;
@@ -1719,7 +2031,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
cs.dpl = 3;
ss.dpl = 3;
- ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+ ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
switch (usermode) {
case X86EMUL_MODE_PROT32:
cs_sel = (u16)(msr_data + 16);
@@ -1739,10 +2051,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
cs_sel |= SELECTOR_RPL_MASK;
ss_sel |= SELECTOR_RPL_MASK;
- ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu);
- ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
- ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
- ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
+ ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
+ ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
c->eip = c->regs[VCPU_REGS_RDX];
c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX];
@@ -1759,7 +2069,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
if (ctxt->mode == X86EMUL_MODE_VM86)
return true;
iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
- return ops->cpl(ctxt->vcpu) > iopl;
+ return ops->cpl(ctxt) > iopl;
}
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
@@ -1769,11 +2079,11 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
struct desc_struct tr_seg;
u32 base3;
int r;
- u16 io_bitmap_ptr, perm, bit_idx = port & 0x7;
+ u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
unsigned mask = (1 << len) - 1;
unsigned long base;
- ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu);
+ ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
if (!tr_seg.p)
return false;
if (desc_limit_scaled(&tr_seg) < 103)
@@ -1782,13 +2092,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
#ifdef CONFIG_X86_64
base |= ((u64)base3) << 32;
#endif
- r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL);
+ r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
if (r != X86EMUL_CONTINUE)
return false;
if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
return false;
- r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu,
- NULL);
+ r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
if (r != X86EMUL_CONTINUE)
return false;
if ((perm >> bit_idx) & mask)
@@ -1829,11 +2138,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
tss->si = c->regs[VCPU_REGS_RSI];
tss->di = c->regs[VCPU_REGS_RDI];
- tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
- tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
- tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
- tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
- tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
+ tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
+ tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
+ tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
+ tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
+ tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
}
static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
@@ -1858,11 +2167,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
* SDM says that segment selectors are loaded before segment
* descriptors
*/
- ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
- ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
- ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
- ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
- ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
+ set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
+ set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
+ set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
+ set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
+ set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
/*
* Now load segment descriptors. If fault happenes at this stage
@@ -1896,7 +2205,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
int ret;
u32 new_tss_base = get_desc_base(new_desc);
- ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+ ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
&ctxt->exception);
if (ret != X86EMUL_CONTINUE)
/* FIXME: need to provide precise fault address */
@@ -1904,13 +2213,13 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
save_state_to_tss16(ctxt, ops, &tss_seg);
- ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+ ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
&ctxt->exception);
if (ret != X86EMUL_CONTINUE)
/* FIXME: need to provide precise fault address */
return ret;
- ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+ ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
&ctxt->exception);
if (ret != X86EMUL_CONTINUE)
/* FIXME: need to provide precise fault address */
@@ -1919,10 +2228,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
if (old_tss_sel != 0xffff) {
tss_seg.prev_task_link = old_tss_sel;
- ret = ops->write_std(new_tss_base,
+ ret = ops->write_std(ctxt, new_tss_base,
&tss_seg.prev_task_link,
sizeof tss_seg.prev_task_link,
- ctxt->vcpu, &ctxt->exception);
+ &ctxt->exception);
if (ret != X86EMUL_CONTINUE)
/* FIXME: need to provide precise fault address */
return ret;
@@ -1937,7 +2246,7 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
{
struct decode_cache *c = &ctxt->decode;
- tss->cr3 = ops->get_cr(3, ctxt->vcpu);
+ tss->cr3 = ops->get_cr(ctxt, 3);
tss->eip = c->eip;
tss->eflags = ctxt->eflags;
tss->eax = c->regs[VCPU_REGS_RAX];
@@ -1949,13 +2258,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
tss->esi = c->regs[VCPU_REGS_RSI];
tss->edi = c->regs[VCPU_REGS_RDI];
- tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
- tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
- tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
- tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
- tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
- tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
- tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
+ tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
+ tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
+ tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
+ tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
+ tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
+ tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
+ tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR);
}
static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
@@ -1965,7 +2274,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
struct decode_cache *c = &ctxt->decode;
int ret;
- if (ops->set_cr(3, tss->cr3, ctxt->vcpu))
+ if (ops->set_cr(ctxt, 3, tss->cr3))
return emulate_gp(ctxt, 0);
c->eip = tss->eip;
ctxt->eflags = tss->eflags | 2;
@@ -1982,13 +2291,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
* SDM says that segment selectors are loaded before segment
* descriptors
*/
- ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
- ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
- ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
- ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
- ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
- ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
- ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);
+ set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
+ set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
+ set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
+ set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
+ set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
+ set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
+ set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
/*
* Now load segment descriptors. If fault happenes at this stage
@@ -2028,7 +2337,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
int ret;
u32 new_tss_base = get_desc_base(new_desc);
- ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+ ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
&ctxt->exception);
if (ret != X86EMUL_CONTINUE)
/* FIXME: need to provide precise fault address */
@@ -2036,13 +2345,13 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
save_state_to_tss32(ctxt, ops, &tss_seg);
- ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+ ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
&ctxt->exception);
if (ret != X86EMUL_CONTINUE)
/* FIXME: need to provide precise fault address */
return ret;
- ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+ ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
&ctxt->exception);
if (ret != X86EMUL_CONTINUE)
/* FIXME: need to provide precise fault address */
@@ -2051,10 +2360,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
if (old_tss_sel != 0xffff) {
tss_seg.prev_task_link = old_tss_sel;
- ret = ops->write_std(new_tss_base,
+ ret = ops->write_std(ctxt, new_tss_base,
&tss_seg.prev_task_link,
sizeof tss_seg.prev_task_link,
- ctxt->vcpu, &ctxt->exception);
+ &ctxt->exception);
if (ret != X86EMUL_CONTINUE)
/* FIXME: need to provide precise fault address */
return ret;
@@ -2070,9 +2379,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
{
struct desc_struct curr_tss_desc, next_tss_desc;
int ret;
- u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
+ u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
ulong old_tss_base =
- ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu);
+ ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
u32 desc_limit;
/* FIXME: old_tss_base == ~0 ? */
@@ -2088,7 +2397,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
if (reason != TASK_SWITCH_IRET) {
if ((tss_selector & 3) > next_tss_desc.dpl ||
- ops->cpl(ctxt->vcpu) > next_tss_desc.dpl)
+ ops->cpl(ctxt) > next_tss_desc.dpl)
return emulate_gp(ctxt, 0);
}
@@ -2132,9 +2441,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
&next_tss_desc);
}
- ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
- ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu);
- ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);
+ ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
+ ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
if (has_error_code) {
struct decode_cache *c = &ctxt->decode;
@@ -2142,7 +2450,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
c->lock_prefix = 0;
c->src.val = (unsigned long) error_code;
- emulate_push(ctxt, ops);
+ ret = em_push(ctxt);
}
return ret;
@@ -2162,13 +2470,10 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,
has_error_code, error_code);
- if (rc == X86EMUL_CONTINUE) {
- rc = writeback(ctxt, ops);
- if (rc == X86EMUL_CONTINUE)
- ctxt->eip = c->eip;
- }
+ if (rc == X86EMUL_CONTINUE)
+ ctxt->eip = c->eip;
- return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
+ return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
}
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
@@ -2182,12 +2487,6 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
op->addr.mem.seg = seg;
}
-static int em_push(struct x86_emulate_ctxt *ctxt)
-{
- emulate_push(ctxt, ctxt->ops);
- return X86EMUL_CONTINUE;
-}
-
static int em_das(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
@@ -2234,7 +2533,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
ulong old_eip;
int rc;
- old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
+ old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
old_eip = c->eip;
memcpy(&sel, c->src.valptr + c->op_bytes, 2);
@@ -2245,20 +2544,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
memcpy(&c->eip, c->src.valptr, c->op_bytes);
c->src.val = old_cs;
- emulate_push(ctxt, ctxt->ops);
- rc = writeback(ctxt, ctxt->ops);
+ rc = em_push(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
c->src.val = old_eip;
- emulate_push(ctxt, ctxt->ops);
- rc = writeback(ctxt, ctxt->ops);
- if (rc != X86EMUL_CONTINUE)
- return rc;
-
- c->dst.type = OP_NONE;
-
- return X86EMUL_CONTINUE;
+ return em_push(ctxt);
}
static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
@@ -2269,13 +2560,79 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
c->dst.type = OP_REG;
c->dst.addr.reg = &c->eip;
c->dst.bytes = c->op_bytes;
- rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes);
+ rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val);
return X86EMUL_CONTINUE;
}
+static int em_add(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_or(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_adc(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_sbb(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_and(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_sub(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_xor(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_cmp(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
+ /* Disable writeback. */
+ c->dst.type = OP_NONE;
+ return X86EMUL_CONTINUE;
+}
+
static int em_imul(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
@@ -2306,13 +2663,10 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
{
- unsigned cpl = ctxt->ops->cpl(ctxt->vcpu);
struct decode_cache *c = &ctxt->decode;
u64 tsc = 0;
- if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD))
- return emulate_gp(ctxt, 0);
- ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc);
+ ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
c->regs[VCPU_REGS_RAX] = (u32)tsc;
c->regs[VCPU_REGS_RDX] = tsc >> 32;
return X86EMUL_CONTINUE;
@@ -2325,22 +2679,375 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int em_movdqu(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_invlpg(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc;
+ ulong linear;
+
+ rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear);
+ if (rc == X86EMUL_CONTINUE)
+ ctxt->ops->invlpg(ctxt, linear);
+ /* Disable writeback. */
+ c->dst.type = OP_NONE;
+ return X86EMUL_CONTINUE;
+}
+
+static int em_clts(struct x86_emulate_ctxt *ctxt)
+{
+ ulong cr0;
+
+ cr0 = ctxt->ops->get_cr(ctxt, 0);
+ cr0 &= ~X86_CR0_TS;
+ ctxt->ops->set_cr(ctxt, 0, cr0);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_vmcall(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc;
+
+ if (c->modrm_mod != 3 || c->modrm_rm != 1)
+ return X86EMUL_UNHANDLEABLE;
+
+ rc = ctxt->ops->fix_hypercall(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ /* Let the processor re-execute the fixed hypercall */
+ c->eip = ctxt->eip;
+ /* Disable writeback. */
+ c->dst.type = OP_NONE;
+ return X86EMUL_CONTINUE;
+}
+
+static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct desc_ptr desc_ptr;
+ int rc;
+
+ rc = read_descriptor(ctxt, c->src.addr.mem,
+ &desc_ptr.size, &desc_ptr.address,
+ c->op_bytes);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ ctxt->ops->set_gdt(ctxt, &desc_ptr);
+ /* Disable writeback. */
+ c->dst.type = OP_NONE;
+ return X86EMUL_CONTINUE;
+}
+
+static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc;
+
+ rc = ctxt->ops->fix_hypercall(ctxt);
+
+ /* Disable writeback. */
+ c->dst.type = OP_NONE;
+ return rc;
+}
+
+static int em_lidt(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct desc_ptr desc_ptr;
+ int rc;
+
+ rc = read_descriptor(ctxt, c->src.addr.mem,
+ &desc_ptr.size, &desc_ptr.address,
+ c->op_bytes);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ ctxt->ops->set_idt(ctxt, &desc_ptr);
+ /* Disable writeback. */
+ c->dst.type = OP_NONE;
+ return X86EMUL_CONTINUE;
+}
+
+static int em_smsw(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ c->dst.bytes = 2;
+ c->dst.val = ctxt->ops->get_cr(ctxt, 0);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_lmsw(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
+ | (c->src.val & 0x0f));
+ c->dst.type = OP_NONE;
+ return X86EMUL_CONTINUE;
+}
+
+static bool valid_cr(int nr)
+{
+ switch (nr) {
+ case 0:
+ case 2 ... 4:
+ case 8:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int check_cr_read(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ if (!valid_cr(c->modrm_reg))
+ return emulate_ud(ctxt);
+
+ return X86EMUL_CONTINUE;
+}
+
+static int check_cr_write(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ u64 new_val = c->src.val64;
+ int cr = c->modrm_reg;
+ u64 efer = 0;
+
+ static u64 cr_reserved_bits[] = {
+ 0xffffffff00000000ULL,
+ 0, 0, 0, /* CR3 checked later */
+ CR4_RESERVED_BITS,
+ 0, 0, 0,
+ CR8_RESERVED_BITS,
+ };
+
+ if (!valid_cr(cr))
+ return emulate_ud(ctxt);
+
+ if (new_val & cr_reserved_bits[cr])
+ return emulate_gp(ctxt, 0);
+
+ switch (cr) {
+ case 0: {
+ u64 cr4;
+ if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
+ ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
+ return emulate_gp(ctxt, 0);
+
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+ if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
+ !(cr4 & X86_CR4_PAE))
+ return emulate_gp(ctxt, 0);
+
+ break;
+ }
+ case 3: {
+ u64 rsvd = 0;
+
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+ if (efer & EFER_LMA)
+ rsvd = CR3_L_MODE_RESERVED_BITS;
+ else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE)
+ rsvd = CR3_PAE_RESERVED_BITS;
+ else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG)
+ rsvd = CR3_NONPAE_RESERVED_BITS;
+
+ if (new_val & rsvd)
+ return emulate_gp(ctxt, 0);
+
+ break;
+ }
+ case 4: {
+ u64 cr4;
+
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+ if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
+ return emulate_gp(ctxt, 0);
+
+ break;
+ }
+ }
+
+ return X86EMUL_CONTINUE;
+}
+
+static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
+{
+ unsigned long dr7;
+
+ ctxt->ops->get_dr(ctxt, 7, &dr7);
+
+ /* Check if DR7.Global_Enable is set */
+ return dr7 & (1 << 13);
+}
+
+static int check_dr_read(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int dr = c->modrm_reg;
+ u64 cr4;
+
+ if (dr > 7)
+ return emulate_ud(ctxt);
+
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
+ return emulate_ud(ctxt);
+
+ if (check_dr7_gd(ctxt))
+ return emulate_db(ctxt);
+
+ return X86EMUL_CONTINUE;
+}
+
+static int check_dr_write(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ u64 new_val = c->src.val64;
+ int dr = c->modrm_reg;
+
+ if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
+ return emulate_gp(ctxt, 0);
+
+ return check_dr_read(ctxt);
+}
+
+static int check_svme(struct x86_emulate_ctxt *ctxt)
+{
+ u64 efer;
+
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+ if (!(efer & EFER_SVME))
+ return emulate_ud(ctxt);
+
+ return X86EMUL_CONTINUE;
+}
+
+static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
+{
+ u64 rax = ctxt->decode.regs[VCPU_REGS_RAX];
+
+ /* Valid physical address? */
+ if (rax & 0xffff000000000000ULL)
+ return emulate_gp(ctxt, 0);
+
+ return check_svme(ctxt);
+}
+
+static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
+{
+ u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
+
+ if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
+ return emulate_ud(ctxt);
+
+ return X86EMUL_CONTINUE;
+}
+
+static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
+{
+ u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
+ u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX];
+
+ if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
+ (rcx > 3))
+ return emulate_gp(ctxt, 0);
+
+ return X86EMUL_CONTINUE;
+}
+
+static int check_perm_in(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ c->dst.bytes = min(c->dst.bytes, 4u);
+ if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes))
+ return emulate_gp(ctxt, 0);
+
+ return X86EMUL_CONTINUE;
+}
+
+static int check_perm_out(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ c->src.bytes = min(c->src.bytes, 4u);
+ if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes))
+ return emulate_gp(ctxt, 0);
+
+ return X86EMUL_CONTINUE;
+}
+
#define D(_y) { .flags = (_y) }
+#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
+#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
+ .check_perm = (_p) }
#define N D(0)
+#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
-#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) }
+#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
+#define II(_f, _e, _i) \
+ { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
+#define IIP(_f, _e, _i, _p) \
+ { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \
+ .check_perm = (_p) }
+#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
#define D2bv(_f) D((_f) | ByteOp), D(_f)
+#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
-#define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \
- D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \
- D2bv(((_f) & ~Lock) | DstAcc | SrcImm)
+#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \
+ I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
+ I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
+static struct opcode group7_rm1[] = {
+ DI(SrcNone | ModRM | Priv, monitor),
+ DI(SrcNone | ModRM | Priv, mwait),
+ N, N, N, N, N, N,
+};
+
+static struct opcode group7_rm3[] = {
+ DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa),
+ II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall),
+ DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa),
+ DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa),
+ DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme),
+ DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme),
+ DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme),
+ DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme),
+};
+
+static struct opcode group7_rm7[] = {
+ N,
+ DIP(SrcNone | ModRM, rdtscp, check_rdtsc),
+ N, N, N, N, N, N,
+};
static struct opcode group1[] = {
- X7(D(Lock)), N
+ I(Lock, em_add),
+ I(Lock, em_or),
+ I(Lock, em_adc),
+ I(Lock, em_sbb),
+ I(Lock, em_and),
+ I(Lock, em_sub),
+ I(Lock, em_xor),
+ I(0, em_cmp),
};
static struct opcode group1A[] = {
@@ -2366,16 +3073,28 @@ static struct opcode group5[] = {
D(SrcMem | ModRM | Stack), N,
};
+static struct opcode group6[] = {
+ DI(ModRM | Prot, sldt),
+ DI(ModRM | Prot, str),
+ DI(ModRM | Prot | Priv, lldt),
+ DI(ModRM | Prot | Priv, ltr),
+ N, N, N, N,
+};
+
static struct group_dual group7 = { {
- N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv),
- D(SrcNone | ModRM | DstMem | Mov), N,
- D(SrcMem16 | ModRM | Mov | Priv),
- D(SrcMem | ModRM | ByteOp | Priv | NoAccess),
+ DI(ModRM | Mov | DstMem | Priv, sgdt),
+ DI(ModRM | Mov | DstMem | Priv, sidt),
+ II(ModRM | SrcMem | Priv, em_lgdt, lgdt),
+ II(ModRM | SrcMem | Priv, em_lidt, lidt),
+ II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
+ II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw),
+ II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
}, {
- D(SrcNone | ModRM | Priv | VendorSpecific), N,
- N, D(SrcNone | ModRM | Priv | VendorSpecific),
- D(SrcNone | ModRM | DstMem | Mov), N,
- D(SrcMem16 | ModRM | Mov | Priv), N,
+ I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall),
+ EXT(0, group7_rm1),
+ N, EXT(0, group7_rm3),
+ II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
+ II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7),
} };
static struct opcode group8[] = {
@@ -2394,35 +3113,40 @@ static struct opcode group11[] = {
I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
};
+static struct gprefix pfx_0f_6f_0f_7f = {
+ N, N, N, I(Sse, em_movdqu),
+};
+
static struct opcode opcode_table[256] = {
/* 0x00 - 0x07 */
- D6ALU(Lock),
+ I6ALU(Lock, em_add),
D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
/* 0x08 - 0x0F */
- D6ALU(Lock),
+ I6ALU(Lock, em_or),
D(ImplicitOps | Stack | No64), N,
/* 0x10 - 0x17 */
- D6ALU(Lock),
+ I6ALU(Lock, em_adc),
D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
/* 0x18 - 0x1F */
- D6ALU(Lock),
+ I6ALU(Lock, em_sbb),
D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
/* 0x20 - 0x27 */
- D6ALU(Lock), N, N,
+ I6ALU(Lock, em_and), N, N,
/* 0x28 - 0x2F */
- D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das),
+ I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
/* 0x30 - 0x37 */
- D6ALU(Lock), N, N,
+ I6ALU(Lock, em_xor), N, N,
/* 0x38 - 0x3F */
- D6ALU(0), N, N,
+ I6ALU(0, em_cmp), N, N,
/* 0x40 - 0x4F */
X16(D(DstReg)),
/* 0x50 - 0x57 */
X8(I(SrcReg | Stack, em_push)),
/* 0x58 - 0x5F */
- X8(D(DstReg | Stack)),
+ X8(I(DstReg | Stack, em_pop)),
/* 0x60 - 0x67 */
- D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
+ I(ImplicitOps | Stack | No64, em_pusha),
+ I(ImplicitOps | Stack | No64, em_popa),
N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
N, N, N, N,
/* 0x68 - 0x6F */
@@ -2430,8 +3154,8 @@ static struct opcode opcode_table[256] = {
I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
I(SrcImmByte | Mov | Stack, em_push),
I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
- D2bv(DstDI | Mov | String), /* insb, insw/insd */
- D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */
+ D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */
+ D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */
/* 0x70 - 0x7F */
X16(D(SrcImmByte)),
/* 0x80 - 0x87 */
@@ -2446,21 +3170,22 @@ static struct opcode opcode_table[256] = {
D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg),
D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A),
/* 0x90 - 0x97 */
- X8(D(SrcAcc | DstReg)),
+ DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
/* 0x98 - 0x9F */
D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
I(SrcImmFAddr | No64, em_call_far), N,
- D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N,
+ II(ImplicitOps | Stack, em_pushf, pushf),
+ II(ImplicitOps | Stack, em_popf, popf), N, N,
/* 0xA0 - 0xA7 */
I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov),
I2bv(SrcSI | DstDI | Mov | String, em_mov),
- D2bv(SrcSI | DstDI | String),
+ I2bv(SrcSI | DstDI | String, em_cmp),
/* 0xA8 - 0xAF */
D2bv(DstAcc | SrcImm),
I2bv(SrcAcc | DstDI | Mov | String, em_mov),
I2bv(SrcSI | DstAcc | Mov | String, em_mov),
- D2bv(SrcAcc | DstDI | String),
+ I2bv(SrcAcc | DstDI | String, em_cmp),
/* 0xB0 - 0xB7 */
X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
/* 0xB8 - 0xBF */
@@ -2473,7 +3198,8 @@ static struct opcode opcode_table[256] = {
G(ByteOp, group11), G(0, group11),
/* 0xC8 - 0xCF */
N, N, N, D(ImplicitOps | Stack),
- D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps),
+ D(ImplicitOps), DI(SrcImmByte, intn),
+ D(ImplicitOps | No64), DI(ImplicitOps, iret),
/* 0xD0 - 0xD7 */
D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM),
N, N, N, N,
@@ -2481,14 +3207,17 @@ static struct opcode opcode_table[256] = {
N, N, N, N, N, N, N, N,
/* 0xE0 - 0xE7 */
X4(D(SrcImmByte)),
- D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte),
+ D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in),
+ D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out),
/* 0xE8 - 0xEF */
D(SrcImm | Stack), D(SrcImm | ImplicitOps),
D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps),
- D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps),
+ D2bvIP(SrcNone | DstAcc, in, check_perm_in),
+ D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out),
/* 0xF0 - 0xF7 */
- N, N, N, N,
- D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3),
+ N, DI(ImplicitOps, icebp), N, N,
+ DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
+ G(ByteOp, group3), G(0, group3),
/* 0xF8 - 0xFF */
D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps),
D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
@@ -2496,20 +3225,24 @@ static struct opcode opcode_table[256] = {
static struct opcode twobyte_table[256] = {
/* 0x00 - 0x0F */
- N, GD(0, &group7), N, N,
- N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N,
- D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N,
+ G(0, group6), GD(0, &group7), N, N,
+ N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N,
+ DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
N, D(ImplicitOps | ModRM), N, N,
/* 0x10 - 0x1F */
N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N,
/* 0x20 - 0x2F */
- D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264),
- D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264),
+ DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read),
+ DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read),
+ DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write),
+ DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write),
N, N, N, N,
N, N, N, N, N, N, N, N,
/* 0x30 - 0x3F */
- D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc),
- D(ImplicitOps | Priv), N,
+ DI(ImplicitOps | Priv, wrmsr),
+ IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
+ DI(ImplicitOps | Priv, rdmsr),
+ DIP(ImplicitOps | Priv, rdpmc, check_rdpmc),
D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific),
N, N,
N, N, N, N, N, N, N, N,
@@ -2518,21 +3251,27 @@ static struct opcode twobyte_table[256] = {
/* 0x50 - 0x5F */
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
/* 0x60 - 0x6F */
- N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
/* 0x70 - 0x7F */
- N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
/* 0x80 - 0x8F */
X16(D(SrcImm)),
/* 0x90 - 0x9F */
X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
/* 0xA0 - 0xA7 */
D(ImplicitOps | Stack), D(ImplicitOps | Stack),
- N, D(DstMem | SrcReg | ModRM | BitOp),
+ DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp),
D(DstMem | SrcReg | Src2ImmByte | ModRM),
D(DstMem | SrcReg | Src2CL | ModRM), N, N,
/* 0xA8 - 0xAF */
D(ImplicitOps | Stack), D(ImplicitOps | Stack),
- N, D(DstMem | SrcReg | ModRM | BitOp | Lock),
+ DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock),
D(DstMem | SrcReg | Src2ImmByte | ModRM),
D(DstMem | SrcReg | Src2CL | ModRM),
D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
@@ -2564,10 +3303,13 @@ static struct opcode twobyte_table[256] = {
#undef G
#undef GD
#undef I
+#undef GP
+#undef EXT
#undef D2bv
+#undef D2bvIP
#undef I2bv
-#undef D6ALU
+#undef I6ALU
static unsigned imm_size(struct decode_cache *c)
{
@@ -2625,8 +3367,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
struct decode_cache *c = &ctxt->decode;
int rc = X86EMUL_CONTINUE;
int mode = ctxt->mode;
- int def_op_bytes, def_ad_bytes, dual, goffset;
- struct opcode opcode, *g_mod012, *g_mod3;
+ int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
+ bool op_prefix = false;
+ struct opcode opcode;
struct operand memop = { .type = OP_NONE };
c->eip = ctxt->eip;
@@ -2634,7 +3377,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
c->fetch.end = c->fetch.start + insn_len;
if (insn_len > 0)
memcpy(c->fetch.data, insn, insn_len);
- ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);
switch (mode) {
case X86EMUL_MODE_REAL:
@@ -2662,6 +3404,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
for (;;) {
switch (c->b = insn_fetch(u8, 1, c->eip)) {
case 0x66: /* operand-size override */
+ op_prefix = true;
/* switch between 2/4 bytes */
c->op_bytes = def_op_bytes ^ 6;
break;
@@ -2692,10 +3435,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
c->lock_prefix = 1;
break;
case 0xf2: /* REPNE/REPNZ */
- c->rep_prefix = REPNE_PREFIX;
- break;
case 0xf3: /* REP/REPE/REPZ */
- c->rep_prefix = REPE_PREFIX;
+ c->rep_prefix = c->b;
break;
default:
goto done_prefixes;
@@ -2722,29 +3463,49 @@ done_prefixes:
}
c->d = opcode.flags;
- if (c->d & Group) {
- dual = c->d & GroupDual;
- c->modrm = insn_fetch(u8, 1, c->eip);
- --c->eip;
-
- if (c->d & GroupDual) {
- g_mod012 = opcode.u.gdual->mod012;
- g_mod3 = opcode.u.gdual->mod3;
- } else
- g_mod012 = g_mod3 = opcode.u.group;
-
- c->d &= ~(Group | GroupDual);
-
- goffset = (c->modrm >> 3) & 7;
+ while (c->d & GroupMask) {
+ switch (c->d & GroupMask) {
+ case Group:
+ c->modrm = insn_fetch(u8, 1, c->eip);
+ --c->eip;
+ goffset = (c->modrm >> 3) & 7;
+ opcode = opcode.u.group[goffset];
+ break;
+ case GroupDual:
+ c->modrm = insn_fetch(u8, 1, c->eip);
+ --c->eip;
+ goffset = (c->modrm >> 3) & 7;
+ if ((c->modrm >> 6) == 3)
+ opcode = opcode.u.gdual->mod3[goffset];
+ else
+ opcode = opcode.u.gdual->mod012[goffset];
+ break;
+ case RMExt:
+ goffset = c->modrm & 7;
+ opcode = opcode.u.group[goffset];
+ break;
+ case Prefix:
+ if (c->rep_prefix && op_prefix)
+ return X86EMUL_UNHANDLEABLE;
+ simd_prefix = op_prefix ? 0x66 : c->rep_prefix;
+ switch (simd_prefix) {
+ case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
+ case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
+ case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
+ case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
+ }
+ break;
+ default:
+ return X86EMUL_UNHANDLEABLE;
+ }
- if ((c->modrm >> 6) == 3)
- opcode = g_mod3[goffset];
- else
- opcode = g_mod012[goffset];
+ c->d &= ~GroupMask;
c->d |= opcode.flags;
}
c->execute = opcode.u.execute;
+ c->check_perm = opcode.check_perm;
+ c->intercept = opcode.intercept;
/* Unrecognised? */
if (c->d == 0 || (c->d & Undefined))
@@ -2763,6 +3524,9 @@ done_prefixes:
c->op_bytes = 4;
}
+ if (c->d & Sse)
+ c->op_bytes = 16;
+
/* ModRM and SIB bytes. */
if (c->d & ModRM) {
rc = decode_modrm(ctxt, ops, &memop);
@@ -2776,7 +3540,7 @@ done_prefixes:
if (!c->has_seg_override)
set_seg_override(c, VCPU_SREG_DS);
- memop.addr.mem.seg = seg_override(ctxt, ops, c);
+ memop.addr.mem.seg = seg_override(ctxt, c);
if (memop.type == OP_MEM && c->ad_bytes != 8)
memop.addr.mem.ea = (u32)memop.addr.mem.ea;
@@ -2792,7 +3556,7 @@ done_prefixes:
case SrcNone:
break;
case SrcReg:
- decode_register_operand(&c->src, c, 0);
+ decode_register_operand(ctxt, &c->src, c, 0);
break;
case SrcMem16:
memop.bytes = 2;
@@ -2836,7 +3600,7 @@ done_prefixes:
c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
c->src.addr.mem.ea =
register_address(c, c->regs[VCPU_REGS_RSI]);
- c->src.addr.mem.seg = seg_override(ctxt, ops, c),
+ c->src.addr.mem.seg = seg_override(ctxt, c);
c->src.val = 0;
break;
case SrcImmFAddr:
@@ -2883,7 +3647,7 @@ done_prefixes:
/* Decode and fetch the destination operand: register or memory. */
switch (c->d & DstMask) {
case DstReg:
- decode_register_operand(&c->dst, c,
+ decode_register_operand(ctxt, &c->dst, c,
c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
break;
case DstImmUByte:
@@ -2926,7 +3690,7 @@ done_prefixes:
}
done:
- return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
+ return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
}
static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
@@ -2979,12 +3743,51 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
+ if ((c->d & Sse)
+ && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)
+ || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
+ rc = emulate_ud(ctxt);
+ goto done;
+ }
+
+ if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
+ rc = emulate_nm(ctxt);
+ goto done;
+ }
+
+ if (unlikely(ctxt->guest_mode) && c->intercept) {
+ rc = emulator_check_intercept(ctxt, c->intercept,
+ X86_ICPT_PRE_EXCEPT);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ }
+
/* Privileged instruction can be executed only in CPL=0 */
- if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
+ if ((c->d & Priv) && ops->cpl(ctxt)) {
rc = emulate_gp(ctxt, 0);
goto done;
}
+ /* Instruction can only be executed in protected mode */
+ if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) {
+ rc = emulate_ud(ctxt);
+ goto done;
+ }
+
+ /* Do instruction specific permission checks */
+ if (c->check_perm) {
+ rc = c->check_perm(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ }
+
+ if (unlikely(ctxt->guest_mode) && c->intercept) {
+ rc = emulator_check_intercept(ctxt, c->intercept,
+ X86_ICPT_POST_EXCEPT);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ }
+
if (c->rep_prefix && (c->d & String)) {
/* All REP prefixes have the same first termination condition */
if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
@@ -2994,16 +3797,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
}
if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) {
- rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem),
- c->src.valptr, c->src.bytes);
+ rc = segmented_read(ctxt, c->src.addr.mem,
+ c->src.valptr, c->src.bytes);
if (rc != X86EMUL_CONTINUE)
goto done;
c->src.orig_val64 = c->src.val64;
}
if (c->src2.type == OP_MEM) {
- rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem),
- &c->src2.val, c->src2.bytes);
+ rc = segmented_read(ctxt, c->src2.addr.mem,
+ &c->src2.val, c->src2.bytes);
if (rc != X86EMUL_CONTINUE)
goto done;
}
@@ -3014,7 +3817,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
/* optimisation - avoid slow emulated read if Mov */
- rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem),
+ rc = segmented_read(ctxt, c->dst.addr.mem,
&c->dst.val, c->dst.bytes);
if (rc != X86EMUL_CONTINUE)
goto done;
@@ -3023,6 +3826,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
special_insn:
+ if (unlikely(ctxt->guest_mode) && c->intercept) {
+ rc = emulator_check_intercept(ctxt, c->intercept,
+ X86_ICPT_POST_MEMACCESS);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ }
+
if (c->execute) {
rc = c->execute(ctxt);
if (rc != X86EMUL_CONTINUE)
@@ -3034,75 +3844,33 @@ special_insn:
goto twobyte_insn;
switch (c->b) {
- case 0x00 ... 0x05:
- add: /* add */
- emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
- break;
case 0x06: /* push es */
- emulate_push_sreg(ctxt, ops, VCPU_SREG_ES);
+ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES);
break;
case 0x07: /* pop es */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
break;
- case 0x08 ... 0x0d:
- or: /* or */
- emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
- break;
case 0x0e: /* push cs */
- emulate_push_sreg(ctxt, ops, VCPU_SREG_CS);
- break;
- case 0x10 ... 0x15:
- adc: /* adc */
- emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
+ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS);
break;
case 0x16: /* push ss */
- emulate_push_sreg(ctxt, ops, VCPU_SREG_SS);
+ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS);
break;
case 0x17: /* pop ss */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
break;
- case 0x18 ... 0x1d:
- sbb: /* sbb */
- emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
- break;
case 0x1e: /* push ds */
- emulate_push_sreg(ctxt, ops, VCPU_SREG_DS);
+ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS);
break;
case 0x1f: /* pop ds */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
break;
- case 0x20 ... 0x25:
- and: /* and */
- emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
- break;
- case 0x28 ... 0x2d:
- sub: /* sub */
- emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
- break;
- case 0x30 ... 0x35:
- xor: /* xor */
- emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
- break;
- case 0x38 ... 0x3d:
- cmp: /* cmp */
- emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
- break;
case 0x40 ... 0x47: /* inc r16/r32 */
emulate_1op("inc", c->dst, ctxt->eflags);
break;
case 0x48 ... 0x4f: /* dec r16/r32 */
emulate_1op("dec", c->dst, ctxt->eflags);
break;
- case 0x58 ... 0x5f: /* pop reg */
- pop_instruction:
- rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
- break;
- case 0x60: /* pusha */
- rc = emulate_pusha(ctxt, ops);
- break;
- case 0x61: /* popa */
- rc = emulate_popa(ctxt, ops);
- break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
goto cannot_emulate;
@@ -3121,26 +3889,6 @@ special_insn:
if (test_cc(c->b, ctxt->eflags))
jmp_rel(c, c->src.val);
break;
- case 0x80 ... 0x83: /* Grp1 */
- switch (c->modrm_reg) {
- case 0:
- goto add;
- case 1:
- goto or;
- case 2:
- goto adc;
- case 3:
- goto sbb;
- case 4:
- goto and;
- case 5:
- goto sub;
- case 6:
- goto xor;
- case 7:
- goto cmp;
- }
- break;
case 0x84 ... 0x85:
test:
emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
@@ -3162,7 +3910,7 @@ special_insn:
rc = emulate_ud(ctxt);
goto done;
}
- c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu);
+ c->dst.val = get_segment_selector(ctxt, c->modrm_reg);
break;
case 0x8d: /* lea r16/r32, m */
c->dst.val = c->src.addr.mem.ea;
@@ -3187,7 +3935,7 @@ special_insn:
break;
}
case 0x8f: /* pop (sole member of Grp1a) */
- rc = emulate_grp1a(ctxt, ops);
+ rc = em_grp1a(ctxt);
break;
case 0x90 ... 0x97: /* nop / xchg reg, rax */
if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX])
@@ -3200,31 +3948,17 @@ special_insn:
case 8: c->dst.val = (s32)c->dst.val; break;
}
break;
- case 0x9c: /* pushf */
- c->src.val = (unsigned long) ctxt->eflags;
- emulate_push(ctxt, ops);
- break;
- case 0x9d: /* popf */
- c->dst.type = OP_REG;
- c->dst.addr.reg = &ctxt->eflags;
- c->dst.bytes = c->op_bytes;
- rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
- break;
- case 0xa6 ... 0xa7: /* cmps */
- c->dst.type = OP_NONE; /* Disable writeback. */
- goto cmp;
case 0xa8 ... 0xa9: /* test ax, imm */
goto test;
- case 0xae ... 0xaf: /* scas */
- goto cmp;
case 0xc0 ... 0xc1:
- emulate_grp2(ctxt);
+ rc = em_grp2(ctxt);
break;
case 0xc3: /* ret */
c->dst.type = OP_REG;
c->dst.addr.reg = &c->eip;
c->dst.bytes = c->op_bytes;
- goto pop_instruction;
+ rc = em_pop(ctxt);
+ break;
case 0xc4: /* les */
rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES);
break;
@@ -3252,11 +3986,11 @@ special_insn:
rc = emulate_iret(ctxt, ops);
break;
case 0xd0 ... 0xd1: /* Grp2 */
- emulate_grp2(ctxt);
+ rc = em_grp2(ctxt);
break;
case 0xd2 ... 0xd3: /* Grp2 */
c->src.val = c->regs[VCPU_REGS_RCX];
- emulate_grp2(ctxt);
+ rc = em_grp2(ctxt);
break;
case 0xe0 ... 0xe2: /* loop/loopz/loopnz */
register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
@@ -3278,23 +4012,14 @@ special_insn:
long int rel = c->src.val;
c->src.val = (unsigned long) c->eip;
jmp_rel(c, rel);
- emulate_push(ctxt, ops);
+ rc = em_push(ctxt);
break;
}
case 0xe9: /* jmp rel */
goto jmp;
- case 0xea: { /* jmp far */
- unsigned short sel;
- jump_far:
- memcpy(&sel, c->src.valptr + c->op_bytes, 2);
-
- if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS))
- goto done;
-
- c->eip = 0;
- memcpy(&c->eip, c->src.valptr, c->op_bytes);
+ case 0xea: /* jmp far */
+ rc = em_jmp_far(ctxt);
break;
- }
case 0xeb:
jmp: /* jmp rel short */
jmp_rel(c, c->src.val);
@@ -3304,11 +4029,6 @@ special_insn:
case 0xed: /* in (e/r)ax,dx */
c->src.val = c->regs[VCPU_REGS_RDX];
do_io_in:
- c->dst.bytes = min(c->dst.bytes, 4u);
- if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
- rc = emulate_gp(ctxt, 0);
- goto done;
- }
if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
&c->dst.val))
goto done; /* IO is needed */
@@ -3317,25 +4037,19 @@ special_insn:
case 0xef: /* out dx,(e/r)ax */
c->dst.val = c->regs[VCPU_REGS_RDX];
do_io_out:
- c->src.bytes = min(c->src.bytes, 4u);
- if (!emulator_io_permited(ctxt, ops, c->dst.val,
- c->src.bytes)) {
- rc = emulate_gp(ctxt, 0);
- goto done;
- }
- ops->pio_out_emulated(c->src.bytes, c->dst.val,
- &c->src.val, 1, ctxt->vcpu);
+ ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val,
+ &c->src.val, 1);
c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xf4: /* hlt */
- ctxt->vcpu->arch.halt_request = 1;
+ ctxt->ops->halt(ctxt);
break;
case 0xf5: /* cmc */
/* complement carry flag from eflags reg */
ctxt->eflags ^= EFLG_CF;
break;
case 0xf6 ... 0xf7: /* Grp3 */
- rc = emulate_grp3(ctxt, ops);
+ rc = em_grp3(ctxt);
break;
case 0xf8: /* clc */
ctxt->eflags &= ~EFLG_CF;
@@ -3366,13 +4080,11 @@ special_insn:
ctxt->eflags |= EFLG_DF;
break;
case 0xfe: /* Grp4 */
- grp45:
- rc = emulate_grp45(ctxt, ops);
+ rc = em_grp45(ctxt);
break;
case 0xff: /* Grp5 */
- if (c->modrm_reg == 5)
- goto jump_far;
- goto grp45;
+ rc = em_grp45(ctxt);
+ break;
default:
goto cannot_emulate;
}
@@ -3381,7 +4093,7 @@ special_insn:
goto done;
writeback:
- rc = writeback(ctxt, ops);
+ rc = writeback(ctxt);
if (rc != X86EMUL_CONTINUE)
goto done;
@@ -3392,7 +4104,7 @@ writeback:
c->dst.type = saved_dst_type;
if ((c->d & SrcMask) == SrcSI)
- string_addr_inc(ctxt, seg_override(ctxt, ops, c),
+ string_addr_inc(ctxt, seg_override(ctxt, c),
VCPU_REGS_RSI, &c->src);
if ((c->d & DstMask) == DstDI)
@@ -3427,115 +4139,34 @@ writeback:
done:
if (rc == X86EMUL_PROPAGATE_FAULT)
ctxt->have_exception = true;
+ if (rc == X86EMUL_INTERCEPTED)
+ return EMULATION_INTERCEPTED;
+
return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
twobyte_insn:
switch (c->b) {
- case 0x01: /* lgdt, lidt, lmsw */
- switch (c->modrm_reg) {
- u16 size;
- unsigned long address;
-
- case 0: /* vmcall */
- if (c->modrm_mod != 3 || c->modrm_rm != 1)
- goto cannot_emulate;
-
- rc = kvm_fix_hypercall(ctxt->vcpu);
- if (rc != X86EMUL_CONTINUE)
- goto done;
-
- /* Let the processor re-execute the fixed hypercall */
- c->eip = ctxt->eip;
- /* Disable writeback. */
- c->dst.type = OP_NONE;
- break;
- case 2: /* lgdt */
- rc = read_descriptor(ctxt, ops, c->src.addr.mem,
- &size, &address, c->op_bytes);
- if (rc != X86EMUL_CONTINUE)
- goto done;
- realmode_lgdt(ctxt->vcpu, size, address);
- /* Disable writeback. */
- c->dst.type = OP_NONE;
- break;
- case 3: /* lidt/vmmcall */
- if (c->modrm_mod == 3) {
- switch (c->modrm_rm) {
- case 1:
- rc = kvm_fix_hypercall(ctxt->vcpu);
- break;
- default:
- goto cannot_emulate;
- }
- } else {
- rc = read_descriptor(ctxt, ops, c->src.addr.mem,
- &size, &address,
- c->op_bytes);
- if (rc != X86EMUL_CONTINUE)
- goto done;
- realmode_lidt(ctxt->vcpu, size, address);
- }
- /* Disable writeback. */
- c->dst.type = OP_NONE;
- break;
- case 4: /* smsw */
- c->dst.bytes = 2;
- c->dst.val = ops->get_cr(0, ctxt->vcpu);
- break;
- case 6: /* lmsw */
- ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) |
- (c->src.val & 0x0f), ctxt->vcpu);
- c->dst.type = OP_NONE;
- break;
- case 5: /* not defined */
- emulate_ud(ctxt);
- rc = X86EMUL_PROPAGATE_FAULT;
- goto done;
- case 7: /* invlpg*/
- emulate_invlpg(ctxt->vcpu,
- linear(ctxt, c->src.addr.mem));
- /* Disable writeback. */
- c->dst.type = OP_NONE;
- break;
- default:
- goto cannot_emulate;
- }
- break;
case 0x05: /* syscall */
rc = emulate_syscall(ctxt, ops);
break;
case 0x06:
- emulate_clts(ctxt->vcpu);
+ rc = em_clts(ctxt);
break;
case 0x09: /* wbinvd */
- kvm_emulate_wbinvd(ctxt->vcpu);
+ (ctxt->ops->wbinvd)(ctxt);
break;
case 0x08: /* invd */
case 0x0d: /* GrpP (prefetch) */
case 0x18: /* Grp16 (prefetch/nop) */
break;
case 0x20: /* mov cr, reg */
- switch (c->modrm_reg) {
- case 1:
- case 5 ... 7:
- case 9 ... 15:
- emulate_ud(ctxt);
- rc = X86EMUL_PROPAGATE_FAULT;
- goto done;
- }
- c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu);
+ c->dst.val = ops->get_cr(ctxt, c->modrm_reg);
break;
case 0x21: /* mov from dr to reg */
- if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
- (c->modrm_reg == 4 || c->modrm_reg == 5)) {
- emulate_ud(ctxt);
- rc = X86EMUL_PROPAGATE_FAULT;
- goto done;
- }
- ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu);
+ ops->get_dr(ctxt, c->modrm_reg, &c->dst.val);
break;
case 0x22: /* mov reg, cr */
- if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) {
+ if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) {
emulate_gp(ctxt, 0);
rc = X86EMUL_PROPAGATE_FAULT;
goto done;
@@ -3543,16 +4174,9 @@ twobyte_insn:
c->dst.type = OP_NONE;
break;
case 0x23: /* mov from reg to dr */
- if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
- (c->modrm_reg == 4 || c->modrm_reg == 5)) {
- emulate_ud(ctxt);
- rc = X86EMUL_PROPAGATE_FAULT;
- goto done;
- }
-
- if (ops->set_dr(c->modrm_reg, c->src.val &
+ if (ops->set_dr(ctxt, c->modrm_reg, c->src.val &
((ctxt->mode == X86EMUL_MODE_PROT64) ?
- ~0ULL : ~0U), ctxt->vcpu) < 0) {
+ ~0ULL : ~0U)) < 0) {
/* #UD condition is already handled by the code above */
emulate_gp(ctxt, 0);
rc = X86EMUL_PROPAGATE_FAULT;
@@ -3565,7 +4189,7 @@ twobyte_insn:
/* wrmsr */
msr_data = (u32)c->regs[VCPU_REGS_RAX]
| ((u64)c->regs[VCPU_REGS_RDX] << 32);
- if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
+ if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) {
emulate_gp(ctxt, 0);
rc = X86EMUL_PROPAGATE_FAULT;
goto done;
@@ -3574,7 +4198,7 @@ twobyte_insn:
break;
case 0x32:
/* rdmsr */
- if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
+ if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) {
emulate_gp(ctxt, 0);
rc = X86EMUL_PROPAGATE_FAULT;
goto done;
@@ -3603,7 +4227,7 @@ twobyte_insn:
c->dst.val = test_cc(c->b, ctxt->eflags);
break;
case 0xa0: /* push fs */
- emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);
+ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);
break;
case 0xa1: /* pop fs */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
@@ -3620,7 +4244,7 @@ twobyte_insn:
emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
break;
case 0xa8: /* push gs */
- emulate_push_sreg(ctxt, ops, VCPU_SREG_GS);
+ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS);
break;
case 0xa9: /* pop gs */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
@@ -3727,7 +4351,7 @@ twobyte_insn:
(u64) c->src.val;
break;
case 0xc7: /* Grp9 (cmpxchg8b) */
- rc = emulate_grp9(ctxt, ops);
+ rc = em_grp9(ctxt);
break;
default:
goto cannot_emulate;
@@ -3739,5 +4363,5 @@ twobyte_insn:
goto writeback;
cannot_emulate:
- return -1;
+ return EMULATION_FAILED;
}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 46d08ca0b48..51a97426e79 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -33,7 +33,6 @@ struct kvm_kpit_state {
};
struct kvm_pit {
- unsigned long base_addresss;
struct kvm_io_device dev;
struct kvm_io_device speaker_dev;
struct kvm *kvm;
@@ -51,7 +50,6 @@ struct kvm_pit {
#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
#define KVM_PIT_CHANNEL_MASK 0x3
-void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
void kvm_free_pit(struct kvm *kvm);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ba910d14941..53e2d084bff 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -75,7 +75,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_destroy_pic(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
-void kvm_pic_clear_isr_ack(struct kvm *kvm);
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
{
@@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
-int pit_has_pending_timer(struct kvm_vcpu *vcpu);
int apic_has_pending_timer(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 22fae7593ee..28418054b88 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
- const void *pte, unsigned long mmu_seq)
+ const void *pte)
{
WARN_ON(1);
}
@@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
}
static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp,
- u64 *spte,
- const void *new, unsigned long mmu_seq)
+ struct kvm_mmu_page *sp, u64 *spte,
+ const void *new)
{
if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
++vcpu->kvm->stat.mmu_pde_zapped;
@@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
}
++vcpu->kvm->stat.mmu_pte_updated;
- vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq);
+ vcpu->arch.mmu.update_pte(vcpu, sp, spte, new);
}
static bool need_remote_flush(u64 old, u64 new)
@@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
struct kvm_mmu_page *sp;
struct hlist_node *node;
LIST_HEAD(invalid_list);
- unsigned long mmu_seq;
u64 entry, gentry, *spte;
unsigned pte_size, page_offset, misaligned, quadrant, offset;
int level, npte, invlpg_counter, r, flooded = 0;
@@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
break;
}
- mmu_seq = vcpu->kvm->mmu_notifier_seq;
- smp_rmb();
-
spin_lock(&vcpu->kvm->mmu_lock);
if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
gentry = 0;
@@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if (gentry &&
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
& mask.word))
- mmu_pte_write_new_pte(vcpu, sp, spte, &gentry,
- mmu_seq);
+ mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
if (!remote_flush && need_remote_flush(entry, *spte))
remote_flush = true;
++spte;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index c6397795d86..6c4dc010c4c 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -78,15 +78,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}
-static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
- gfn_t table_gfn, unsigned index,
- pt_element_t orig_pte, pt_element_t new_pte)
+static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ pt_element_t __user *ptep_user, unsigned index,
+ pt_element_t orig_pte, pt_element_t new_pte)
{
+ int npages;
pt_element_t ret;
pt_element_t *table;
struct page *page;
- page = gfn_to_page(kvm, table_gfn);
+ npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
+ /* Check if the user is doing something meaningless. */
+ if (unlikely(npages != 1))
+ return -EFAULT;
table = kmap_atomic(page, KM_USER0);
ret = CMPXCHG(&table[index], orig_pte, new_pte);
@@ -117,6 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
gva_t addr, u32 access)
{
pt_element_t pte;
+ pt_element_t __user *ptep_user;
gfn_t table_gfn;
unsigned index, pt_access, uninitialized_var(pte_access);
gpa_t pte_gpa;
@@ -152,6 +157,9 @@ walk:
pt_access = ACC_ALL;
for (;;) {
+ gfn_t real_gfn;
+ unsigned long host_addr;
+
index = PT_INDEX(addr, walker->level);
table_gfn = gpte_to_gfn(pte);
@@ -160,43 +168,64 @@ walk:
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
- if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte,
- offset, sizeof(pte),
- PFERR_USER_MASK|PFERR_WRITE_MASK)) {
+ real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
+ PFERR_USER_MASK|PFERR_WRITE_MASK);
+ if (unlikely(real_gfn == UNMAPPED_GVA)) {
+ present = false;
+ break;
+ }
+ real_gfn = gpa_to_gfn(real_gfn);
+
+ host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+ if (unlikely(kvm_is_error_hva(host_addr))) {
+ present = false;
+ break;
+ }
+
+ ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
+ if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) {
present = false;
break;
}
trace_kvm_mmu_paging_element(pte, walker->level);
- if (!is_present_gpte(pte)) {
+ if (unlikely(!is_present_gpte(pte))) {
present = false;
break;
}
- if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) {
+ if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte,
+ walker->level))) {
rsvd_fault = true;
break;
}
- if (write_fault && !is_writable_pte(pte))
- if (user_fault || is_write_protection(vcpu))
- eperm = true;
+ if (unlikely(write_fault && !is_writable_pte(pte)
+ && (user_fault || is_write_protection(vcpu))))
+ eperm = true;
- if (user_fault && !(pte & PT_USER_MASK))
+ if (unlikely(user_fault && !(pte & PT_USER_MASK)))
eperm = true;
#if PTTYPE == 64
- if (fetch_fault && (pte & PT64_NX_MASK))
+ if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
eperm = true;
#endif
- if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) {
+ if (!eperm && !rsvd_fault
+ && unlikely(!(pte & PT_ACCESSED_MASK))) {
+ int ret;
trace_kvm_mmu_set_accessed_bit(table_gfn, index,
sizeof(pte));
- if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
- index, pte, pte|PT_ACCESSED_MASK))
+ ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
+ pte, pte|PT_ACCESSED_MASK);
+ if (unlikely(ret < 0)) {
+ present = false;
+ break;
+ } else if (ret)
goto walk;
+
mark_page_dirty(vcpu->kvm, table_gfn);
pte |= PT_ACCESSED_MASK;
}
@@ -241,17 +270,21 @@ walk:
--walker->level;
}
- if (!present || eperm || rsvd_fault)
+ if (unlikely(!present || eperm || rsvd_fault))
goto error;
- if (write_fault && !is_dirty_gpte(pte)) {
- bool ret;
+ if (write_fault && unlikely(!is_dirty_gpte(pte))) {
+ int ret;
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
- ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
- pte|PT_DIRTY_MASK);
- if (ret)
+ ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
+ pte, pte|PT_DIRTY_MASK);
+ if (unlikely(ret < 0)) {
+ present = false;
+ goto error;
+ } else if (ret)
goto walk;
+
mark_page_dirty(vcpu->kvm, table_gfn);
pte |= PT_DIRTY_MASK;
walker->ptes[walker->level - 1] = pte;
@@ -325,7 +358,7 @@ no_present:
}
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- u64 *spte, const void *pte, unsigned long mmu_seq)
+ u64 *spte, const void *pte)
{
pt_element_t gpte;
unsigned pte_access;
@@ -342,8 +375,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
kvm_release_pfn_clean(pfn);
return;
}
- if (mmu_notifier_retry(vcpu, mmu_seq))
- return;
/*
* we call mmu_set_spte() with host_writable = true because that
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6bb15d583e4..506e4fe23ad 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -63,6 +63,10 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+#define TSC_RATIO_RSVD 0xffffff0000000000ULL
+#define TSC_RATIO_MIN 0x0000000000000001ULL
+#define TSC_RATIO_MAX 0x000000ffffffffffULL
+
static bool erratum_383_found __read_mostly;
static const u32 host_save_user_msrs[] = {
@@ -93,14 +97,6 @@ struct nested_state {
/* A VMEXIT is required but not yet emulated */
bool exit_required;
- /*
- * If we vmexit during an instruction emulation we need this to restore
- * the l1 guest rip after the emulation
- */
- unsigned long vmexit_rip;
- unsigned long vmexit_rsp;
- unsigned long vmexit_rax;
-
/* cache for intercepts of the guest */
u32 intercept_cr;
u32 intercept_dr;
@@ -144,8 +140,13 @@ struct vcpu_svm {
unsigned int3_injected;
unsigned long int3_rip;
u32 apf_reason;
+
+ u64 tsc_ratio;
};
+static DEFINE_PER_CPU(u64, current_tsc_ratio);
+#define TSC_RATIO_DEFAULT 0x0100000000ULL
+
#define MSR_INVALID 0xffffffffU
static struct svm_direct_access_msrs {
@@ -190,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm);
static int nested_svm_vmexit(struct vcpu_svm *svm);
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
+static u64 __scale_tsc(u64 ratio, u64 tsc);
enum {
VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
@@ -376,7 +378,6 @@ struct svm_cpu_data {
};
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
-static uint32_t svm_features;
struct svm_init_data {
int cpu;
@@ -569,6 +570,10 @@ static int has_svm(void)
static void svm_hardware_disable(void *garbage)
{
+ /* Make sure we clean up behind us */
+ if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
+ wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
+
cpu_svm_disable();
}
@@ -610,6 +615,11 @@ static int svm_hardware_enable(void *garbage)
wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
+ __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
+ }
+
svm_init_erratum_383();
return 0;
@@ -791,6 +801,23 @@ static __init int svm_hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
kvm_enable_efer_bits(EFER_FFXSR);
+ if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ u64 max;
+
+ kvm_has_tsc_control = true;
+
+ /*
+ * Make sure the user can only configure tsc_khz values that
+ * fit into a signed integer.
+ * A min value is not calculated needed because it will always
+ * be 1 on all machines and a value of 0 is used to disable
+ * tsc-scaling for the vcpu.
+ */
+ max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
+
+ kvm_max_guest_tsc_khz = max;
+ }
+
if (nested) {
printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
@@ -802,8 +829,6 @@ static __init int svm_hardware_setup(void)
goto err;
}
- svm_features = cpuid_edx(SVM_CPUID_FUNC);
-
if (!boot_cpu_has(X86_FEATURE_NPT))
npt_enabled = false;
@@ -854,6 +879,64 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
seg->base = 0;
}
+static u64 __scale_tsc(u64 ratio, u64 tsc)
+{
+ u64 mult, frac, _tsc;
+
+ mult = ratio >> 32;
+ frac = ratio & ((1ULL << 32) - 1);
+
+ _tsc = tsc;
+ _tsc *= mult;
+ _tsc += (tsc >> 32) * frac;
+ _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
+
+ return _tsc;
+}
+
+static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u64 _tsc = tsc;
+
+ if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
+ _tsc = __scale_tsc(svm->tsc_ratio, tsc);
+
+ return _tsc;
+}
+
+static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u64 ratio;
+ u64 khz;
+
+ /* TSC scaling supported? */
+ if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
+ return;
+
+ /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
+ if (user_tsc_khz == 0) {
+ vcpu->arch.virtual_tsc_khz = 0;
+ svm->tsc_ratio = TSC_RATIO_DEFAULT;
+ return;
+ }
+
+ khz = user_tsc_khz;
+
+ /* TSC scaling required - calculate ratio */
+ ratio = khz << 32;
+ do_div(ratio, tsc_khz);
+
+ if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
+ WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
+ user_tsc_khz);
+ return;
+ }
+ vcpu->arch.virtual_tsc_khz = user_tsc_khz;
+ svm->tsc_ratio = ratio;
+}
+
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -880,6 +963,15 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
+static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
+{
+ u64 tsc;
+
+ tsc = svm_scale_tsc(vcpu, native_read_tsc());
+
+ return target_tsc - tsc;
+}
+
static void init_vmcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -975,7 +1067,7 @@ static void init_vmcb(struct vcpu_svm *svm)
svm_set_efer(&svm->vcpu, 0);
save->dr6 = 0xffff0ff0;
save->dr7 = 0x400;
- save->rflags = 2;
+ kvm_set_rflags(&svm->vcpu, 2);
save->rip = 0x0000fff0;
svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
@@ -1048,6 +1140,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
goto out;
}
+ svm->tsc_ratio = TSC_RATIO_DEFAULT;
+
err = kvm_vcpu_init(&svm->vcpu, kvm, id);
if (err)
goto free_svm;
@@ -1141,6 +1235,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
+
+ if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
+ svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) {
+ __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio;
+ wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
+ }
}
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1365,31 +1465,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (is_guest_mode(vcpu)) {
- /*
- * We are here because we run in nested mode, the host kvm
- * intercepts cr0 writes but the l1 hypervisor does not.
- * But the L1 hypervisor may intercept selective cr0 writes.
- * This needs to be checked here.
- */
- unsigned long old, new;
-
- /* Remove bits that would trigger a real cr0 write intercept */
- old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
- new = cr0 & SVM_CR0_SELECTIVE_MASK;
-
- if (old == new) {
- /* cr0 write with ts and mp unchanged */
- svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
- if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) {
- svm->nested.vmexit_rip = kvm_rip_read(vcpu);
- svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
- svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
- return;
- }
- }
- }
-
#ifdef CONFIG_X86_64
if (vcpu->arch.efer & EFER_LME) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
@@ -2127,7 +2202,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
nested_vmcb->save.cr2 = vmcb->save.cr2;
nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
- nested_vmcb->save.rflags = vmcb->save.rflags;
+ nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
nested_vmcb->save.rip = vmcb->save.rip;
nested_vmcb->save.rsp = vmcb->save.rsp;
nested_vmcb->save.rax = vmcb->save.rax;
@@ -2184,7 +2259,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
svm->vmcb->save.ds = hsave->save.ds;
svm->vmcb->save.gdtr = hsave->save.gdtr;
svm->vmcb->save.idtr = hsave->save.idtr;
- svm->vmcb->save.rflags = hsave->save.rflags;
+ kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
svm_set_efer(&svm->vcpu, hsave->save.efer);
svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
svm_set_cr4(&svm->vcpu, hsave->save.cr4);
@@ -2312,7 +2387,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
hsave->save.efer = svm->vcpu.arch.efer;
hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
hsave->save.cr4 = svm->vcpu.arch.cr4;
- hsave->save.rflags = vmcb->save.rflags;
+ hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
hsave->save.rip = kvm_rip_read(&svm->vcpu);
hsave->save.rsp = vmcb->save.rsp;
hsave->save.rax = vmcb->save.rax;
@@ -2323,7 +2398,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
copy_vmcb_control_area(hsave, vmcb);
- if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
+ if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
svm->vcpu.arch.hflags |= HF_HIF_MASK;
else
svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
@@ -2341,7 +2416,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
svm->vmcb->save.ds = nested_vmcb->save.ds;
svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
svm->vmcb->save.idtr = nested_vmcb->save.idtr;
- svm->vmcb->save.rflags = nested_vmcb->save.rflags;
+ kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
@@ -2443,13 +2518,13 @@ static int vmload_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
-
nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
if (!nested_vmcb)
return 1;
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
nested_svm_unmap(page);
@@ -2464,13 +2539,13 @@ static int vmsave_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
-
nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
if (!nested_vmcb)
return 1;
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
nested_svm_unmap(page);
@@ -2676,6 +2751,29 @@ static int emulate_on_interception(struct vcpu_svm *svm)
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
+bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
+{
+ unsigned long cr0 = svm->vcpu.arch.cr0;
+ bool ret = false;
+ u64 intercept;
+
+ intercept = svm->nested.intercept;
+
+ if (!is_guest_mode(&svm->vcpu) ||
+ (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
+ return false;
+
+ cr0 &= ~SVM_CR0_SELECTIVE_MASK;
+ val &= ~SVM_CR0_SELECTIVE_MASK;
+
+ if (cr0 ^ val) {
+ svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+ ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
+ }
+
+ return ret;
+}
+
#define CR_VALID (1ULL << 63)
static int cr_interception(struct vcpu_svm *svm)
@@ -2699,7 +2797,11 @@ static int cr_interception(struct vcpu_svm *svm)
val = kvm_register_read(&svm->vcpu, reg);
switch (cr) {
case 0:
- err = kvm_set_cr0(&svm->vcpu, val);
+ if (!check_selective_cr0_intercepted(svm, val))
+ err = kvm_set_cr0(&svm->vcpu, val);
+ else
+ return 1;
+
break;
case 3:
err = kvm_set_cr3(&svm->vcpu, val);
@@ -2744,23 +2846,6 @@ static int cr_interception(struct vcpu_svm *svm)
return 1;
}
-static int cr0_write_interception(struct vcpu_svm *svm)
-{
- struct kvm_vcpu *vcpu = &svm->vcpu;
- int r;
-
- r = cr_interception(svm);
-
- if (svm->nested.vmexit_rip) {
- kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip);
- kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp);
- kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax);
- svm->nested.vmexit_rip = 0;
- }
-
- return r;
-}
-
static int dr_interception(struct vcpu_svm *svm)
{
int reg, dr;
@@ -2813,7 +2898,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
case MSR_IA32_TSC: {
struct vmcb *vmcb = get_host_vmcb(svm);
- *data = vmcb->control.tsc_offset + native_read_tsc();
+ *data = vmcb->control.tsc_offset +
+ svm_scale_tsc(vcpu, native_read_tsc());
+
break;
}
case MSR_STAR:
@@ -3048,7 +3135,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR4] = cr_interception,
[SVM_EXIT_READ_CR8] = cr_interception,
[SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
- [SVM_EXIT_WRITE_CR0] = cr0_write_interception,
+ [SVM_EXIT_WRITE_CR0] = cr_interception,
[SVM_EXIT_WRITE_CR3] = cr_interception,
[SVM_EXIT_WRITE_CR4] = cr_interception,
[SVM_EXIT_WRITE_CR8] = cr8_write_interception,
@@ -3104,97 +3191,109 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_NPF] = pf_interception,
};
-void dump_vmcb(struct kvm_vcpu *vcpu)
+static void dump_vmcb(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
pr_err("VMCB Control Area:\n");
- pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff);
- pr_err("cr_write: %04x\n", control->intercept_cr >> 16);
- pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff);
- pr_err("dr_write: %04x\n", control->intercept_dr >> 16);
- pr_err("exceptions: %08x\n", control->intercept_exceptions);
- pr_err("intercepts: %016llx\n", control->intercept);
- pr_err("pause filter count: %d\n", control->pause_filter_count);
- pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa);
- pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa);
- pr_err("tsc_offset: %016llx\n", control->tsc_offset);
- pr_err("asid: %d\n", control->asid);
- pr_err("tlb_ctl: %d\n", control->tlb_ctl);
- pr_err("int_ctl: %08x\n", control->int_ctl);
- pr_err("int_vector: %08x\n", control->int_vector);
- pr_err("int_state: %08x\n", control->int_state);
- pr_err("exit_code: %08x\n", control->exit_code);
- pr_err("exit_info1: %016llx\n", control->exit_info_1);
- pr_err("exit_info2: %016llx\n", control->exit_info_2);
- pr_err("exit_int_info: %08x\n", control->exit_int_info);
- pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err);
- pr_err("nested_ctl: %lld\n", control->nested_ctl);
- pr_err("nested_cr3: %016llx\n", control->nested_cr3);
- pr_err("event_inj: %08x\n", control->event_inj);
- pr_err("event_inj_err: %08x\n", control->event_inj_err);
- pr_err("lbr_ctl: %lld\n", control->lbr_ctl);
- pr_err("next_rip: %016llx\n", control->next_rip);
+ pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
+ pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
+ pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
+ pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
+ pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
+ pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
+ pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
+ pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
+ pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
+ pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
+ pr_err("%-20s%d\n", "asid:", control->asid);
+ pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
+ pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
+ pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
+ pr_err("%-20s%08x\n", "int_state:", control->int_state);
+ pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
+ pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
+ pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
+ pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
+ pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
+ pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
+ pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
+ pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
+ pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
+ pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
+ pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
pr_err("VMCB State Save Area:\n");
- pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n",
- save->es.selector, save->es.attrib,
- save->es.limit, save->es.base);
- pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n",
- save->cs.selector, save->cs.attrib,
- save->cs.limit, save->cs.base);
- pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n",
- save->ss.selector, save->ss.attrib,
- save->ss.limit, save->ss.base);
- pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n",
- save->ds.selector, save->ds.attrib,
- save->ds.limit, save->ds.base);
- pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n",
- save->fs.selector, save->fs.attrib,
- save->fs.limit, save->fs.base);
- pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n",
- save->gs.selector, save->gs.attrib,
- save->gs.limit, save->gs.base);
- pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n",
- save->gdtr.selector, save->gdtr.attrib,
- save->gdtr.limit, save->gdtr.base);
- pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n",
- save->ldtr.selector, save->ldtr.attrib,
- save->ldtr.limit, save->ldtr.base);
- pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n",
- save->idtr.selector, save->idtr.attrib,
- save->idtr.limit, save->idtr.base);
- pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n",
- save->tr.selector, save->tr.attrib,
- save->tr.limit, save->tr.base);
+ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+ "es:",
+ save->es.selector, save->es.attrib,
+ save->es.limit, save->es.base);
+ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+ "cs:",
+ save->cs.selector, save->cs.attrib,
+ save->cs.limit, save->cs.base);
+ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+ "ss:",
+ save->ss.selector, save->ss.attrib,
+ save->ss.limit, save->ss.base);
+ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+ "ds:",
+ save->ds.selector, save->ds.attrib,
+ save->ds.limit, save->ds.base);
+ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+ "fs:",
+ save->fs.selector, save->fs.attrib,
+ save->fs.limit, save->fs.base);
+ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+ "gs:",
+ save->gs.selector, save->gs.attrib,
+ save->gs.limit, save->gs.base);
+ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+ "gdtr:",
+ save->gdtr.selector, save->gdtr.attrib,
+ save->gdtr.limit, save->gdtr.base);
+ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+ "ldtr:",
+ save->ldtr.selector, save->ldtr.attrib,
+ save->ldtr.limit, save->ldtr.base);
+ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+ "idtr:",
+ save->idtr.selector, save->idtr.attrib,
+ save->idtr.limit, save->idtr.base);
+ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+ "tr:",
+ save->tr.selector, save->tr.attrib,
+ save->tr.limit, save->tr.base);
pr_err("cpl: %d efer: %016llx\n",
save->cpl, save->efer);
- pr_err("cr0: %016llx cr2: %016llx\n",
- save->cr0, save->cr2);
- pr_err("cr3: %016llx cr4: %016llx\n",
- save->cr3, save->cr4);
- pr_err("dr6: %016llx dr7: %016llx\n",
- save->dr6, save->dr7);
- pr_err("rip: %016llx rflags: %016llx\n",
- save->rip, save->rflags);
- pr_err("rsp: %016llx rax: %016llx\n",
- save->rsp, save->rax);
- pr_err("star: %016llx lstar: %016llx\n",
- save->star, save->lstar);
- pr_err("cstar: %016llx sfmask: %016llx\n",
- save->cstar, save->sfmask);
- pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n",
- save->kernel_gs_base, save->sysenter_cs);
- pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n",
- save->sysenter_esp, save->sysenter_eip);
- pr_err("gpat: %016llx dbgctl: %016llx\n",
- save->g_pat, save->dbgctl);
- pr_err("br_from: %016llx br_to: %016llx\n",
- save->br_from, save->br_to);
- pr_err("excp_from: %016llx excp_to: %016llx\n",
- save->last_excp_from, save->last_excp_to);
-
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "cr0:", save->cr0, "cr2:", save->cr2);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "cr3:", save->cr3, "cr4:", save->cr4);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "dr6:", save->dr6, "dr7:", save->dr7);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "rip:", save->rip, "rflags:", save->rflags);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "rsp:", save->rsp, "rax:", save->rax);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "star:", save->star, "lstar:", save->lstar);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "cstar:", save->cstar, "sfmask:", save->sfmask);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "kernel_gs_base:", save->kernel_gs_base,
+ "sysenter_cs:", save->sysenter_cs);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "sysenter_esp:", save->sysenter_esp,
+ "sysenter_eip:", save->sysenter_eip);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "br_from:", save->br_from, "br_to:", save->br_to);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "excp_from:", save->last_excp_from,
+ "excp_to:", save->last_excp_to);
}
static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
@@ -3384,7 +3483,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
return 0;
- ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
+ ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
if (is_guest_mode(vcpu))
return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
@@ -3871,6 +3970,186 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
update_cr0_intercept(svm);
}
+#define PRE_EX(exit) { .exit_code = (exit), \
+ .stage = X86_ICPT_PRE_EXCEPT, }
+#define POST_EX(exit) { .exit_code = (exit), \
+ .stage = X86_ICPT_POST_EXCEPT, }
+#define POST_MEM(exit) { .exit_code = (exit), \
+ .stage = X86_ICPT_POST_MEMACCESS, }
+
+static struct __x86_intercept {
+ u32 exit_code;
+ enum x86_intercept_stage stage;
+} x86_intercept_map[] = {
+ [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
+ [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
+ [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
+ [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
+ [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
+ [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
+ [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
+ [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
+ [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
+ [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
+ [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
+ [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
+ [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
+ [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
+ [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
+ [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
+ [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
+ [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
+ [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
+ [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
+ [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
+ [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
+ [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
+ [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
+ [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
+ [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
+ [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
+ [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
+ [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
+ [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
+ [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
+ [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
+ [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
+ [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
+ [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
+ [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
+ [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
+ [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
+ [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
+ [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
+ [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
+ [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
+ [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
+ [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
+ [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
+ [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
+};
+
+#undef PRE_EX
+#undef POST_EX
+#undef POST_MEM
+
+static int svm_check_intercept(struct kvm_vcpu *vcpu,
+ struct x86_instruction_info *info,
+ enum x86_intercept_stage stage)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ int vmexit, ret = X86EMUL_CONTINUE;
+ struct __x86_intercept icpt_info;
+ struct vmcb *vmcb = svm->vmcb;
+
+ if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
+ goto out;
+
+ icpt_info = x86_intercept_map[info->intercept];
+
+ if (stage != icpt_info.stage)
+ goto out;
+
+ switch (icpt_info.exit_code) {
+ case SVM_EXIT_READ_CR0:
+ if (info->intercept == x86_intercept_cr_read)
+ icpt_info.exit_code += info->modrm_reg;
+ break;
+ case SVM_EXIT_WRITE_CR0: {
+ unsigned long cr0, val;
+ u64 intercept;
+
+ if (info->intercept == x86_intercept_cr_write)
+ icpt_info.exit_code += info->modrm_reg;
+
+ if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
+ break;
+
+ intercept = svm->nested.intercept;
+
+ if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
+ break;
+
+ cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
+ val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
+
+ if (info->intercept == x86_intercept_lmsw) {
+ cr0 &= 0xfUL;
+ val &= 0xfUL;
+ /* lmsw can't clear PE - catch this here */
+ if (cr0 & X86_CR0_PE)
+ val |= X86_CR0_PE;
+ }
+
+ if (cr0 ^ val)
+ icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+
+ break;
+ }
+ case SVM_EXIT_READ_DR0:
+ case SVM_EXIT_WRITE_DR0:
+ icpt_info.exit_code += info->modrm_reg;
+ break;
+ case SVM_EXIT_MSR:
+ if (info->intercept == x86_intercept_wrmsr)
+ vmcb->control.exit_info_1 = 1;
+ else
+ vmcb->control.exit_info_1 = 0;
+ break;
+ case SVM_EXIT_PAUSE:
+ /*
+ * We get this for NOP only, but pause
+ * is rep not, check this here
+ */
+ if (info->rep_prefix != REPE_PREFIX)
+ goto out;
+ case SVM_EXIT_IOIO: {
+ u64 exit_info;
+ u32 bytes;
+
+ exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;
+
+ if (info->intercept == x86_intercept_in ||
+ info->intercept == x86_intercept_ins) {
+ exit_info |= SVM_IOIO_TYPE_MASK;
+ bytes = info->src_bytes;
+ } else {
+ bytes = info->dst_bytes;
+ }
+
+ if (info->intercept == x86_intercept_outs ||
+ info->intercept == x86_intercept_ins)
+ exit_info |= SVM_IOIO_STR_MASK;
+
+ if (info->rep_prefix)
+ exit_info |= SVM_IOIO_REP_MASK;
+
+ bytes = min(bytes, 4u);
+
+ exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
+
+ exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
+
+ vmcb->control.exit_info_1 = exit_info;
+ vmcb->control.exit_info_2 = info->next_rip;
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ vmcb->control.next_rip = info->next_rip;
+ vmcb->control.exit_code = icpt_info.exit_code;
+ vmexit = nested_svm_exit_handled(svm);
+
+ ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
+ : X86EMUL_CONTINUE;
+
+out:
+ return ret;
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -3952,10 +4231,14 @@ static struct kvm_x86_ops svm_x86_ops = {
.has_wbinvd_exit = svm_has_wbinvd_exit,
+ .set_tsc_khz = svm_set_tsc_khz,
.write_tsc_offset = svm_write_tsc_offset,
.adjust_tsc_offset = svm_adjust_tsc_offset,
+ .compute_tsc_offset = svm_compute_tsc_offset,
.set_tdp_cr3 = set_tdp_cr3,
+
+ .check_intercept = svm_check_intercept,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5b4cdcbd154..4c3fa0f6746 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -128,8 +128,11 @@ struct vcpu_vmx {
unsigned long host_rsp;
int launched;
u8 fail;
+ u8 cpl;
+ bool nmi_known_unmasked;
u32 exit_intr_info;
u32 idt_vectoring_info;
+ ulong rflags;
struct shared_msr_entry *guest_msrs;
int nmsrs;
int save_nmsrs;
@@ -159,6 +162,10 @@ struct vcpu_vmx {
u32 ar;
} tr, es, ds, fs, gs;
} rmode;
+ struct {
+ u32 bitmask; /* 4 bits per segment (1 bit per field) */
+ struct kvm_save_segment seg[8];
+ } segment_cache;
int vpid;
bool emulation_required;
@@ -171,6 +178,15 @@ struct vcpu_vmx {
bool rdtscp_enabled;
};
+enum segment_cache_field {
+ SEG_FIELD_SEL = 0,
+ SEG_FIELD_BASE = 1,
+ SEG_FIELD_LIMIT = 2,
+ SEG_FIELD_AR = 3,
+
+ SEG_FIELD_NR = 4
+};
+
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_vmx, vcpu);
@@ -643,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
vmcs_writel(field, vmcs_readl(field) | mask);
}
+static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+{
+ vmx->segment_cache.bitmask = 0;
+}
+
+static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
+ unsigned field)
+{
+ bool ret;
+ u32 mask = 1 << (seg * SEG_FIELD_NR + field);
+
+ if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
+ vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
+ vmx->segment_cache.bitmask = 0;
+ }
+ ret = vmx->segment_cache.bitmask & mask;
+ vmx->segment_cache.bitmask |= mask;
+ return ret;
+}
+
+static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
+{
+ u16 *p = &vmx->segment_cache.seg[seg].selector;
+
+ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
+ *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
+ return *p;
+}
+
+static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
+{
+ ulong *p = &vmx->segment_cache.seg[seg].base;
+
+ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
+ *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
+ return *p;
+}
+
+static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
+{
+ u32 *p = &vmx->segment_cache.seg[seg].limit;
+
+ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
+ *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
+ return *p;
+}
+
+static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
+{
+ u32 *p = &vmx->segment_cache.seg[seg].ar;
+
+ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
+ *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
+ return *p;
+}
+
static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
u32 eb;
@@ -970,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
{
unsigned long rflags, save_rflags;
- rflags = vmcs_readl(GUEST_RFLAGS);
- if (to_vmx(vcpu)->rmode.vm86_active) {
- rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
- save_rflags = to_vmx(vcpu)->rmode.save_rflags;
- rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+ if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
+ __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+ rflags = vmcs_readl(GUEST_RFLAGS);
+ if (to_vmx(vcpu)->rmode.vm86_active) {
+ rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+ save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+ rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+ }
+ to_vmx(vcpu)->rflags = rflags;
}
- return rflags;
+ return to_vmx(vcpu)->rflags;
}
static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
+ __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+ __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+ to_vmx(vcpu)->rflags = rflags;
if (to_vmx(vcpu)->rmode.vm86_active) {
to_vmx(vcpu)->rmode.save_rflags = rflags;
rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
@@ -1053,7 +1132,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
}
if (vmx->rmode.vm86_active) {
- if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE)
+ int inc_eip = 0;
+ if (kvm_exception_is_soft(nr))
+ inc_eip = vcpu->arch.event_exit_inst_len;
+ if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
@@ -1151,6 +1233,16 @@ static u64 guest_read_tsc(void)
}
/*
+ * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
+ * ioctl. In this case the call-back should update internal vmx state to make
+ * the changes effective.
+ */
+static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+{
+ /* Nothing to do here */
+}
+
+/*
* writes 'offset' into guest's timestamp counter offset register
*/
static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
@@ -1164,6 +1256,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
vmcs_write64(TSC_OFFSET, offset + adjustment);
}
+static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
+{
+ return target_tsc - native_read_tsc();
+}
+
/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
@@ -1243,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
break;
#ifdef CONFIG_X86_64
case MSR_FS_BASE:
+ vmx_segment_cache_clear(vmx);
vmcs_writel(GUEST_FS_BASE, data);
break;
case MSR_GS_BASE:
+ vmx_segment_cache_clear(vmx);
vmcs_writel(GUEST_GS_BASE, data);
break;
case MSR_KERNEL_GS_BASE:
@@ -1689,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
vmx->emulation_required = 1;
vmx->rmode.vm86_active = 0;
+ vmx_segment_cache_clear(vmx);
+
vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
@@ -1712,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
+ vmx_segment_cache_clear(vmx);
+
vmcs_write16(GUEST_SS_SELECTOR, 0);
vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -1775,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
}
+ vmx_segment_cache_clear(vmx);
+
vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
@@ -1851,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
{
u32 guest_tr_ar;
+ vmx_segment_cache_clear(to_vmx(vcpu));
+
guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
@@ -1998,6 +2105,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vmcs_writel(CR0_READ_SHADOW, cr0);
vmcs_writel(GUEST_CR0, hw_cr0);
vcpu->arch.cr0 = cr0;
+ __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
}
static u64 construct_eptp(unsigned long root_hpa)
@@ -2053,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
struct kvm_save_segment *save;
u32 ar;
@@ -2075,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
var->limit = save->limit;
ar = save->ar;
if (seg == VCPU_SREG_TR
- || var->selector == vmcs_read16(sf->selector))
+ || var->selector == vmx_read_guest_seg_selector(vmx, seg))
goto use_saved_rmode_seg;
}
- var->base = vmcs_readl(sf->base);
- var->limit = vmcs_read32(sf->limit);
- var->selector = vmcs_read16(sf->selector);
- ar = vmcs_read32(sf->ar_bytes);
+ var->base = vmx_read_guest_seg_base(vmx, seg);
+ var->limit = vmx_read_guest_seg_limit(vmx, seg);
+ var->selector = vmx_read_guest_seg_selector(vmx, seg);
+ ar = vmx_read_guest_seg_ar(vmx, seg);
use_saved_rmode_seg:
if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
ar = 0;
@@ -2098,27 +2205,37 @@ use_saved_rmode_seg:
static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
- struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
struct kvm_segment s;
if (to_vmx(vcpu)->rmode.vm86_active) {
vmx_get_segment(vcpu, &s, seg);
return s.base;
}
- return vmcs_readl(sf->base);
+ return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
}
-static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
{
if (!is_protmode(vcpu))
return 0;
- if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
+ if (!is_long_mode(vcpu)
+ && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
return 3;
- return vmcs_read16(GUEST_CS_SELECTOR) & 3;
+ return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
}
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+{
+ if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
+ __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+ to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu);
+ }
+ return to_vmx(vcpu)->cpl;
+}
+
+
static u32 vmx_segment_access_rights(struct kvm_segment *var)
{
u32 ar;
@@ -2148,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
u32 ar;
+ vmx_segment_cache_clear(vmx);
+
if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
vmcs_write16(sf->selector, var->selector);
vmx->rmode.tr.selector = var->selector;
@@ -2184,11 +2303,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
ar |= 0x1; /* Accessed */
vmcs_write32(sf->ar_bytes, ar);
+ __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
}
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
- u32 ar = vmcs_read32(GUEST_CS_AR_BYTES);
+ u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
*db = (ar >> 14) & 1;
*l = (ar >> 13) & 1;
@@ -2775,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
if (ret != 0)
goto out;
+ vmx_segment_cache_clear(vmx);
+
seg_setup(VCPU_SREG_CS);
/*
* GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
@@ -2904,7 +3026,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
++vcpu->stat.irq_injections;
if (vmx->rmode.vm86_active) {
- if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE)
+ int inc_eip = 0;
+ if (vcpu->arch.interrupt.soft)
+ inc_eip = vcpu->arch.event_exit_inst_len;
+ if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
@@ -2937,8 +3062,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
}
++vcpu->stat.nmi_injections;
+ vmx->nmi_known_unmasked = false;
if (vmx->rmode.vm86_active) {
- if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE)
+ if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
@@ -2961,6 +3087,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
{
if (!cpu_has_virtual_nmis())
return to_vmx(vcpu)->soft_vnmi_blocked;
+ if (to_vmx(vcpu)->nmi_known_unmasked)
+ return false;
return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
}
@@ -2974,6 +3102,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
vmx->vnmi_blocked_time = 0;
}
} else {
+ vmx->nmi_known_unmasked = !masked;
if (masked)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@@ -3091,7 +3220,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
enum emulation_result er;
vect_info = vmx->idt_vectoring_info;
- intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ intr_info = vmx->exit_intr_info;
if (is_machine_check(intr_info))
return handle_machine_check(vcpu);
@@ -3122,7 +3251,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
}
error_code = 0;
- rip = kvm_rip_read(vcpu);
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
if (is_page_fault(intr_info)) {
@@ -3169,6 +3297,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.event_exit_inst_len =
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ rip = kvm_rip_read(vcpu);
kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
kvm_run->debug.arch.exception = ex_no;
break;
@@ -3505,9 +3634,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
switch (type) {
case INTR_TYPE_NMI_INTR:
vcpu->arch.nmi_injected = false;
- if (cpu_has_virtual_nmis())
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
+ vmx_set_nmi_mask(vcpu, true);
break;
case INTR_TYPE_EXT_INTR:
case INTR_TYPE_SOFT_INTR:
@@ -3867,12 +3994,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
{
- u32 exit_intr_info = vmx->exit_intr_info;
+ u32 exit_intr_info;
+
+ if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
+ || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
+ return;
+
+ vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ exit_intr_info = vmx->exit_intr_info;
/* Handle machine checks before interrupts are enabled */
- if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
- || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI
- && is_machine_check(exit_intr_info)))
+ if (is_machine_check(exit_intr_info))
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
@@ -3886,7 +4018,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
- u32 exit_intr_info = vmx->exit_intr_info;
+ u32 exit_intr_info;
bool unblock_nmi;
u8 vector;
bool idtv_info_valid;
@@ -3894,6 +4026,13 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
if (cpu_has_virtual_nmis()) {
+ if (vmx->nmi_known_unmasked)
+ return;
+ /*
+ * Can't use vmx->exit_intr_info since we're not sure what
+ * the exit reason is.
+ */
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
/*
@@ -3910,6 +4049,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
vector != DF_VECTOR && !idtv_info_valid)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
+ else
+ vmx->nmi_known_unmasked =
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ & GUEST_INTR_STATE_NMI);
} else if (unlikely(vmx->soft_vnmi_blocked))
vmx->vnmi_blocked_time +=
ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
@@ -3946,8 +4089,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
* Clear bit "block by NMI" before VM entry if a NMI
* delivery faulted.
*/
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
+ vmx_set_nmi_mask(&vmx->vcpu, false);
break;
case INTR_TYPE_SOFT_EXCEPTION:
vmx->vcpu.arch.event_exit_inst_len =
@@ -4124,7 +4266,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
);
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
+ | (1 << VCPU_EXREG_RFLAGS)
+ | (1 << VCPU_EXREG_CPL)
| (1 << VCPU_EXREG_PDPTR)
+ | (1 << VCPU_EXREG_SEGMENTS)
| (1 << VCPU_EXREG_CR3));
vcpu->arch.regs_dirty = 0;
@@ -4134,7 +4279,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->launched = 1;
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
- vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
vmx_complete_atomic_exit(vmx);
vmx_recover_nmi_blocking(vmx);
@@ -4195,8 +4339,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
goto free_vcpu;
vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ err = -ENOMEM;
if (!vmx->guest_msrs) {
- err = -ENOMEM;
goto uninit_vcpu;
}
@@ -4215,7 +4359,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (err)
goto free_vmcs;
if (vm_need_virtualize_apic_accesses(kvm))
- if (alloc_apic_access_page(kvm) != 0)
+ err = alloc_apic_access_page(kvm);
+ if (err)
goto free_vmcs;
if (enable_ept) {
@@ -4368,6 +4513,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
}
+static int vmx_check_intercept(struct kvm_vcpu *vcpu,
+ struct x86_instruction_info *info,
+ enum x86_intercept_stage stage)
+{
+ return X86EMUL_CONTINUE;
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -4449,10 +4601,14 @@ static struct kvm_x86_ops vmx_x86_ops = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
+ .set_tsc_khz = vmx_set_tsc_khz,
.write_tsc_offset = vmx_write_tsc_offset,
.adjust_tsc_offset = vmx_adjust_tsc_offset,
+ .compute_tsc_offset = vmx_compute_tsc_offset,
.set_tdp_cr3 = vmx_set_cr3,
+
+ .check_intercept = vmx_check_intercept,
};
static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 934b4c6b0bf..77c9d8673dc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -60,22 +60,12 @@
#include <asm/div64.h>
#define MAX_IO_MSRS 256
-#define CR0_RESERVED_BITS \
- (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
- | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
- | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
-#define CR4_RESERVED_BITS \
- (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
- | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
- | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
- | X86_CR4_OSXSAVE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
-
-#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
-
#define KVM_MAX_MCE_BANKS 32
#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
+#define emul_to_vcpu(ctxt) \
+ container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
+
/* EFER defaults:
* - enable syscall per default because its emulated by KVM
* - enable LME and LMA per default on 64 bit KVM
@@ -100,6 +90,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
int ignore_msrs = 0;
module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
+bool kvm_has_tsc_control;
+EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
+u32 kvm_max_guest_tsc_khz;
+EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
@@ -157,6 +152,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
u64 __read_mostly host_xcr0;
+int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+
static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
int i;
@@ -361,8 +358,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
- kvm_make_request(KVM_REQ_NMI, vcpu);
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ vcpu->arch.nmi_pending = 1;
}
EXPORT_SYMBOL_GPL(kvm_inject_nmi);
@@ -982,7 +979,15 @@ static inline int kvm_tsc_changes_freq(void)
return ret;
}
-static inline u64 nsec_to_cycles(u64 nsec)
+static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.virtual_tsc_khz)
+ return vcpu->arch.virtual_tsc_khz;
+ else
+ return __this_cpu_read(cpu_tsc_khz);
+}
+
+static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{
u64 ret;
@@ -990,25 +995,24 @@ static inline u64 nsec_to_cycles(u64 nsec)
if (kvm_tsc_changes_freq())
printk_once(KERN_WARNING
"kvm: unreliable cycle conversion on adjustable rate TSC\n");
- ret = nsec * __this_cpu_read(cpu_tsc_khz);
+ ret = nsec * vcpu_tsc_khz(vcpu);
do_div(ret, USEC_PER_SEC);
return ret;
}
-static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz)
+static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
{
/* Compute a scale to convert nanoseconds in TSC cycles */
kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
- &kvm->arch.virtual_tsc_shift,
- &kvm->arch.virtual_tsc_mult);
- kvm->arch.virtual_tsc_khz = this_tsc_khz;
+ &vcpu->arch.tsc_catchup_shift,
+ &vcpu->arch.tsc_catchup_mult);
}
static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
- vcpu->kvm->arch.virtual_tsc_mult,
- vcpu->kvm->arch.virtual_tsc_shift);
+ vcpu->arch.tsc_catchup_mult,
+ vcpu->arch.tsc_catchup_shift);
tsc += vcpu->arch.last_tsc_write;
return tsc;
}
@@ -1021,7 +1025,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
s64 sdiff;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
- offset = data - native_read_tsc();
+ offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
ns = get_kernel_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
sdiff = data - kvm->arch.last_tsc_write;
@@ -1037,13 +1041,13 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
* In that case, for a reliable TSC, we can match TSC offsets,
* or make a best guest using elapsed value.
*/
- if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) &&
+ if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) &&
elapsed < 5ULL * NSEC_PER_SEC) {
if (!check_tsc_unstable()) {
offset = kvm->arch.last_tsc_offset;
pr_debug("kvm: matched tsc offset for %llu\n", data);
} else {
- u64 delta = nsec_to_cycles(elapsed);
+ u64 delta = nsec_to_cycles(vcpu, elapsed);
offset += delta;
pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
@@ -1075,8 +1079,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
local_irq_save(flags);
kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
kernel_ns = get_kernel_ns();
- this_tsc_khz = __this_cpu_read(cpu_tsc_khz);
-
+ this_tsc_khz = vcpu_tsc_khz(v);
if (unlikely(this_tsc_khz == 0)) {
local_irq_restore(flags);
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -1993,6 +1996,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_X86_ROBUST_SINGLESTEP:
case KVM_CAP_XSAVE:
case KVM_CAP_ASYNC_PF:
+ case KVM_CAP_GET_TSC_KHZ:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -2019,6 +2023,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_XCRS:
r = cpu_has_xsave;
break;
+ case KVM_CAP_TSC_CONTROL:
+ r = kvm_has_tsc_control;
+ break;
default:
r = 0;
break;
@@ -2120,8 +2127,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_x86_ops->vcpu_load(vcpu, cpu);
if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
/* Make sure TSC doesn't go backwards */
- s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
- native_read_tsc() - vcpu->arch.last_host_tsc;
+ s64 tsc_delta;
+ u64 tsc;
+
+ kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc);
+ tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
+ tsc - vcpu->arch.last_guest_tsc;
+
if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC");
if (check_tsc_unstable()) {
@@ -2139,7 +2151,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
- vcpu->arch.last_host_tsc = native_read_tsc();
+ kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
}
static int is_efer_nx(void)
@@ -2324,6 +2336,12 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+ /* cpuid 0xC0000001.edx */
+ const u32 kvm_supported_word5_x86_features =
+ F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
+ F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
+ F(PMM) | F(PMM_EN);
+
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
do_cpuid_1_ent(entry, function, index);
@@ -2418,6 +2436,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
(1 << KVM_FEATURE_NOP_IO_DELAY) |
(1 << KVM_FEATURE_CLOCKSOURCE2) |
+ (1 << KVM_FEATURE_ASYNC_PF) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
entry->ebx = 0;
entry->ecx = 0;
@@ -2432,6 +2451,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ecx &= kvm_supported_word6_x86_features;
cpuid_mask(&entry->ecx, 6);
break;
+ /*Add support for Centaur's CPUID instruction*/
+ case 0xC0000000:
+ /*Just support up to 0xC0000004 now*/
+ entry->eax = min(entry->eax, 0xC0000004);
+ break;
+ case 0xC0000001:
+ entry->edx &= kvm_supported_word5_x86_features;
+ cpuid_mask(&entry->edx, 5);
+ break;
+ case 0xC0000002:
+ case 0xC0000003:
+ case 0xC0000004:
+ /*Now nothing to do, reserved for the future*/
+ break;
}
kvm_x86_ops->set_supported_cpuid(function, entry);
@@ -2478,6 +2511,26 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
if (nent >= cpuid->nent)
goto out_free;
+ /* Add support for Centaur's CPUID instruction. */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) {
+ do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0,
+ &nent, cpuid->nent);
+
+ r = -E2BIG;
+ if (nent >= cpuid->nent)
+ goto out_free;
+
+ limit = cpuid_entries[nent - 1].eax;
+ for (func = 0xC0000001;
+ func <= limit && nent < cpuid->nent; ++func)
+ do_cpuid_ent(&cpuid_entries[nent], func, 0,
+ &nent, cpuid->nent);
+
+ r = -E2BIG;
+ if (nent >= cpuid->nent)
+ goto out_free;
+ }
+
do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
cpuid->nent);
@@ -3046,6 +3099,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
break;
}
+ case KVM_SET_TSC_KHZ: {
+ u32 user_tsc_khz;
+
+ r = -EINVAL;
+ if (!kvm_has_tsc_control)
+ break;
+
+ user_tsc_khz = (u32)arg;
+
+ if (user_tsc_khz >= kvm_max_guest_tsc_khz)
+ goto out;
+
+ kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz);
+
+ r = 0;
+ goto out;
+ }
+ case KVM_GET_TSC_KHZ: {
+ r = -EIO;
+ if (check_tsc_unstable())
+ goto out;
+
+ r = vcpu_tsc_khz(vcpu);
+
+ goto out;
+ }
default:
r = -EINVAL;
}
@@ -3595,20 +3674,43 @@ static void kvm_init_msr_list(void)
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
const void *v)
{
- if (vcpu->arch.apic &&
- !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
- return 0;
+ int handled = 0;
+ int n;
- return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+ do {
+ n = min(len, 8);
+ if (!(vcpu->arch.apic &&
+ !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
+ && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+ break;
+ handled += n;
+ addr += n;
+ len -= n;
+ v += n;
+ } while (len);
+
+ return handled;
}
static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
{
- if (vcpu->arch.apic &&
- !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
- return 0;
+ int handled = 0;
+ int n;
+
+ do {
+ n = min(len, 8);
+ if (!(vcpu->arch.apic &&
+ !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
+ && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+ break;
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+ handled += n;
+ addr += n;
+ len -= n;
+ v += n;
+ } while (len);
- return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+ return handled;
}
static void kvm_set_segment(struct kvm_vcpu *vcpu,
@@ -3703,37 +3805,43 @@ out:
}
/* used for instruction fetching */
-static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
- struct kvm_vcpu *vcpu,
+static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
+ gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
access | PFERR_FETCH_MASK,
exception);
}
-static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
- struct kvm_vcpu *vcpu,
+static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+ gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
exception);
}
-static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
- struct kvm_vcpu *vcpu,
+static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
+ gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
}
-static int kvm_write_guest_virt_system(gva_t addr, void *val,
+static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
+ gva_t addr, void *val,
unsigned int bytes,
- struct kvm_vcpu *vcpu,
struct x86_exception *exception)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
void *data = val;
int r = X86EMUL_CONTINUE;
@@ -3761,13 +3869,15 @@ out:
return r;
}
-static int emulator_read_emulated(unsigned long addr,
+static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr,
void *val,
unsigned int bytes,
- struct x86_exception *exception,
- struct kvm_vcpu *vcpu)
+ struct x86_exception *exception)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
gpa_t gpa;
+ int handled;
if (vcpu->mmio_read_completed) {
memcpy(val, vcpu->mmio_data, bytes);
@@ -3786,7 +3896,7 @@ static int emulator_read_emulated(unsigned long addr,
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto mmio;
- if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception)
+ if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception)
== X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;
@@ -3794,18 +3904,24 @@ mmio:
/*
* Is this MMIO handled locally?
*/
- if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
+ handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
+
+ if (handled == bytes)
return X86EMUL_CONTINUE;
- }
+
+ gpa += handled;
+ bytes -= handled;
+ val += handled;
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
vcpu->mmio_needed = 1;
vcpu->run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
- vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+ vcpu->mmio_size = bytes;
+ vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
+ vcpu->mmio_index = 0;
return X86EMUL_IO_NEEDED;
}
@@ -3829,6 +3945,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
+ int handled;
gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
@@ -3847,25 +3964,35 @@ mmio:
/*
* Is this MMIO handled locally?
*/
- if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
+ handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
+ if (handled == bytes)
return X86EMUL_CONTINUE;
+ gpa += handled;
+ bytes -= handled;
+ val += handled;
+
vcpu->mmio_needed = 1;
+ memcpy(vcpu->mmio_data, val, bytes);
vcpu->run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
- vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+ vcpu->mmio_size = bytes;
+ vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
- memcpy(vcpu->run->mmio.data, val, bytes);
+ memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+ vcpu->mmio_index = 0;
return X86EMUL_CONTINUE;
}
-int emulator_write_emulated(unsigned long addr,
+int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr,
const void *val,
unsigned int bytes,
- struct x86_exception *exception,
- struct kvm_vcpu *vcpu)
+ struct x86_exception *exception)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
/* Crossing a page boundary? */
if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
int rc, now;
@@ -3893,13 +4020,14 @@ int emulator_write_emulated(unsigned long addr,
(cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
#endif
-static int emulator_cmpxchg_emulated(unsigned long addr,
+static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr,
const void *old,
const void *new,
unsigned int bytes,
- struct x86_exception *exception,
- struct kvm_vcpu *vcpu)
+ struct x86_exception *exception)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
gpa_t gpa;
struct page *page;
char *kaddr;
@@ -3955,7 +4083,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
emul_write:
printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
- return emulator_write_emulated(addr, new, bytes, exception, vcpu);
+ return emulator_write_emulated(ctxt, addr, new, bytes, exception);
}
static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
@@ -3974,9 +4102,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
}
-static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
- unsigned int count, struct kvm_vcpu *vcpu)
+static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
+ int size, unsigned short port, void *val,
+ unsigned int count)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
if (vcpu->arch.pio.count)
goto data_avail;
@@ -4004,10 +4135,12 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
return 0;
}
-static int emulator_pio_out_emulated(int size, unsigned short port,
- const void *val, unsigned int count,
- struct kvm_vcpu *vcpu)
+static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
+ int size, unsigned short port,
+ const void *val, unsigned int count)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
trace_kvm_pio(1, port, size, count);
vcpu->arch.pio.port = port;
@@ -4037,10 +4170,9 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
return kvm_x86_ops->get_segment_base(vcpu, seg);
}
-int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
+static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
{
- kvm_mmu_invlpg(vcpu, address);
- return X86EMUL_CONTINUE;
+ kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
}
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
@@ -4062,22 +4194,20 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
-int emulate_clts(struct kvm_vcpu *vcpu)
+static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
{
- kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
- kvm_x86_ops->fpu_activate(vcpu);
- return X86EMUL_CONTINUE;
+ kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
}
-int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu)
+int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
{
- return _kvm_get_dr(vcpu, dr, dest);
+ return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
}
-int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu)
+int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
{
- return __kvm_set_dr(vcpu, dr, value);
+ return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
}
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -4085,8 +4215,9 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val)
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}
-static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
+static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
unsigned long value;
switch (cr) {
@@ -4113,8 +4244,9 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
return value;
}
-static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
+static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
int res = 0;
switch (cr) {
@@ -4141,33 +4273,45 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
return res;
}
-static int emulator_get_cpl(struct kvm_vcpu *vcpu)
+static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
+{
+ return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
+}
+
+static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
+{
+ kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
+}
+
+static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
- return kvm_x86_ops->get_cpl(vcpu);
+ kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
}
-static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
+static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
- kvm_x86_ops->get_gdt(vcpu, dt);
+ kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
}
-static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
+static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
- kvm_x86_ops->get_idt(vcpu, dt);
+ kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
}
-static unsigned long emulator_get_cached_segment_base(int seg,
- struct kvm_vcpu *vcpu)
+static unsigned long emulator_get_cached_segment_base(
+ struct x86_emulate_ctxt *ctxt, int seg)
{
- return get_segment_base(vcpu, seg);
+ return get_segment_base(emul_to_vcpu(ctxt), seg);
}
-static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3,
- int seg, struct kvm_vcpu *vcpu)
+static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
+ struct desc_struct *desc, u32 *base3,
+ int seg)
{
struct kvm_segment var;
- kvm_get_segment(vcpu, &var, seg);
+ kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
+ *selector = var.selector;
if (var.unusable)
return false;
@@ -4192,14 +4336,14 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3,
return true;
}
-static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3,
- int seg, struct kvm_vcpu *vcpu)
+static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
+ struct desc_struct *desc, u32 base3,
+ int seg)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
struct kvm_segment var;
- /* needed to preserve selector */
- kvm_get_segment(vcpu, &var, seg);
-
+ var.selector = selector;
var.base = get_desc_base(desc);
#ifdef CONFIG_X86_64
var.base |= ((u64)base3) << 32;
@@ -4223,22 +4367,44 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3,
return;
}
-static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu)
+static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
+ u32 msr_index, u64 *pdata)
{
- struct kvm_segment kvm_seg;
+ return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+}
- kvm_get_segment(vcpu, &kvm_seg, seg);
- return kvm_seg.selector;
+static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
+ u32 msr_index, u64 data)
+{
+ return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
}
-static void emulator_set_segment_selector(u16 sel, int seg,
- struct kvm_vcpu *vcpu)
+static void emulator_halt(struct x86_emulate_ctxt *ctxt)
{
- struct kvm_segment kvm_seg;
+ emul_to_vcpu(ctxt)->arch.halt_request = 1;
+}
- kvm_get_segment(vcpu, &kvm_seg, seg);
- kvm_seg.selector = sel;
- kvm_set_segment(vcpu, &kvm_seg, seg);
+static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
+{
+ preempt_disable();
+ kvm_load_guest_fpu(emul_to_vcpu(ctxt));
+ /*
+ * CR0.TS may reference the host fpu state, not the guest fpu state,
+ * so it may be clear at this point.
+ */
+ clts();
+}
+
+static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
+{
+ preempt_enable();
+}
+
+static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
+ struct x86_instruction_info *info,
+ enum x86_intercept_stage stage)
+{
+ return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
}
static struct x86_emulate_ops emulate_ops = {
@@ -4248,22 +4414,29 @@ static struct x86_emulate_ops emulate_ops = {
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
+ .invlpg = emulator_invlpg,
.pio_in_emulated = emulator_pio_in_emulated,
.pio_out_emulated = emulator_pio_out_emulated,
- .get_cached_descriptor = emulator_get_cached_descriptor,
- .set_cached_descriptor = emulator_set_cached_descriptor,
- .get_segment_selector = emulator_get_segment_selector,
- .set_segment_selector = emulator_set_segment_selector,
+ .get_segment = emulator_get_segment,
+ .set_segment = emulator_set_segment,
.get_cached_segment_base = emulator_get_cached_segment_base,
.get_gdt = emulator_get_gdt,
.get_idt = emulator_get_idt,
+ .set_gdt = emulator_set_gdt,
+ .set_idt = emulator_set_idt,
.get_cr = emulator_get_cr,
.set_cr = emulator_set_cr,
.cpl = emulator_get_cpl,
.get_dr = emulator_get_dr,
.set_dr = emulator_set_dr,
- .set_msr = kvm_set_msr,
- .get_msr = kvm_get_msr,
+ .set_msr = emulator_set_msr,
+ .get_msr = emulator_get_msr,
+ .halt = emulator_halt,
+ .wbinvd = emulator_wbinvd,
+ .fix_hypercall = emulator_fix_hypercall,
+ .get_fpu = emulator_get_fpu,
+ .put_fpu = emulator_put_fpu,
+ .intercept = emulator_intercept,
};
static void cache_all_regs(struct kvm_vcpu *vcpu)
@@ -4305,12 +4478,17 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
int cs_db, cs_l;
+ /*
+ * TODO: fix emulate.c to use guest_read/write_register
+ * instead of direct ->regs accesses, can save hundred cycles
+ * on Intel for instructions that don't read/change RSP, for
+ * for example.
+ */
cache_all_regs(vcpu);
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
- vcpu->arch.emulate_ctxt.vcpu = vcpu;
- vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
+ vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
vcpu->arch.emulate_ctxt.mode =
(!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
@@ -4318,11 +4496,13 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
? X86EMUL_MODE_VM86 : cs_l
? X86EMUL_MODE_PROT64 : cs_db
? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+ vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu);
memset(c, 0, sizeof(struct decode_cache));
memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+ vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
}
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
+int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
{
struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
int ret;
@@ -4331,7 +4511,8 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
vcpu->arch.emulate_ctxt.decode.op_bytes = 2;
vcpu->arch.emulate_ctxt.decode.ad_bytes = 2;
- vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip;
+ vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip +
+ inc_eip;
ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq);
if (ret != X86EMUL_CONTINUE)
@@ -4340,7 +4521,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
vcpu->arch.emulate_ctxt.eip = c->eip;
memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
- kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+ kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
if (irq == NMI_VECTOR)
vcpu->arch.nmi_pending = false;
@@ -4402,16 +4583,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
{
int r;
struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+ bool writeback = true;
kvm_clear_exception_queue(vcpu);
- vcpu->arch.mmio_fault_cr2 = cr2;
- /*
- * TODO: fix emulate.c to use guest_read/write_register
- * instead of direct ->regs accesses, can save hundred cycles
- * on Intel for instructions that don't read/change RSP, for
- * for example.
- */
- cache_all_regs(vcpu);
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
init_emulate_ctxt(vcpu);
@@ -4442,13 +4616,19 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
return EMULATE_DONE;
}
- /* this is needed for vmware backdor interface to work since it
+ /* this is needed for vmware backdoor interface to work since it
changes registers values during IO operation */
- memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+ if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
+ vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
+ memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+ }
restart:
r = x86_emulate_insn(&vcpu->arch.emulate_ctxt);
+ if (r == EMULATION_INTERCEPTED)
+ return EMULATE_DONE;
+
if (r == EMULATION_FAILED) {
if (reexecute_instruction(vcpu, cr2))
return EMULATE_DONE;
@@ -4462,21 +4642,28 @@ restart:
} else if (vcpu->arch.pio.count) {
if (!vcpu->arch.pio.in)
vcpu->arch.pio.count = 0;
+ else
+ writeback = false;
r = EMULATE_DO_MMIO;
} else if (vcpu->mmio_needed) {
- if (vcpu->mmio_is_write)
- vcpu->mmio_needed = 0;
+ if (!vcpu->mmio_is_write)
+ writeback = false;
r = EMULATE_DO_MMIO;
} else if (r == EMULATION_RESTART)
goto restart;
else
r = EMULATE_DONE;
- toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
- kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
- kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+ if (writeback) {
+ toggle_interruptibility(vcpu,
+ vcpu->arch.emulate_ctxt.interruptibility);
+ kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+ vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+ kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+ } else
+ vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
return r;
}
@@ -4485,7 +4672,8 @@ EXPORT_SYMBOL_GPL(x86_emulate_instruction);
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
{
unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
- int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu);
+ int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
+ size, port, &val, 1);
/* do not return to emulator after return from userspace */
vcpu->arch.pio.count = 0;
return ret;
@@ -4879,8 +5067,9 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
-int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
+int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
char instruction[3];
unsigned long rip = kvm_rip_read(vcpu);
@@ -4893,21 +5082,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
kvm_x86_ops->patch_hypercall(vcpu, instruction);
- return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
-}
-
-void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
- struct desc_ptr dt = { limit, base };
-
- kvm_x86_ops->set_gdt(vcpu, &dt);
-}
-
-void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
- struct desc_ptr dt = { limit, base };
-
- kvm_x86_ops->set_idt(vcpu, &dt);
+ return emulator_write_emulated(&vcpu->arch.emulate_ctxt,
+ rip, instruction, 3, NULL);
}
static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
@@ -5170,6 +5346,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
+ bool nmi_pending;
bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
vcpu->run->request_interrupt_window;
@@ -5207,19 +5384,25 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
r = 1;
goto out;
}
- if (kvm_check_request(KVM_REQ_NMI, vcpu))
- vcpu->arch.nmi_pending = true;
}
r = kvm_mmu_reload(vcpu);
if (unlikely(r))
goto out;
+ /*
+ * An NMI can be injected between local nmi_pending read and
+ * vcpu->arch.nmi_pending read inside inject_pending_event().
+ * But in that case, KVM_REQ_EVENT will be set, which makes
+ * the race described above benign.
+ */
+ nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
+
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
inject_pending_event(vcpu);
/* enable NMI/IRQ window open exits if needed */
- if (vcpu->arch.nmi_pending)
+ if (nmi_pending)
kvm_x86_ops->enable_nmi_window(vcpu);
else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
kvm_x86_ops->enable_irq_window(vcpu);
@@ -5399,6 +5582,41 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return r;
}
+static int complete_mmio(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int r;
+
+ if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
+ return 1;
+
+ if (vcpu->mmio_needed) {
+ vcpu->mmio_needed = 0;
+ if (!vcpu->mmio_is_write)
+ memcpy(vcpu->mmio_data + vcpu->mmio_index,
+ run->mmio.data, 8);
+ vcpu->mmio_index += 8;
+ if (vcpu->mmio_index < vcpu->mmio_size) {
+ run->exit_reason = KVM_EXIT_MMIO;
+ run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
+ memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
+ run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
+ run->mmio.is_write = vcpu->mmio_is_write;
+ vcpu->mmio_needed = 1;
+ return 0;
+ }
+ if (vcpu->mmio_is_write)
+ return 1;
+ vcpu->mmio_read_completed = 1;
+ }
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ if (r != EMULATE_DONE)
+ return 0;
+ return 1;
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
@@ -5425,20 +5643,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
}
- if (vcpu->arch.pio.count || vcpu->mmio_needed) {
- if (vcpu->mmio_needed) {
- memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
- vcpu->mmio_read_completed = 1;
- vcpu->mmio_needed = 0;
- }
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
- if (r != EMULATE_DONE) {
- r = 0;
- goto out;
- }
- }
+ r = complete_mmio(vcpu);
+ if (r <= 0)
+ goto out;
+
if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
kvm_register_write(vcpu, VCPU_REGS_RAX,
kvm_run->hypercall.ret);
@@ -5455,6 +5663,18 @@ out:
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
+ /*
+ * We are here if userspace calls get_regs() in the middle of
+ * instruction emulation. Registers state needs to be copied
+ * back from emulation context to vcpu. Usrapace shouldn't do
+ * that usually, but some bad designed PV devices (vmware
+ * backdoor interface) need this to work
+ */
+ struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+ memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+ vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+ }
regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -5482,6 +5702,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
+ vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+
kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
@@ -5592,7 +5815,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
- kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+ kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
kvm_make_request(KVM_REQ_EVENT, vcpu);
return EMULATE_DONE;
}
@@ -5974,8 +6197,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
}
vcpu->arch.pio_data = page_address(page);
- if (!kvm->arch.virtual_tsc_khz)
- kvm_arch_set_tsc_khz(kvm, max_tsc_khz);
+ kvm_init_tsc_catchup(vcpu, max_tsc_khz);
r = kvm_mmu_create(vcpu);
if (r < 0)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c600da830ce..e407ed3df81 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -77,7 +77,7 @@ static inline u32 bit(int bitno)
void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
+int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 20e3f8702d1..bcb394dfbb3 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -12,6 +12,7 @@
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
#include <linux/perf_event.h> /* perf_sw_event */
#include <linux/hugetlb.h> /* hstate_index_to_shift */
+#include <linux/prefetch.h> /* prefetchw */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
new file mode 100644
index 00000000000..90568c33ddb
--- /dev/null
+++ b/arch/x86/net/Makefile
@@ -0,0 +1,4 @@
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
new file mode 100644
index 00000000000..66870223f8c
--- /dev/null
+++ b/arch/x86/net/bpf_jit.S
@@ -0,0 +1,140 @@
+/* bpf_jit.S : BPF JIT helper functions
+ *
+ * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+/*
+ * Calling convention :
+ * rdi : skb pointer
+ * esi : offset of byte(s) to fetch in skb (can be scratched)
+ * r8 : copy of skb->data
+ * r9d : hlen = skb->len - skb->data_len
+ */
+#define SKBDATA %r8
+
+sk_load_word_ind:
+ .globl sk_load_word_ind
+
+ add %ebx,%esi /* offset += X */
+# test %esi,%esi /* if (offset < 0) goto bpf_error; */
+ js bpf_error
+
+sk_load_word:
+ .globl sk_load_word
+
+ mov %r9d,%eax # hlen
+ sub %esi,%eax # hlen - offset
+ cmp $3,%eax
+ jle bpf_slow_path_word
+ mov (SKBDATA,%rsi),%eax
+ bswap %eax /* ntohl() */
+ ret
+
+
+sk_load_half_ind:
+ .globl sk_load_half_ind
+
+ add %ebx,%esi /* offset += X */
+ js bpf_error
+
+sk_load_half:
+ .globl sk_load_half
+
+ mov %r9d,%eax
+ sub %esi,%eax # hlen - offset
+ cmp $1,%eax
+ jle bpf_slow_path_half
+ movzwl (SKBDATA,%rsi),%eax
+ rol $8,%ax # ntohs()
+ ret
+
+sk_load_byte_ind:
+ .globl sk_load_byte_ind
+ add %ebx,%esi /* offset += X */
+ js bpf_error
+
+sk_load_byte:
+ .globl sk_load_byte
+
+ cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
+ jle bpf_slow_path_byte
+ movzbl (SKBDATA,%rsi),%eax
+ ret
+
+/**
+ * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
+ *
+ * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
+ * Must preserve A accumulator (%eax)
+ * Inputs : %esi is the offset value, already known positive
+ */
+ENTRY(sk_load_byte_msh)
+ CFI_STARTPROC
+ cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
+ jle bpf_slow_path_byte_msh
+ movzbl (SKBDATA,%rsi),%ebx
+ and $15,%bl
+ shl $2,%bl
+ ret
+ CFI_ENDPROC
+ENDPROC(sk_load_byte_msh)
+
+bpf_error:
+# force a return 0 from jit handler
+ xor %eax,%eax
+ mov -8(%rbp),%rbx
+ leaveq
+ ret
+
+/* rsi contains offset and can be scratched */
+#define bpf_slow_path_common(LEN) \
+ push %rdi; /* save skb */ \
+ push %r9; \
+ push SKBDATA; \
+/* rsi already has offset */ \
+ mov $LEN,%ecx; /* len */ \
+ lea -12(%rbp),%rdx; \
+ call skb_copy_bits; \
+ test %eax,%eax; \
+ pop SKBDATA; \
+ pop %r9; \
+ pop %rdi
+
+
+bpf_slow_path_word:
+ bpf_slow_path_common(4)
+ js bpf_error
+ mov -12(%rbp),%eax
+ bswap %eax
+ ret
+
+bpf_slow_path_half:
+ bpf_slow_path_common(2)
+ js bpf_error
+ mov -12(%rbp),%ax
+ rol $8,%ax
+ movzwl %ax,%eax
+ ret
+
+bpf_slow_path_byte:
+ bpf_slow_path_common(1)
+ js bpf_error
+ movzbl -12(%rbp),%eax
+ ret
+
+bpf_slow_path_byte_msh:
+ xchg %eax,%ebx /* dont lose A , X is about to be scratched */
+ bpf_slow_path_common(1)
+ js bpf_error
+ movzbl -12(%rbp),%eax
+ and $15,%al
+ shl $2,%al
+ xchg %eax,%ebx
+ ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
new file mode 100644
index 00000000000..bfab3fa10ed
--- /dev/null
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -0,0 +1,654 @@
+/* bpf_jit_comp.c : BPF JIT compiler
+ *
+ * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+
+/*
+ * Conventions :
+ * EAX : BPF A accumulator
+ * EBX : BPF X accumulator
+ * RDI : pointer to skb (first argument given to JIT function)
+ * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
+ * ECX,EDX,ESI : scratch registers
+ * r9d : skb->len - skb->data_len (headlen)
+ * r8 : skb->data
+ * -8(RBP) : saved RBX value
+ * -16(RBP)..-80(RBP) : BPF_MEMWORDS values
+ */
+int bpf_jit_enable __read_mostly;
+
+/*
+ * assembly code in arch/x86/net/bpf_jit.S
+ */
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+
+static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
+{
+ if (len == 1)
+ *ptr = bytes;
+ else if (len == 2)
+ *(u16 *)ptr = bytes;
+ else {
+ *(u32 *)ptr = bytes;
+ barrier();
+ }
+ return ptr + len;
+}
+
+#define EMIT(bytes, len) do { prog = emit_code(prog, bytes, len); } while (0)
+
+#define EMIT1(b1) EMIT(b1, 1)
+#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
+#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
+#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+#define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0)
+
+#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */
+#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */
+
+static inline bool is_imm8(int value)
+{
+ return value <= 127 && value >= -128;
+}
+
+static inline bool is_near(int offset)
+{
+ return offset <= 127 && offset >= -128;
+}
+
+#define EMIT_JMP(offset) \
+do { \
+ if (offset) { \
+ if (is_near(offset)) \
+ EMIT2(0xeb, offset); /* jmp .+off8 */ \
+ else \
+ EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \
+ } \
+} while (0)
+
+/* list of x86 cond jumps opcodes (. + s8)
+ * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
+ */
+#define X86_JB 0x72
+#define X86_JAE 0x73
+#define X86_JE 0x74
+#define X86_JNE 0x75
+#define X86_JBE 0x76
+#define X86_JA 0x77
+
+#define EMIT_COND_JMP(op, offset) \
+do { \
+ if (is_near(offset)) \
+ EMIT2(op, offset); /* jxx .+off8 */ \
+ else { \
+ EMIT2(0x0f, op + 0x10); \
+ EMIT(offset, 4); /* jxx .+off32 */ \
+ } \
+} while (0)
+
+#define COND_SEL(CODE, TOP, FOP) \
+ case CODE: \
+ t_op = TOP; \
+ f_op = FOP; \
+ goto cond_branch
+
+
+#define SEEN_DATAREF 1 /* might call external helpers */
+#define SEEN_XREG 2 /* ebx is used */
+#define SEEN_MEM 4 /* use mem[] for temporary storage */
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+ mm_segment_t old_fs = get_fs();
+
+ set_fs(KERNEL_DS);
+ smp_wmb();
+ flush_icache_range((unsigned long)start, (unsigned long)end);
+ set_fs(old_fs);
+}
+
+
+void bpf_jit_compile(struct sk_filter *fp)
+{
+ u8 temp[64];
+ u8 *prog;
+ unsigned int proglen, oldproglen = 0;
+ int ilen, i;
+ int t_offset, f_offset;
+ u8 t_op, f_op, seen = 0, pass;
+ u8 *image = NULL;
+ u8 *func;
+ int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
+ unsigned int cleanup_addr; /* epilogue code offset */
+ unsigned int *addrs;
+ const struct sock_filter *filter = fp->insns;
+ int flen = fp->len;
+
+ if (!bpf_jit_enable)
+ return;
+
+ addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
+ if (addrs == NULL)
+ return;
+
+ /* Before first pass, make a rough estimation of addrs[]
+ * each bpf instruction is translated to less than 64 bytes
+ */
+ for (proglen = 0, i = 0; i < flen; i++) {
+ proglen += 64;
+ addrs[i] = proglen;
+ }
+ cleanup_addr = proglen; /* epilogue address */
+
+ for (pass = 0; pass < 10; pass++) {
+ /* no prologue/epilogue for trivial filters (RET something) */
+ proglen = 0;
+ prog = temp;
+
+ if (seen) {
+ EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
+ EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */
+ /* note : must save %rbx in case bpf_error is hit */
+ if (seen & (SEEN_XREG | SEEN_DATAREF))
+ EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
+ if (seen & SEEN_XREG)
+ CLEAR_X(); /* make sure we dont leek kernel memory */
+
+ /*
+ * If this filter needs to access skb data,
+ * loads r9 and r8 with :
+ * r9 = skb->len - skb->data_len
+ * r8 = skb->data
+ */
+ if (seen & SEEN_DATAREF) {
+ if (offsetof(struct sk_buff, len) <= 127)
+ /* mov off8(%rdi),%r9d */
+ EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
+ else {
+ /* mov off32(%rdi),%r9d */
+ EMIT3(0x44, 0x8b, 0x8f);
+ EMIT(offsetof(struct sk_buff, len), 4);
+ }
+ if (is_imm8(offsetof(struct sk_buff, data_len)))
+ /* sub off8(%rdi),%r9d */
+ EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len));
+ else {
+ EMIT3(0x44, 0x2b, 0x8f);
+ EMIT(offsetof(struct sk_buff, data_len), 4);
+ }
+
+ if (is_imm8(offsetof(struct sk_buff, data)))
+ /* mov off8(%rdi),%r8 */
+ EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data));
+ else {
+ /* mov off32(%rdi),%r8 */
+ EMIT3(0x4c, 0x8b, 0x87);
+ EMIT(offsetof(struct sk_buff, data), 4);
+ }
+ }
+ }
+
+ switch (filter[0].code) {
+ case BPF_S_RET_K:
+ case BPF_S_LD_W_LEN:
+ case BPF_S_ANC_PROTOCOL:
+ case BPF_S_ANC_IFINDEX:
+ case BPF_S_ANC_MARK:
+ case BPF_S_ANC_RXHASH:
+ case BPF_S_ANC_CPU:
+ case BPF_S_ANC_QUEUE:
+ case BPF_S_LD_W_ABS:
+ case BPF_S_LD_H_ABS:
+ case BPF_S_LD_B_ABS:
+ /* first instruction sets A register (or is RET 'constant') */
+ break;
+ default:
+ /* make sure we dont leak kernel information to user */
+ CLEAR_A(); /* A = 0 */
+ }
+
+ for (i = 0; i < flen; i++) {
+ unsigned int K = filter[i].k;
+
+ switch (filter[i].code) {
+ case BPF_S_ALU_ADD_X: /* A += X; */
+ seen |= SEEN_XREG;
+ EMIT2(0x01, 0xd8); /* add %ebx,%eax */
+ break;
+ case BPF_S_ALU_ADD_K: /* A += K; */
+ if (!K)
+ break;
+ if (is_imm8(K))
+ EMIT3(0x83, 0xc0, K); /* add imm8,%eax */
+ else
+ EMIT1_off32(0x05, K); /* add imm32,%eax */
+ break;
+ case BPF_S_ALU_SUB_X: /* A -= X; */
+ seen |= SEEN_XREG;
+ EMIT2(0x29, 0xd8); /* sub %ebx,%eax */
+ break;
+ case BPF_S_ALU_SUB_K: /* A -= K */
+ if (!K)
+ break;
+ if (is_imm8(K))
+ EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */
+ else
+ EMIT1_off32(0x2d, K); /* sub imm32,%eax */
+ break;
+ case BPF_S_ALU_MUL_X: /* A *= X; */
+ seen |= SEEN_XREG;
+ EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */
+ break;
+ case BPF_S_ALU_MUL_K: /* A *= K */
+ if (is_imm8(K))
+ EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */
+ else {
+ EMIT2(0x69, 0xc0); /* imul imm32,%eax */
+ EMIT(K, 4);
+ }
+ break;
+ case BPF_S_ALU_DIV_X: /* A /= X; */
+ seen |= SEEN_XREG;
+ EMIT2(0x85, 0xdb); /* test %ebx,%ebx */
+ if (pc_ret0 != -1)
+ EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4));
+ else {
+ EMIT_COND_JMP(X86_JNE, 2 + 5);
+ CLEAR_A();
+ EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
+ }
+ EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
+ break;
+ case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
+ EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */
+ EMIT(K, 4);
+ EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */
+ break;
+ case BPF_S_ALU_AND_X:
+ seen |= SEEN_XREG;
+ EMIT2(0x21, 0xd8); /* and %ebx,%eax */
+ break;
+ case BPF_S_ALU_AND_K:
+ if (K >= 0xFFFFFF00) {
+ EMIT2(0x24, K & 0xFF); /* and imm8,%al */
+ } else if (K >= 0xFFFF0000) {
+ EMIT2(0x66, 0x25); /* and imm16,%ax */
+ EMIT2(K, 2);
+ } else {
+ EMIT1_off32(0x25, K); /* and imm32,%eax */
+ }
+ break;
+ case BPF_S_ALU_OR_X:
+ seen |= SEEN_XREG;
+ EMIT2(0x09, 0xd8); /* or %ebx,%eax */
+ break;
+ case BPF_S_ALU_OR_K:
+ if (is_imm8(K))
+ EMIT3(0x83, 0xc8, K); /* or imm8,%eax */
+ else
+ EMIT1_off32(0x0d, K); /* or imm32,%eax */
+ break;
+ case BPF_S_ALU_LSH_X: /* A <<= X; */
+ seen |= SEEN_XREG;
+ EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */
+ break;
+ case BPF_S_ALU_LSH_K:
+ if (K == 0)
+ break;
+ else if (K == 1)
+ EMIT2(0xd1, 0xe0); /* shl %eax */
+ else
+ EMIT3(0xc1, 0xe0, K);
+ break;
+ case BPF_S_ALU_RSH_X: /* A >>= X; */
+ seen |= SEEN_XREG;
+ EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */
+ break;
+ case BPF_S_ALU_RSH_K: /* A >>= K; */
+ if (K == 0)
+ break;
+ else if (K == 1)
+ EMIT2(0xd1, 0xe8); /* shr %eax */
+ else
+ EMIT3(0xc1, 0xe8, K);
+ break;
+ case BPF_S_ALU_NEG:
+ EMIT2(0xf7, 0xd8); /* neg %eax */
+ break;
+ case BPF_S_RET_K:
+ if (!K) {
+ if (pc_ret0 == -1)
+ pc_ret0 = i;
+ CLEAR_A();
+ } else {
+ EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
+ }
+ /* fallinto */
+ case BPF_S_RET_A:
+ if (seen) {
+ if (i != flen - 1) {
+ EMIT_JMP(cleanup_addr - addrs[i]);
+ break;
+ }
+ if (seen & SEEN_XREG)
+ EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */
+ EMIT1(0xc9); /* leaveq */
+ }
+ EMIT1(0xc3); /* ret */
+ break;
+ case BPF_S_MISC_TAX: /* X = A */
+ seen |= SEEN_XREG;
+ EMIT2(0x89, 0xc3); /* mov %eax,%ebx */
+ break;
+ case BPF_S_MISC_TXA: /* A = X */
+ seen |= SEEN_XREG;
+ EMIT2(0x89, 0xd8); /* mov %ebx,%eax */
+ break;
+ case BPF_S_LD_IMM: /* A = K */
+ if (!K)
+ CLEAR_A();
+ else
+ EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
+ break;
+ case BPF_S_LDX_IMM: /* X = K */
+ seen |= SEEN_XREG;
+ if (!K)
+ CLEAR_X();
+ else
+ EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */
+ break;
+ case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */
+ seen |= SEEN_MEM;
+ EMIT3(0x8b, 0x45, 0xf0 - K*4);
+ break;
+ case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */
+ seen |= SEEN_XREG | SEEN_MEM;
+ EMIT3(0x8b, 0x5d, 0xf0 - K*4);
+ break;
+ case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */
+ seen |= SEEN_MEM;
+ EMIT3(0x89, 0x45, 0xf0 - K*4);
+ break;
+ case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
+ seen |= SEEN_XREG | SEEN_MEM;
+ EMIT3(0x89, 0x5d, 0xf0 - K*4);
+ break;
+ case BPF_S_LD_W_LEN: /* A = skb->len; */
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+ if (is_imm8(offsetof(struct sk_buff, len)))
+ /* mov off8(%rdi),%eax */
+ EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len));
+ else {
+ EMIT2(0x8b, 0x87);
+ EMIT(offsetof(struct sk_buff, len), 4);
+ }
+ break;
+ case BPF_S_LDX_W_LEN: /* X = skb->len; */
+ seen |= SEEN_XREG;
+ if (is_imm8(offsetof(struct sk_buff, len)))
+ /* mov off8(%rdi),%ebx */
+ EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len));
+ else {
+ EMIT2(0x8b, 0x9f);
+ EMIT(offsetof(struct sk_buff, len), 4);
+ }
+ break;
+ case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+ if (is_imm8(offsetof(struct sk_buff, protocol))) {
+ /* movzwl off8(%rdi),%eax */
+ EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol));
+ } else {
+ EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
+ EMIT(offsetof(struct sk_buff, protocol), 4);
+ }
+ EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */
+ break;
+ case BPF_S_ANC_IFINDEX:
+ if (is_imm8(offsetof(struct sk_buff, dev))) {
+ /* movq off8(%rdi),%rax */
+ EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev));
+ } else {
+ EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */
+ EMIT(offsetof(struct sk_buff, dev), 4);
+ }
+ EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */
+ EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6));
+ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+ EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */
+ EMIT(offsetof(struct net_device, ifindex), 4);
+ break;
+ case BPF_S_ANC_MARK:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+ if (is_imm8(offsetof(struct sk_buff, mark))) {
+ /* mov off8(%rdi),%eax */
+ EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark));
+ } else {
+ EMIT2(0x8b, 0x87);
+ EMIT(offsetof(struct sk_buff, mark), 4);
+ }
+ break;
+ case BPF_S_ANC_RXHASH:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
+ if (is_imm8(offsetof(struct sk_buff, rxhash))) {
+ /* mov off8(%rdi),%eax */
+ EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash));
+ } else {
+ EMIT2(0x8b, 0x87);
+ EMIT(offsetof(struct sk_buff, rxhash), 4);
+ }
+ break;
+ case BPF_S_ANC_QUEUE:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+ if (is_imm8(offsetof(struct sk_buff, queue_mapping))) {
+ /* movzwl off8(%rdi),%eax */
+ EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping));
+ } else {
+ EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
+ EMIT(offsetof(struct sk_buff, queue_mapping), 4);
+ }
+ break;
+ case BPF_S_ANC_CPU:
+#ifdef CONFIG_SMP
+ EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */
+ EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */
+#else
+ CLEAR_A();
+#endif
+ break;
+ case BPF_S_LD_W_ABS:
+ func = sk_load_word;
+common_load: seen |= SEEN_DATAREF;
+ if ((int)K < 0)
+ goto out;
+ t_offset = func - (image + addrs[i]);
+ EMIT1_off32(0xbe, K); /* mov imm32,%esi */
+ EMIT1_off32(0xe8, t_offset); /* call */
+ break;
+ case BPF_S_LD_H_ABS:
+ func = sk_load_half;
+ goto common_load;
+ case BPF_S_LD_B_ABS:
+ func = sk_load_byte;
+ goto common_load;
+ case BPF_S_LDX_B_MSH:
+ if ((int)K < 0) {
+ if (pc_ret0 != -1) {
+ EMIT_JMP(addrs[pc_ret0] - addrs[i]);
+ break;
+ }
+ CLEAR_A();
+ EMIT_JMP(cleanup_addr - addrs[i]);
+ break;
+ }
+ seen |= SEEN_DATAREF | SEEN_XREG;
+ t_offset = sk_load_byte_msh - (image + addrs[i]);
+ EMIT1_off32(0xbe, K); /* mov imm32,%esi */
+ EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
+ break;
+ case BPF_S_LD_W_IND:
+ func = sk_load_word_ind;
+common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
+ t_offset = func - (image + addrs[i]);
+ EMIT1_off32(0xbe, K); /* mov imm32,%esi */
+ EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
+ break;
+ case BPF_S_LD_H_IND:
+ func = sk_load_half_ind;
+ goto common_load_ind;
+ case BPF_S_LD_B_IND:
+ func = sk_load_byte_ind;
+ goto common_load_ind;
+ case BPF_S_JMP_JA:
+ t_offset = addrs[i + K] - addrs[i];
+ EMIT_JMP(t_offset);
+ break;
+ COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE);
+ COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB);
+ COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE);
+ COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE);
+ COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE);
+ COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB);
+ COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE);
+ COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE);
+
+cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
+ t_offset = addrs[i + filter[i].jt] - addrs[i];
+
+ /* same targets, can avoid doing the test :) */
+ if (filter[i].jt == filter[i].jf) {
+ EMIT_JMP(t_offset);
+ break;
+ }
+
+ switch (filter[i].code) {
+ case BPF_S_JMP_JGT_X:
+ case BPF_S_JMP_JGE_X:
+ case BPF_S_JMP_JEQ_X:
+ seen |= SEEN_XREG;
+ EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */
+ break;
+ case BPF_S_JMP_JSET_X:
+ seen |= SEEN_XREG;
+ EMIT2(0x85, 0xd8); /* test %ebx,%eax */
+ break;
+ case BPF_S_JMP_JEQ_K:
+ if (K == 0) {
+ EMIT2(0x85, 0xc0); /* test %eax,%eax */
+ break;
+ }
+ case BPF_S_JMP_JGT_K:
+ case BPF_S_JMP_JGE_K:
+ if (K <= 127)
+ EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */
+ else
+ EMIT1_off32(0x3d, K); /* cmp imm32,%eax */
+ break;
+ case BPF_S_JMP_JSET_K:
+ if (K <= 0xFF)
+ EMIT2(0xa8, K); /* test imm8,%al */
+ else if (!(K & 0xFFFF00FF))
+ EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */
+ else if (K <= 0xFFFF) {
+ EMIT2(0x66, 0xa9); /* test imm16,%ax */
+ EMIT(K, 2);
+ } else {
+ EMIT1_off32(0xa9, K); /* test imm32,%eax */
+ }
+ break;
+ }
+ if (filter[i].jt != 0) {
+ if (filter[i].jf)
+ t_offset += is_near(f_offset) ? 2 : 6;
+ EMIT_COND_JMP(t_op, t_offset);
+ if (filter[i].jf)
+ EMIT_JMP(f_offset);
+ break;
+ }
+ EMIT_COND_JMP(f_op, f_offset);
+ break;
+ default:
+ /* hmm, too complex filter, give up with jit compiler */
+ goto out;
+ }
+ ilen = prog - temp;
+ if (image) {
+ if (unlikely(proglen + ilen > oldproglen)) {
+ pr_err("bpb_jit_compile fatal error\n");
+ kfree(addrs);
+ module_free(NULL, image);
+ return;
+ }
+ memcpy(image + proglen, temp, ilen);
+ }
+ proglen += ilen;
+ addrs[i] = proglen;
+ prog = temp;
+ }
+ /* last bpf instruction is always a RET :
+ * use it to give the cleanup instruction(s) addr
+ */
+ cleanup_addr = proglen - 1; /* ret */
+ if (seen)
+ cleanup_addr -= 1; /* leaveq */
+ if (seen & SEEN_XREG)
+ cleanup_addr -= 4; /* mov -8(%rbp),%rbx */
+
+ if (image) {
+ WARN_ON(proglen != oldproglen);
+ break;
+ }
+ if (proglen == oldproglen) {
+ image = module_alloc(max_t(unsigned int,
+ proglen,
+ sizeof(struct work_struct)));
+ if (!image)
+ goto out;
+ }
+ oldproglen = proglen;
+ }
+ if (bpf_jit_enable > 1)
+ pr_err("flen=%d proglen=%u pass=%d image=%p\n",
+ flen, proglen, pass, image);
+
+ if (image) {
+ if (bpf_jit_enable > 1)
+ print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS,
+ 16, 1, image, proglen, false);
+
+ bpf_flush_icache(image, image + proglen);
+
+ fp->bpf_func = (void *)image;
+ }
+out:
+ kfree(addrs);
+ return;
+}
+
+static void jit_free_defer(struct work_struct *arg)
+{
+ module_free(NULL, arg);
+}
+
+/* run from softirq, we must use a work_struct to call
+ * module_free() from process context
+ */
+void bpf_jit_free(struct sk_filter *fp)
+{
+ if (fp->bpf_func != sk_run_filter) {
+ struct work_struct *work = (struct work_struct *)fp->bpf_func;
+
+ INIT_WORK(work, jit_free_defer);
+ schedule_work(work);
+ }
+}