From 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Apr 2005 15:20:36 -0700 Subject: Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! --- arch/ppc64/Kconfig | 423 +++ arch/ppc64/Kconfig.debug | 57 + arch/ppc64/Makefile | 122 + arch/ppc64/boot/Makefile | 117 + arch/ppc64/boot/README | 11 + arch/ppc64/boot/addRamDisk.c | 300 ++ arch/ppc64/boot/addnote.c | 165 ++ arch/ppc64/boot/crt0.S | 48 + arch/ppc64/boot/div64.S | 58 + arch/ppc64/boot/install.sh | 42 + arch/ppc64/boot/main.c | 331 +++ arch/ppc64/boot/mknote.c | 43 + arch/ppc64/boot/piggyback.c | 83 + arch/ppc64/boot/ppc32-types.h | 36 + arch/ppc64/boot/prom.c | 637 ++++ arch/ppc64/boot/start.c | 654 ++++ arch/ppc64/boot/string.S | 216 ++ arch/ppc64/boot/zImage.lds | 90 + arch/ppc64/boot/zlib.c | 2194 ++++++++++++++ arch/ppc64/boot/zlib.h | 432 +++ arch/ppc64/configs/g5_defconfig | 1335 +++++++++ arch/ppc64/configs/iSeries_defconfig | 911 ++++++ arch/ppc64/configs/maple_defconfig | 1000 +++++++ arch/ppc64/configs/pSeries_defconfig | 1273 ++++++++ arch/ppc64/defconfig | 1341 +++++++++ arch/ppc64/kernel/HvCall.c | 36 + arch/ppc64/kernel/HvLpConfig.c | 27 + arch/ppc64/kernel/HvLpEvent.c | 88 + arch/ppc64/kernel/ItLpQueue.c | 167 ++ arch/ppc64/kernel/LparData.c | 250 ++ arch/ppc64/kernel/Makefile | 68 + arch/ppc64/kernel/XmPciLpEvent.c | 190 ++ arch/ppc64/kernel/align.c | 396 +++ arch/ppc64/kernel/asm-offsets.c | 193 ++ arch/ppc64/kernel/binfmt_elf32.c | 78 + arch/ppc64/kernel/bitops.c | 147 + arch/ppc64/kernel/btext.c | 751 +++++ arch/ppc64/kernel/cpu_setup_power4.S | 214 ++ arch/ppc64/kernel/cputable.c | 197 ++ arch/ppc64/kernel/dma.c | 147 + arch/ppc64/kernel/eeh.c | 937 ++++++ arch/ppc64/kernel/entry.S | 845 ++++++ arch/ppc64/kernel/head.S | 2139 +++++++++++++ arch/ppc64/kernel/hvCall.S | 98 + arch/ppc64/kernel/hvconsole.c | 121 + arch/ppc64/kernel/hvcserver.c | 249 ++ arch/ppc64/kernel/i8259.c | 177 ++ arch/ppc64/kernel/i8259.h | 17 + arch/ppc64/kernel/iSeries_VpdInfo.c | 277 ++ arch/ppc64/kernel/iSeries_htab.c | 242 ++ arch/ppc64/kernel/iSeries_iommu.c | 175 ++ arch/ppc64/kernel/iSeries_irq.c | 209 ++ arch/ppc64/kernel/iSeries_pci.c | 912 ++++++ arch/ppc64/kernel/iSeries_pci_reset.c | 104 + arch/ppc64/kernel/iSeries_proc.c | 162 + arch/ppc64/kernel/iSeries_setup.c | 877 ++++++ arch/ppc64/kernel/iSeries_setup.h | 26 + arch/ppc64/kernel/iSeries_smp.c | 151 + arch/ppc64/kernel/idle.c | 380 +++ arch/ppc64/kernel/idle_power4.S | 79 + arch/ppc64/kernel/init_task.c | 36 + arch/ppc64/kernel/ioctl32.c | 51 + arch/ppc64/kernel/iomap.c | 126 + arch/ppc64/kernel/iommu.c | 567 ++++ arch/ppc64/kernel/irq.c | 519 ++++ arch/ppc64/kernel/kprobes.c | 290 ++ arch/ppc64/kernel/lmb.c | 372 +++ arch/ppc64/kernel/lparcfg.c | 611 ++++ arch/ppc64/kernel/maple_pci.c | 521 ++++ arch/ppc64/kernel/maple_setup.c | 240 ++ arch/ppc64/kernel/maple_time.c | 226 ++ arch/ppc64/kernel/mf.c | 1239 ++++++++ arch/ppc64/kernel/misc.S | 1234 ++++++++ arch/ppc64/kernel/module.c | 442 +++ arch/ppc64/kernel/mpic.c | 859 ++++++ arch/ppc64/kernel/mpic.h | 267 ++ arch/ppc64/kernel/nvram.c | 746 +++++ arch/ppc64/kernel/of_device.c | 272 ++ arch/ppc64/kernel/pSeries_hvCall.S | 123 + arch/ppc64/kernel/pSeries_iommu.c | 570 ++++ arch/ppc64/kernel/pSeries_lpar.c | 531 ++++ arch/ppc64/kernel/pSeries_nvram.c | 148 + arch/ppc64/kernel/pSeries_pci.c | 602 ++++ arch/ppc64/kernel/pSeries_reconfig.c | 434 +++ arch/ppc64/kernel/pSeries_setup.c | 612 ++++ arch/ppc64/kernel/pSeries_smp.c | 451 +++ arch/ppc64/kernel/pacaData.c | 224 ++ arch/ppc64/kernel/pci.c | 942 ++++++ arch/ppc64/kernel/pci.h | 51 + arch/ppc64/kernel/pci_direct_iommu.c | 95 + arch/ppc64/kernel/pci_dn.c | 198 ++ arch/ppc64/kernel/pci_iommu.c | 139 + arch/ppc64/kernel/pmac.h | 31 + arch/ppc64/kernel/pmac_feature.c | 676 +++++ arch/ppc64/kernel/pmac_low_i2c.c | 523 ++++ arch/ppc64/kernel/pmac_nvram.c | 495 ++++ arch/ppc64/kernel/pmac_pci.c | 793 +++++ arch/ppc64/kernel/pmac_setup.c | 511 ++++ arch/ppc64/kernel/pmac_smp.c | 316 ++ arch/ppc64/kernel/pmac_time.c | 201 ++ arch/ppc64/kernel/pmc.c | 67 + arch/ppc64/kernel/ppc_ksyms.c | 95 + arch/ppc64/kernel/proc_ppc64.c | 128 + arch/ppc64/kernel/process.c | 688 +++++ arch/ppc64/kernel/prom.c | 1820 ++++++++++++ arch/ppc64/kernel/prom_init.c | 1838 ++++++++++++ arch/ppc64/kernel/ptrace.c | 328 ++ arch/ppc64/kernel/ptrace32.c | 420 +++ arch/ppc64/kernel/ras.c | 356 +++ arch/ppc64/kernel/rtas-proc.c | 807 +++++ arch/ppc64/kernel/rtas.c | 657 ++++ arch/ppc64/kernel/rtas_flash.c | 725 +++++ arch/ppc64/kernel/rtasd.c | 527 ++++ arch/ppc64/kernel/rtc.c | 440 +++ arch/ppc64/kernel/scanlog.c | 245 ++ arch/ppc64/kernel/semaphore.c | 136 + arch/ppc64/kernel/setup.c | 1392 +++++++++ arch/ppc64/kernel/signal.c | 575 ++++ arch/ppc64/kernel/signal32.c | 989 ++++++ arch/ppc64/kernel/smp-tbsync.c | 179 ++ arch/ppc64/kernel/smp.c | 622 ++++ arch/ppc64/kernel/sys_ppc32.c | 1329 +++++++++ arch/ppc64/kernel/syscalls.c | 258 ++ arch/ppc64/kernel/sysfs.c | 431 +++ arch/ppc64/kernel/time.c | 827 ++++++ arch/ppc64/kernel/traps.c | 565 ++++ arch/ppc64/kernel/u3_iommu.c | 349 +++ arch/ppc64/kernel/udbg.c | 360 +++ arch/ppc64/kernel/vdso.c | 614 ++++ arch/ppc64/kernel/vdso32/Makefile | 36 + arch/ppc64/kernel/vdso32/cacheflush.S | 65 + arch/ppc64/kernel/vdso32/datapage.S | 68 + arch/ppc64/kernel/vdso32/gettimeofday.S | 139 + arch/ppc64/kernel/vdso32/sigtramp.S | 300 ++ arch/ppc64/kernel/vdso32/vdso32.lds.S | 111 + arch/ppc64/kernel/vdso32/vdso32_wrapper.S | 13 + arch/ppc64/kernel/vdso64/Makefile | 35 + arch/ppc64/kernel/vdso64/cacheflush.S | 64 + arch/ppc64/kernel/vdso64/datapage.S | 68 + arch/ppc64/kernel/vdso64/gettimeofday.S | 91 + arch/ppc64/kernel/vdso64/sigtramp.S | 294 ++ arch/ppc64/kernel/vdso64/vdso64.lds.S | 110 + arch/ppc64/kernel/vdso64/vdso64_wrapper.S | 13 + arch/ppc64/kernel/vecemu.c | 346 +++ arch/ppc64/kernel/vector.S | 172 ++ arch/ppc64/kernel/vio.c | 640 ++++ arch/ppc64/kernel/viopath.c | 675 +++++ arch/ppc64/kernel/vmlinux.lds.S | 145 + arch/ppc64/kernel/xics.c | 713 +++++ arch/ppc64/lib/Makefile | 18 + arch/ppc64/lib/checksum.S | 229 ++ arch/ppc64/lib/copypage.S | 121 + arch/ppc64/lib/copyuser.S | 576 ++++ arch/ppc64/lib/dec_and_lock.c | 55 + arch/ppc64/lib/e2a.c | 108 + arch/ppc64/lib/locks.c | 95 + arch/ppc64/lib/memcpy.S | 172 ++ arch/ppc64/lib/sstep.c | 141 + arch/ppc64/lib/strcase.c | 31 + arch/ppc64/lib/string.S | 285 ++ arch/ppc64/lib/usercopy.c | 41 + arch/ppc64/mm/Makefile | 11 + arch/ppc64/mm/fault.c | 312 ++ arch/ppc64/mm/hash_low.S | 287 ++ arch/ppc64/mm/hash_native.c | 423 +++ arch/ppc64/mm/hash_utils.c | 439 +++ arch/ppc64/mm/hugetlbpage.c | 904 ++++++ arch/ppc64/mm/imalloc.c | 312 ++ arch/ppc64/mm/init.c | 927 ++++++ arch/ppc64/mm/mmap.c | 86 + arch/ppc64/mm/numa.c | 734 +++++ arch/ppc64/mm/slb.c | 159 + arch/ppc64/mm/slb_low.S | 154 + arch/ppc64/mm/stab.c | 239 ++ arch/ppc64/mm/tlb.c | 180 ++ arch/ppc64/oprofile/Kconfig | 23 + arch/ppc64/oprofile/Makefile | 9 + arch/ppc64/oprofile/common.c | 186 ++ arch/ppc64/oprofile/op_impl.h | 108 + arch/ppc64/oprofile/op_model_power4.c | 313 ++ arch/ppc64/oprofile/op_model_rs64.c | 219 ++ arch/ppc64/xmon/Makefile | 5 + arch/ppc64/xmon/ansidecl.h | 141 + arch/ppc64/xmon/nonstdio.h | 22 + arch/ppc64/xmon/ppc-dis.c | 184 ++ arch/ppc64/xmon/ppc-opc.c | 4620 +++++++++++++++++++++++++++++ arch/ppc64/xmon/ppc.h | 307 ++ arch/ppc64/xmon/privinst.h | 65 + arch/ppc64/xmon/setjmp.S | 73 + arch/ppc64/xmon/start.c | 185 ++ arch/ppc64/xmon/subr_prf.c | 55 + arch/ppc64/xmon/xmon.c | 2506 ++++++++++++++++ 192 files changed, 78254 insertions(+) create mode 100644 arch/ppc64/Kconfig create mode 100644 arch/ppc64/Kconfig.debug create mode 100644 arch/ppc64/Makefile create mode 100644 arch/ppc64/boot/Makefile create mode 100644 arch/ppc64/boot/README create mode 100644 arch/ppc64/boot/addRamDisk.c create mode 100644 arch/ppc64/boot/addnote.c create mode 100644 arch/ppc64/boot/crt0.S create mode 100644 arch/ppc64/boot/div64.S create mode 100644 arch/ppc64/boot/install.sh create mode 100644 arch/ppc64/boot/main.c create mode 100644 arch/ppc64/boot/mknote.c create mode 100644 arch/ppc64/boot/piggyback.c create mode 100644 arch/ppc64/boot/ppc32-types.h create mode 100644 arch/ppc64/boot/prom.c create mode 100644 arch/ppc64/boot/start.c create mode 100644 arch/ppc64/boot/string.S create mode 100644 arch/ppc64/boot/zImage.lds create mode 100644 arch/ppc64/boot/zlib.c create mode 100644 arch/ppc64/boot/zlib.h create mode 100644 arch/ppc64/configs/g5_defconfig create mode 100644 arch/ppc64/configs/iSeries_defconfig create mode 100644 arch/ppc64/configs/maple_defconfig create mode 100644 arch/ppc64/configs/pSeries_defconfig create mode 100644 arch/ppc64/defconfig create mode 100644 arch/ppc64/kernel/HvCall.c create mode 100644 arch/ppc64/kernel/HvLpConfig.c create mode 100644 arch/ppc64/kernel/HvLpEvent.c create mode 100644 arch/ppc64/kernel/ItLpQueue.c create mode 100644 arch/ppc64/kernel/LparData.c create mode 100644 arch/ppc64/kernel/Makefile create mode 100644 arch/ppc64/kernel/XmPciLpEvent.c create mode 100644 arch/ppc64/kernel/align.c create mode 100644 arch/ppc64/kernel/asm-offsets.c create mode 100644 arch/ppc64/kernel/binfmt_elf32.c create mode 100644 arch/ppc64/kernel/bitops.c create mode 100644 arch/ppc64/kernel/btext.c create mode 100644 arch/ppc64/kernel/cpu_setup_power4.S create mode 100644 arch/ppc64/kernel/cputable.c create mode 100644 arch/ppc64/kernel/dma.c create mode 100644 arch/ppc64/kernel/eeh.c create mode 100644 arch/ppc64/kernel/entry.S create mode 100644 arch/ppc64/kernel/head.S create mode 100644 arch/ppc64/kernel/hvCall.S create mode 100644 arch/ppc64/kernel/hvconsole.c create mode 100644 arch/ppc64/kernel/hvcserver.c create mode 100644 arch/ppc64/kernel/i8259.c create mode 100644 arch/ppc64/kernel/i8259.h create mode 100644 arch/ppc64/kernel/iSeries_VpdInfo.c create mode 100644 arch/ppc64/kernel/iSeries_htab.c create mode 100644 arch/ppc64/kernel/iSeries_iommu.c create mode 100644 arch/ppc64/kernel/iSeries_irq.c create mode 100644 arch/ppc64/kernel/iSeries_pci.c create mode 100644 arch/ppc64/kernel/iSeries_pci_reset.c create mode 100644 arch/ppc64/kernel/iSeries_proc.c create mode 100644 arch/ppc64/kernel/iSeries_setup.c create mode 100644 arch/ppc64/kernel/iSeries_setup.h create mode 100644 arch/ppc64/kernel/iSeries_smp.c create mode 100644 arch/ppc64/kernel/idle.c create mode 100644 arch/ppc64/kernel/idle_power4.S create mode 100644 arch/ppc64/kernel/init_task.c create mode 100644 arch/ppc64/kernel/ioctl32.c create mode 100644 arch/ppc64/kernel/iomap.c create mode 100644 arch/ppc64/kernel/iommu.c create mode 100644 arch/ppc64/kernel/irq.c create mode 100644 arch/ppc64/kernel/kprobes.c create mode 100644 arch/ppc64/kernel/lmb.c create mode 100644 arch/ppc64/kernel/lparcfg.c create mode 100644 arch/ppc64/kernel/maple_pci.c create mode 100644 arch/ppc64/kernel/maple_setup.c create mode 100644 arch/ppc64/kernel/maple_time.c create mode 100644 arch/ppc64/kernel/mf.c create mode 100644 arch/ppc64/kernel/misc.S create mode 100644 arch/ppc64/kernel/module.c create mode 100644 arch/ppc64/kernel/mpic.c create mode 100644 arch/ppc64/kernel/mpic.h create mode 100644 arch/ppc64/kernel/nvram.c create mode 100644 arch/ppc64/kernel/of_device.c create mode 100644 arch/ppc64/kernel/pSeries_hvCall.S create mode 100644 arch/ppc64/kernel/pSeries_iommu.c create mode 100644 arch/ppc64/kernel/pSeries_lpar.c create mode 100644 arch/ppc64/kernel/pSeries_nvram.c create mode 100644 arch/ppc64/kernel/pSeries_pci.c create mode 100644 arch/ppc64/kernel/pSeries_reconfig.c create mode 100644 arch/ppc64/kernel/pSeries_setup.c create mode 100644 arch/ppc64/kernel/pSeries_smp.c create mode 100644 arch/ppc64/kernel/pacaData.c create mode 100644 arch/ppc64/kernel/pci.c create mode 100644 arch/ppc64/kernel/pci.h create mode 100644 arch/ppc64/kernel/pci_direct_iommu.c create mode 100644 arch/ppc64/kernel/pci_dn.c create mode 100644 arch/ppc64/kernel/pci_iommu.c create mode 100644 arch/ppc64/kernel/pmac.h create mode 100644 arch/ppc64/kernel/pmac_feature.c create mode 100644 arch/ppc64/kernel/pmac_low_i2c.c create mode 100644 arch/ppc64/kernel/pmac_nvram.c create mode 100644 arch/ppc64/kernel/pmac_pci.c create mode 100644 arch/ppc64/kernel/pmac_setup.c create mode 100644 arch/ppc64/kernel/pmac_smp.c create mode 100644 arch/ppc64/kernel/pmac_time.c create mode 100644 arch/ppc64/kernel/pmc.c create mode 100644 arch/ppc64/kernel/ppc_ksyms.c create mode 100644 arch/ppc64/kernel/proc_ppc64.c create mode 100644 arch/ppc64/kernel/process.c create mode 100644 arch/ppc64/kernel/prom.c create mode 100644 arch/ppc64/kernel/prom_init.c create mode 100644 arch/ppc64/kernel/ptrace.c create mode 100644 arch/ppc64/kernel/ptrace32.c create mode 100644 arch/ppc64/kernel/ras.c create mode 100644 arch/ppc64/kernel/rtas-proc.c create mode 100644 arch/ppc64/kernel/rtas.c create mode 100644 arch/ppc64/kernel/rtas_flash.c create mode 100644 arch/ppc64/kernel/rtasd.c create mode 100644 arch/ppc64/kernel/rtc.c create mode 100644 arch/ppc64/kernel/scanlog.c create mode 100644 arch/ppc64/kernel/semaphore.c create mode 100644 arch/ppc64/kernel/setup.c create mode 100644 arch/ppc64/kernel/signal.c create mode 100644 arch/ppc64/kernel/signal32.c create mode 100644 arch/ppc64/kernel/smp-tbsync.c create mode 100644 arch/ppc64/kernel/smp.c create mode 100644 arch/ppc64/kernel/sys_ppc32.c create mode 100644 arch/ppc64/kernel/syscalls.c create mode 100644 arch/ppc64/kernel/sysfs.c create mode 100644 arch/ppc64/kernel/time.c create mode 100644 arch/ppc64/kernel/traps.c create mode 100644 arch/ppc64/kernel/u3_iommu.c create mode 100644 arch/ppc64/kernel/udbg.c create mode 100644 arch/ppc64/kernel/vdso.c create mode 100644 arch/ppc64/kernel/vdso32/Makefile create mode 100644 arch/ppc64/kernel/vdso32/cacheflush.S create mode 100644 arch/ppc64/kernel/vdso32/datapage.S create mode 100644 arch/ppc64/kernel/vdso32/gettimeofday.S create mode 100644 arch/ppc64/kernel/vdso32/sigtramp.S create mode 100644 arch/ppc64/kernel/vdso32/vdso32.lds.S create mode 100644 arch/ppc64/kernel/vdso32/vdso32_wrapper.S create mode 100644 arch/ppc64/kernel/vdso64/Makefile create mode 100644 arch/ppc64/kernel/vdso64/cacheflush.S create mode 100644 arch/ppc64/kernel/vdso64/datapage.S create mode 100644 arch/ppc64/kernel/vdso64/gettimeofday.S create mode 100644 arch/ppc64/kernel/vdso64/sigtramp.S create mode 100644 arch/ppc64/kernel/vdso64/vdso64.lds.S create mode 100644 arch/ppc64/kernel/vdso64/vdso64_wrapper.S create mode 100644 arch/ppc64/kernel/vecemu.c create mode 100644 arch/ppc64/kernel/vector.S create mode 100644 arch/ppc64/kernel/vio.c create mode 100644 arch/ppc64/kernel/viopath.c create mode 100644 arch/ppc64/kernel/vmlinux.lds.S create mode 100644 arch/ppc64/kernel/xics.c create mode 100644 arch/ppc64/lib/Makefile create mode 100644 arch/ppc64/lib/checksum.S create mode 100644 arch/ppc64/lib/copypage.S create mode 100644 arch/ppc64/lib/copyuser.S create mode 100644 arch/ppc64/lib/dec_and_lock.c create mode 100644 arch/ppc64/lib/e2a.c create mode 100644 arch/ppc64/lib/locks.c create mode 100644 arch/ppc64/lib/memcpy.S create mode 100644 arch/ppc64/lib/sstep.c create mode 100644 arch/ppc64/lib/strcase.c create mode 100644 arch/ppc64/lib/string.S create mode 100644 arch/ppc64/lib/usercopy.c create mode 100644 arch/ppc64/mm/Makefile create mode 100644 arch/ppc64/mm/fault.c create mode 100644 arch/ppc64/mm/hash_low.S create mode 100644 arch/ppc64/mm/hash_native.c create mode 100644 arch/ppc64/mm/hash_utils.c create mode 100644 arch/ppc64/mm/hugetlbpage.c create mode 100644 arch/ppc64/mm/imalloc.c create mode 100644 arch/ppc64/mm/init.c create mode 100644 arch/ppc64/mm/mmap.c create mode 100644 arch/ppc64/mm/numa.c create mode 100644 arch/ppc64/mm/slb.c create mode 100644 arch/ppc64/mm/slb_low.S create mode 100644 arch/ppc64/mm/stab.c create mode 100644 arch/ppc64/mm/tlb.c create mode 100644 arch/ppc64/oprofile/Kconfig create mode 100644 arch/ppc64/oprofile/Makefile create mode 100644 arch/ppc64/oprofile/common.c create mode 100644 arch/ppc64/oprofile/op_impl.h create mode 100644 arch/ppc64/oprofile/op_model_power4.c create mode 100644 arch/ppc64/oprofile/op_model_rs64.c create mode 100644 arch/ppc64/xmon/Makefile create mode 100644 arch/ppc64/xmon/ansidecl.h create mode 100644 arch/ppc64/xmon/nonstdio.h create mode 100644 arch/ppc64/xmon/ppc-dis.c create mode 100644 arch/ppc64/xmon/ppc-opc.c create mode 100644 arch/ppc64/xmon/ppc.h create mode 100644 arch/ppc64/xmon/privinst.h create mode 100644 arch/ppc64/xmon/setjmp.S create mode 100644 arch/ppc64/xmon/start.c create mode 100644 arch/ppc64/xmon/subr_prf.c create mode 100644 arch/ppc64/xmon/xmon.c (limited to 'arch/ppc64') diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig new file mode 100644 index 00000000000..15c4a1455ca --- /dev/null +++ b/arch/ppc64/Kconfig @@ -0,0 +1,423 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. +# + +config 64BIT + def_bool y + +config MMU + bool + default y + +config UID16 + bool + +config RWSEM_GENERIC_SPINLOCK + bool + +config RWSEM_XCHGADD_ALGORITHM + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config GENERIC_ISA_DMA + bool + default y + +config HAVE_DEC_LOCK + bool + default y + +config EARLY_PRINTK + bool + default y + +config COMPAT + bool + default y + +config FRAME_POINTER + bool + default y + +# We optimistically allocate largepages from the VM, so make the limit +# large enough (16MB). This badly named config option is actually +# max order + 1 +config FORCE_MAX_ZONEORDER + int + default "13" + +source "init/Kconfig" + +config SYSVIPC_COMPAT + bool + depends on COMPAT && SYSVIPC + default y + +menu "Platform support" + +choice + prompt "Platform Type" + default PPC_MULTIPLATFORM + +config PPC_ISERIES + bool "IBM Legacy iSeries" + +config PPC_MULTIPLATFORM + bool "Generic" + +endchoice + +config PPC_PSERIES + depends on PPC_MULTIPLATFORM + bool " IBM pSeries & new iSeries" + default y + +config PPC_PMAC + depends on PPC_MULTIPLATFORM + bool " Apple G5 based machines" + default y + select U3_DART + +config PPC_MAPLE + depends on PPC_MULTIPLATFORM + bool " Maple 970FX Evaluation Board" + select U3_DART + select MPIC_BROKEN_U3 + default n + help + This option enables support for the Maple 970FX Evaluation Board. + For more informations, refer to + +config PPC + bool + default y + +config PPC64 + bool + default y + +config PPC_OF + depends on PPC_MULTIPLATFORM + bool + default y + +# VMX is pSeries only for now until somebody writes the iSeries +# exception vectors for it +config ALTIVEC + bool "Support for VMX (Altivec) vector unit" + depends on PPC_MULTIPLATFORM + default y + +config PPC_SPLPAR + depends on PPC_PSERIES + bool "Support for shared-processor logical partitions" + default n + help + Enabling this option will make the kernel run more efficiently + on logically-partitioned pSeries systems which use shared + processors, that is, which share physical processors between + two or more partitions. + +config IBMVIO + depends on PPC_PSERIES || PPC_ISERIES + bool + default y + +config U3_DART + bool + depends on PPC_MULTIPLATFORM + default n + +config MPIC_BROKEN_U3 + bool + depends on PPC_MAPLE + default y + +config PPC_PMAC64 + bool + depends on PPC_PMAC + default y + +config BOOTX_TEXT + bool "Support for early boot text console" + depends PPC_OF + help + Say Y here to see progress messages from the boot firmware in text + mode. Requires an Open Firmware compatible video card. + +config POWER4_ONLY + bool "Optimize for POWER4" + default n + ---help--- + Cause the compiler to optimize for POWER4 processors. The resulting + binary will not work on POWER3 or RS64 processors when compiled with + binutils 2.15 or later. + +config IOMMU_VMERGE + bool "Enable IOMMU virtual merging (EXPERIMENTAL)" + depends on EXPERIMENTAL + default n + help + Cause IO segments sent to a device for DMA to be merged virtually + by the IOMMU when they happen to have been allocated contiguously. + This doesn't add pressure to the IOMMU allocator. However, some + drivers don't support getting large merged segments coming back + from *_map_sg(). Say Y if you know the drivers you are using are + properly handling this case. + +config SMP + bool "Symmetric multi-processing support" + ---help--- + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on single-processor machines. + On a single-processor machine, the kernel will run faster if you say + N here. + + If you don't know what to do here, say Y. + +config NR_CPUS + int "Maximum number of CPUs (2-128)" + range 2 128 + depends on SMP + default "32" + +config HMT + bool "Hardware multithreading" + depends on SMP && PPC_PSERIES && BROKEN + help + This option enables hardware multithreading on RS64 cpus. + pSeries systems p620 and p660 have such a cpu type. + +config DISCONTIGMEM + bool "Discontiguous Memory Support" + depends on SMP && PPC_PSERIES + +config NUMA + bool "NUMA support" + depends on DISCONTIGMEM + +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP + default off + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with POWER5 cpus at a cost of slightly increased + overhead in some places. If unsure say N here. + +config PREEMPT + bool "Preemptible Kernel" + help + This option reduces the latency of the kernel when reacting to + real-time or interactive events by allowing a low priority process to + be preempted even if it is in kernel mode executing a system call. + + Say Y here if you are building a kernel for a desktop, embedded + or real-time system. Say N if you are unsure. + +config PREEMPT_BKL + bool "Preempt The Big Kernel Lock" + depends on PREEMPT + default y + help + This option reduces the latency of the kernel by making the + big kernel lock preemptible. + + Say Y here if you are building a kernel for a desktop system. + Say N if you are unsure. + +config EEH + bool "PCI Extended Error Handling (EEH)" if EMBEDDED + depends on PPC_PSERIES + default y if !EMBEDDED + +# +# Use the generic interrupt handling code in kernel/irq/: +# +config GENERIC_HARDIRQS + bool + default y + +config MSCHUNKS + bool + depends on PPC_ISERIES + default y + + +config PPC_RTAS + bool + depends on PPC_PSERIES + default y + +config RTAS_PROC + bool "Proc interface to RTAS" + depends on PPC_RTAS + +config RTAS_FLASH + tristate "Firmware flash interface" + depends on RTAS_PROC + +config SCANLOG + tristate "Scanlog dump interface" + depends on RTAS_PROC && PPC_PSERIES + +config LPARCFG + tristate "LPAR Configuration Data" + depends on PPC_PSERIES || PPC_ISERIES + help + Provide system capacity information via human readable + = pairs through a /proc/ppc64/lparcfg interface. + +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + default y + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc//seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. Only embedded should say N here. + +endmenu + + +menu "General setup" + +config ISA + bool + help + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff + inside your box. If you have an Apple machine, say N here; if you + have an IBM RS/6000 or pSeries machine or a PReP machine, say Y. If + you have an embedded board, consult your board documentation. + +config SBUS + bool + +config MCA + bool + +config EISA + bool + +config PCI + bool + default y + help + Find out whether your system includes a PCI bus. PCI is the name of + a bus system, i.e. the way the CPU talks to the other stuff inside + your box. If you say Y here, the kernel will include drivers and + infrastructure code to support PCI bus devices. + +config PCI_DOMAINS + bool + default PCI + +source "fs/Kconfig.binfmt" + +source "drivers/pci/Kconfig" + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) + select HOTPLUG + ---help--- + Say Y here to be able to turn CPUs off and on. + + Say N if you are unsure. + +source "drivers/pcmcia/Kconfig" + +source "drivers/pci/hotplug/Kconfig" + +config PROC_DEVICETREE + bool "Support for Open Firmware device tree in /proc" + depends on !PPC_ISERIES + help + This option adds a device-tree directory under /proc which contains + an image of the device tree that the kernel copies from Open + Firmware. If unsure, say Y here. + +config CMDLINE_BOOL + bool "Default bootloader kernel arguments" + depends on !PPC_ISERIES + +config CMDLINE + string "Initial kernel command string" + depends on CMDLINE_BOOL + default "console=ttyS0,9600 console=tty0 root=/dev/sda2" + help + On some platforms, there is currently no way for the boot loader to + pass arguments to the kernel. For these platforms, you can supply + some command-line options at build time by entering them here. In + most cases you will need to specify the root device here. + +endmenu + +source "drivers/Kconfig" + +source "fs/Kconfig" + +menu "iSeries device drivers" + depends on PPC_ISERIES + +config VIOCONS + tristate "iSeries Virtual Console Support" + +config VIODASD + tristate "iSeries Virtual I/O disk support" + help + If you are running on an iSeries system and you want to use + virtual disks created and managed by OS/400, say Y. + +config VIOCD + tristate "iSeries Virtual I/O CD support" + help + If you are running Linux on an IBM iSeries system and you want to + read a CD drive owned by OS/400, say Y here. + +config VIOTAPE + tristate "iSeries Virtual Tape Support" + help + If you are running Linux on an iSeries system and you want Linux + to read and/or write a tape drive owned by OS/400, say Y here. + +endmenu + +config VIOPATH + bool + depends on VIOCONS || VIODASD || VIOCD || VIOTAPE || VETH + default y + +source "arch/ppc64/oprofile/Kconfig" + +source "arch/ppc64/Kconfig.debug" + +source "security/Kconfig" + +config KEYS_COMPAT + bool + depends on COMPAT && KEYS + default y + +source "crypto/Kconfig" + +source "lib/Kconfig" diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug new file mode 100644 index 00000000000..e341a129da8 --- /dev/null +++ b/arch/ppc64/Kconfig.debug @@ -0,0 +1,57 @@ +menu "Kernel hacking" + +source "lib/Kconfig.debug" + +config DEBUG_STACKOVERFLOW + bool "Check for stack overflows" + depends on DEBUG_KERNEL + +config KPROBES + bool "Kprobes" + depends on DEBUG_KERNEL + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + +config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL + help + Enables the display of the minimum amount of free stack which each + task has ever had available in the sysrq-T and sysrq-P debug output. + + This option will slow down process creation somewhat. + +config DEBUGGER + bool "Enable debugger hooks" + depends on DEBUG_KERNEL + help + Include in-kernel hooks for kernel debuggers. Unless you are + intending to debug the kernel, say N here. + +config XMON + bool "Include xmon kernel debugger" + depends on DEBUGGER && !PPC_ISERIES + help + Include in-kernel hooks for the xmon kernel monitor/debugger. + Unless you are intending to debug the kernel, say N here. + +config XMON_DEFAULT + bool "Enable xmon by default" + depends on XMON + +config PPCDBG + bool "Include PPCDBG realtime debugging" + depends on DEBUG_KERNEL + +config IRQSTACKS + bool "Use separate kernel stacks when processing interrupts" + help + If you say Y here the kernel will use separate kernel stacks + for handling hard and soft interrupts. This can help avoid + overflowing the process kernel stacks. + +endmenu diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile new file mode 100644 index 00000000000..d33e20bcc52 --- /dev/null +++ b/arch/ppc64/Makefile @@ -0,0 +1,122 @@ +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to do have actions +# for "archclean" and "archdep" for cleaning up and making dependencies for +# this architecture +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# Changes for PPC by Gary Thomas +# Rewritten by Cort Dougan and Paul Mackerras +# Adjusted for PPC64 by Tom Gall +# + +KERNELLOAD := 0xc000000000000000 + +# Set default 32 bits cross compilers for vdso and boot wrapper +CROSS32_COMPILE ?= + +CROSS32CC := $(CROSS32_COMPILE)gcc +CROSS32AS := $(CROSS32_COMPILE)as +CROSS32LD := $(CROSS32_COMPILE)ld +CROSS32OBJCOPY := $(CROSS32_COMPILE)objcopy + +# If we have a biarch compiler, use it for 32 bits cross compile if +# CROSS32_COMPILE wasn't explicitely defined, and add proper explicit +# target type to target compilers + +HAS_BIARCH := $(call cc-option-yn, -m64) +ifeq ($(HAS_BIARCH),y) +ifeq ($(CROSS32_COMPILE),) +CROSS32CC := $(CC) -m32 +CROSS32AS := $(AS) -a32 +CROSS32LD := $(LD) -m elf32ppc +CROSS32OBJCOPY := $(OBJCOPY) +endif +AS := $(AS) -a64 +LD := $(LD) -m elf64ppc +CC := $(CC) -m64 +endif + +export CROSS32CC CROSS32AS CROSS32LD CROSS32OBJCOPY + +new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi) + +ifeq ($(new_nm),y) +NM := $(NM) --synthetic + +endif + +CHECKFLAGS += -m64 -D__powerpc__ + +LDFLAGS := -m elf64ppc +LDFLAGS_vmlinux := -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD) +CFLAGS += -msoft-float -pipe -mminimal-toc -mtraceback=none \ + -mcall-aixdesc + +ifeq ($(CONFIG_POWER4_ONLY),y) +ifeq ($(CONFIG_ALTIVEC),y) + CFLAGS += $(call cc-option,-mcpu=970) +else + CFLAGS += $(call cc-option,-mcpu=power4) +endif +else + CFLAGS += $(call cc-option,-mtune=power4) +endif + +# Enable unit-at-a-time mode when possible. It shrinks the +# kernel considerably. +CFLAGS += $(call cc-option,-funit-at-a-time) + +head-y := arch/ppc64/kernel/head.o + +libs-y += arch/ppc64/lib/ +core-y += arch/ppc64/kernel/ +core-y += arch/ppc64/mm/ +core-$(CONFIG_XMON) += arch/ppc64/xmon/ +drivers-$(CONFIG_OPROFILE) += arch/ppc64/oprofile/ + +boot := arch/ppc64/boot + +boottarget-$(CONFIG_PPC_PSERIES) := zImage zImage.initrd +boottarget-$(CONFIG_PPC_MAPLE) := zImage zImage.initrd +boottarget-$(CONFIG_PPC_ISERIES) := vmlinux.sminitrd vmlinux.initrd vmlinux.sm +$(boottarget-y): vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +bootimage-$(CONFIG_PPC_PSERIES) := $(boot)/zImage +bootimage-$(CONFIG_PPC_PMAC) := vmlinux +bootimage-$(CONFIG_PPC_MAPLE) := $(boot)/zImage +bootimage-$(CONFIG_PPC_ISERIES) := vmlinux +BOOTIMAGE := $(bootimage-y) +install: vmlinux + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ + +defaultimage-$(CONFIG_PPC_PSERIES) := zImage +defaultimage-$(CONFIG_PPC_PMAC) := vmlinux +defaultimage-$(CONFIG_PPC_MAPLE) := zImage +defaultimage-$(CONFIG_PPC_ISERIES) := vmlinux +KBUILD_IMAGE := $(defaultimage-y) +all: $(KBUILD_IMAGE) + +archclean: + $(Q)$(MAKE) $(clean)=$(boot) + +prepare: include/asm-ppc64/offsets.h + +arch/ppc64/kernel/asm-offsets.s: include/asm include/linux/version.h \ + include/config/MARKER + +include/asm-ppc64/offsets.h: arch/ppc64/kernel/asm-offsets.s + $(call filechk,gen-asm-offsets) + +define archhelp + echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/zImage)' + echo ' zImage.initrd- Compressed kernel image with initrd attached,' + echo ' sourced from arch/$(ARCH)/boot/ramdisk.image.gz' + echo ' (arch/$(ARCH)/boot/zImage.initrd)' +endef + +CLEAN_FILES += include/asm-ppc64/offsets.h diff --git a/arch/ppc64/boot/Makefile b/arch/ppc64/boot/Makefile new file mode 100644 index 00000000000..d3e1d6af920 --- /dev/null +++ b/arch/ppc64/boot/Makefile @@ -0,0 +1,117 @@ +# Makefile for making ELF bootable images for booting on CHRP +# using Open Firmware. +# +# Geert Uytterhoeven September 1997 +# +# Based on coffboot by Paul Mackerras +# Simplified for ppc64 by Todd Inglett +# +# NOTE: this code is built for 32 bit in ELF32 format even though +# it packages a 64 bit kernel. We do this to simplify the +# bootloader and increase compatibility with OpenFirmware. +# +# To this end we need to define BOOTCC, etc, as the tools +# needed to build the 32 bit image. These are normally HOSTCC, +# but may be a third compiler if, for example, you are cross +# compiling from an intel box. Once the 64bit ppc gcc is +# stable it will probably simply be a compiler switch to +# compile for 32bit mode. +# To make it easier to setup a cross compiler, +# CROSS32_COMPILE is setup as a prefix just like CROSS_COMPILE +# in the toplevel makefile. + + +HOSTCC := gcc +BOOTCFLAGS := $(HOSTCFLAGS) $(LINUXINCLUDE) -fno-builtin +BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional +BOOTLFLAGS := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds +OBJCOPYFLAGS := contents,alloc,load,readonly,data + +src-boot := crt0.S string.S prom.c main.c zlib.c imagesize.c div64.S +src-boot := $(addprefix $(obj)/, $(src-boot)) +obj-boot := $(addsuffix .o, $(basename $(src-boot))) + +quiet_cmd_bootcc = BOOTCC $@ + cmd_bootcc = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< + +quiet_cmd_bootas = BOOTAS $@ + cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< + +$(patsubst %.c,%.o, $(filter %.c, $(src-boot))): %.o: %.c + $(call if_changed_dep,bootcc) +$(patsubst %.S,%.o, $(filter %.S, $(src-boot))): %.o: %.S + $(call if_changed_dep,bootas) + +#----------------------------------------------------------- +# ELF sections within the zImage bootloader/wrapper +#----------------------------------------------------------- +required := vmlinux.strip +initrd := initrd + +obj-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.o, $(section))) +src-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.c, $(section))) +gz-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.gz, $(section))) + +hostprogs-y := piggy addnote addRamDisk +targets += zImage zImage.initrd imagesize.c \ + $(patsubst $(obj)/%,%, $(call obj-sec, $(required) $(initrd))) \ + $(patsubst $(obj)/%,%, $(call src-sec, $(required) $(initrd))) \ + $(patsubst $(obj)/%,%, $(call gz-sec, $(required) $(initrd))) \ + vmlinux.initrd +extra-y := initrd.o + +quiet_cmd_ramdisk = RAMDISK $@ + cmd_ramdisk = $(obj)/addRamDisk $(obj)/ramdisk.image.gz $< $@ + +quiet_cmd_stripvm = STRIP $@ + cmd_stripvm = $(STRIP) -s $< -o $@ + +vmlinux.strip: vmlinux FORCE + $(call if_changed,stripvm) +$(obj)/vmlinux.initrd: vmlinux.strip $(obj)/addRamDisk $(obj)/ramdisk.image.gz FORCE + $(call if_changed,ramdisk) + +addsection = $(CROSS32OBJCOPY) $(1) \ + --add-section=.kernel:$(strip $(patsubst $(obj)/kernel-%.o,%, $(1)))=$(patsubst %.o,%.gz, $(1)) \ + --set-section-flags=.kernel:$(strip $(patsubst $(obj)/kernel-%.o,%, $(1)))=$(OBJCOPYFLAGS) + +quiet_cmd_addnote = ADDNOTE $@ + cmd_addnote = $(CROSS32LD) $(BOOTLFLAGS) -o $@ $(obj-boot) && $(obj)/addnote $@ + +quiet_cmd_piggy = PIGGY $@ + cmd_piggy = $(obj)/piggyback $(@:.o=) < $< | $(CROSS32AS) -o $@ + +$(call gz-sec, $(required)): $(obj)/kernel-%.gz: % FORCE + $(call if_changed,gzip) + +$(obj)/kernel-initrd.gz: $(obj)/ramdisk.image.gz + cp -f $(obj)/ramdisk.image.gz $@ + +$(call src-sec, $(required) $(initrd)): $(obj)/kernel-%.c: $(obj)/kernel-%.gz FORCE + touch $@ + +$(call obj-sec, $(required) $(initrd)): $(obj)/kernel-%.o: $(obj)/kernel-%.c FORCE + $(call if_changed_dep,bootcc) + $(call addsection, $@) + +$(obj)/zImage: obj-boot += $(call obj-sec, $(required)) +$(obj)/zImage: $(call obj-sec, $(required)) $(obj-boot) $(obj)/addnote FORCE + $(call if_changed,addnote) + +$(obj)/zImage.initrd: obj-boot += $(call obj-sec, $(required) $(initrd)) +$(obj)/zImage.initrd: $(call obj-sec, $(required) $(initrd)) $(obj-boot) $(obj)/addnote FORCE + $(call if_changed,addnote) + +$(obj)/imagesize.c: vmlinux.strip + @echo Generating $@ + ls -l vmlinux.strip | \ + awk '{printf "/* generated -- do not edit! */\n" \ + "unsigned long vmlinux_filesize = %d;\n", $$5}' > $(obj)/imagesize.c + $(CROSS_COMPILE)nm -n vmlinux | tail -n 1 | \ + awk '{printf "unsigned long vmlinux_memsize = 0x%s;\n", substr($$1,8)}' \ + >> $(obj)/imagesize.c + +install: $(CONFIGURE) $(BOOTIMAGE) + sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" "$(BOOTIMAGE)" + +clean-files := $(addprefix $(objtree)/, $(obj-boot) vmlinux.strip) diff --git a/arch/ppc64/boot/README b/arch/ppc64/boot/README new file mode 100644 index 00000000000..3e11058760e --- /dev/null +++ b/arch/ppc64/boot/README @@ -0,0 +1,11 @@ + +To extract the kernel vmlinux, System.map, .config or initrd from the zImage binary: + +objcopy -j .kernel:vmlinux -O binary zImage vmlinux.gz +objcopy -j .kernel:System.map -O binary zImage System.map.gz +objcopy -j .kernel:.config -O binary zImage config.gz +objcopy -j .kernel:initrd -O binary zImage.initrd initrd.gz + + + Peter + diff --git a/arch/ppc64/boot/addRamDisk.c b/arch/ppc64/boot/addRamDisk.c new file mode 100644 index 00000000000..7f2c0947339 --- /dev/null +++ b/arch/ppc64/boot/addRamDisk.c @@ -0,0 +1,300 @@ +#include +#include +#include +#include +#include +#include +#include + +#define ElfHeaderSize (64 * 1024) +#define ElfPages (ElfHeaderSize / 4096) +#define KERNELBASE (0xc000000000000000) + +void get4k(FILE *file, char *buf ) +{ + unsigned j; + unsigned num = fread(buf, 1, 4096, file); + for ( j=num; j<4096; ++j ) + buf[j] = 0; +} + +void put4k(FILE *file, char *buf ) +{ + fwrite(buf, 1, 4096, file); +} + +void death(const char *msg, FILE *fdesc, const char *fname) +{ + fprintf(stderr, msg); + fclose(fdesc); + unlink(fname); + exit(1); +} + +int main(int argc, char **argv) +{ + char inbuf[4096]; + FILE *ramDisk = NULL; + FILE *sysmap = NULL; + FILE *inputVmlinux = NULL; + FILE *outputVmlinux = NULL; + + unsigned i = 0; + unsigned long ramFileLen = 0; + unsigned long ramLen = 0; + unsigned long roundR = 0; + + unsigned long sysmapFileLen = 0; + unsigned long sysmapLen = 0; + unsigned long sysmapPages = 0; + char* ptr_end = NULL; + unsigned long offset_end = 0; + + unsigned long kernelLen = 0; + unsigned long actualKernelLen = 0; + unsigned long round = 0; + unsigned long roundedKernelLen = 0; + unsigned long ramStartOffs = 0; + unsigned long ramPages = 0; + unsigned long roundedKernelPages = 0; + unsigned long hvReleaseData = 0; + u_int32_t eyeCatcher = 0xc8a5d9c4; + unsigned long naca = 0; + unsigned long xRamDisk = 0; + unsigned long xRamDiskSize = 0; + long padPages = 0; + + + if (argc < 2) { + fprintf(stderr, "Name of RAM disk file missing.\n"); + exit(1); + } + + if (argc < 3) { + fprintf(stderr, "Name of System Map input file is missing.\n"); + exit(1); + } + + if (argc < 4) { + fprintf(stderr, "Name of vmlinux file missing.\n"); + exit(1); + } + + if (argc < 5) { + fprintf(stderr, "Name of vmlinux output file missing.\n"); + exit(1); + } + + + ramDisk = fopen(argv[1], "r"); + if ( ! ramDisk ) { + fprintf(stderr, "RAM disk file \"%s\" failed to open.\n", argv[1]); + exit(1); + } + + sysmap = fopen(argv[2], "r"); + if ( ! sysmap ) { + fprintf(stderr, "System Map file \"%s\" failed to open.\n", argv[2]); + exit(1); + } + + inputVmlinux = fopen(argv[3], "r"); + if ( ! inputVmlinux ) { + fprintf(stderr, "vmlinux file \"%s\" failed to open.\n", argv[3]); + exit(1); + } + + outputVmlinux = fopen(argv[4], "w+"); + if ( ! outputVmlinux ) { + fprintf(stderr, "output vmlinux file \"%s\" failed to open.\n", argv[4]); + exit(1); + } + + + + /* Input Vmlinux file */ + fseek(inputVmlinux, 0, SEEK_END); + kernelLen = ftell(inputVmlinux); + fseek(inputVmlinux, 0, SEEK_SET); + printf("kernel file size = %d\n", kernelLen); + if ( kernelLen == 0 ) { + fprintf(stderr, "You must have a linux kernel specified as argv[3]\n"); + exit(1); + } + + actualKernelLen = kernelLen - ElfHeaderSize; + + printf("actual kernel length (minus ELF header) = %d\n", actualKernelLen); + + round = actualKernelLen % 4096; + roundedKernelLen = actualKernelLen; + if ( round ) + roundedKernelLen += (4096 - round); + printf("Vmlinux length rounded up to a 4k multiple = %ld/0x%lx \n", roundedKernelLen, roundedKernelLen); + roundedKernelPages = roundedKernelLen / 4096; + printf("Vmlinux pages to copy = %ld/0x%lx \n", roundedKernelPages, roundedKernelPages); + + + + /* Input System Map file */ + /* (needs to be processed simply to determine if we need to add pad pages due to the static variables not being included in the vmlinux) */ + fseek(sysmap, 0, SEEK_END); + sysmapFileLen = ftell(sysmap); + fseek(sysmap, 0, SEEK_SET); + printf("%s file size = %ld/0x%lx \n", argv[2], sysmapFileLen, sysmapFileLen); + + sysmapLen = sysmapFileLen; + + roundR = 4096 - (sysmapLen % 4096); + if (roundR) { + printf("Rounding System Map file up to a multiple of 4096, adding %ld/0x%lx \n", roundR, roundR); + sysmapLen += roundR; + } + printf("Rounded System Map size is %ld/0x%lx \n", sysmapLen, sysmapLen); + + /* Process the Sysmap file to determine where _end is */ + sysmapPages = sysmapLen / 4096; + /* read the whole file line by line, expect that it doesn't fail */ + while ( fgets(inbuf, 4096, sysmap) ) ; + /* search for _end in the last page of the system map */ + ptr_end = strstr(inbuf, " _end"); + if (!ptr_end) { + fprintf(stderr, "Unable to find _end in the sysmap file \n"); + fprintf(stderr, "inbuf: \n"); + fprintf(stderr, "%s \n", inbuf); + exit(1); + } + printf("Found _end in the last page of the sysmap - backing up 10 characters it looks like %s", ptr_end-10); + /* convert address of _end in system map to hex offset. */ + offset_end = (unsigned int)strtol(ptr_end-10, NULL, 16); + /* calc how many pages we need to insert between the vmlinux and the start of the ram disk */ + padPages = offset_end/4096 - roundedKernelPages; + + /* Check and see if the vmlinux is already larger than _end in System.map */ + if (padPages < 0) { + /* vmlinux is larger than _end - adjust the offset to the start of the embedded ram disk */ + offset_end = roundedKernelLen; + printf("vmlinux is larger than _end indicates it needs to be - offset_end = %lx \n", offset_end); + padPages = 0; + printf("will insert %lx pages between the vmlinux and the start of the ram disk \n", padPages); + } + else { + /* _end is larger than vmlinux - use the offset to _end that we calculated from the system map */ + printf("vmlinux is smaller than _end indicates is needed - offset_end = %lx \n", offset_end); + printf("will insert %lx pages between the vmlinux and the start of the ram disk \n", padPages); + } + + + + /* Input Ram Disk file */ + // Set the offset that the ram disk will be started at. + ramStartOffs = offset_end; /* determined from the input vmlinux file and the system map */ + printf("Ram Disk will start at offset = 0x%lx \n", ramStartOffs); + + fseek(ramDisk, 0, SEEK_END); + ramFileLen = ftell(ramDisk); + fseek(ramDisk, 0, SEEK_SET); + printf("%s file size = %ld/0x%lx \n", argv[1], ramFileLen, ramFileLen); + + ramLen = ramFileLen; + + roundR = 4096 - (ramLen % 4096); + if ( roundR ) { + printf("Rounding RAM disk file up to a multiple of 4096, adding %ld/0x%lx \n", roundR, roundR); + ramLen += roundR; + } + + printf("Rounded RAM disk size is %ld/0x%lx \n", ramLen, ramLen); + ramPages = ramLen / 4096; + printf("RAM disk pages to copy = %ld/0x%lx\n", ramPages, ramPages); + + + + // Copy 64K ELF header + for (i=0; i<(ElfPages); ++i) { + get4k( inputVmlinux, inbuf ); + put4k( outputVmlinux, inbuf ); + } + + /* Copy the vmlinux (as full pages). */ + fseek(inputVmlinux, ElfHeaderSize, SEEK_SET); + for ( i=0; i +#include +#include +#include +#include + +char arch[] = "PowerPC"; + +#define N_DESCR 6 +unsigned int descr[N_DESCR] = { + 0xffffffff, /* real-mode = true */ + 0x00c00000, /* real-base, i.e. where we expect OF to be */ + 0xffffffff, /* real-size */ + 0xffffffff, /* virt-base */ + 0xffffffff, /* virt-size */ + 0x4000, /* load-base */ +}; + +unsigned char buf[512]; + +#define GET_16BE(off) ((buf[off] << 8) + (buf[(off)+1])) +#define GET_32BE(off) ((GET_16BE(off) << 16) + GET_16BE((off)+2)) + +#define PUT_16BE(off, v) (buf[off] = ((v) >> 8) & 0xff, \ + buf[(off) + 1] = (v) & 0xff) +#define PUT_32BE(off, v) (PUT_16BE((off), (v) >> 16), \ + PUT_16BE((off) + 2, (v))) + +/* Structure of an ELF file */ +#define E_IDENT 0 /* ELF header */ +#define E_PHOFF 28 +#define E_PHENTSIZE 42 +#define E_PHNUM 44 +#define E_HSIZE 52 /* size of ELF header */ + +#define EI_MAGIC 0 /* offsets in E_IDENT area */ +#define EI_CLASS 4 +#define EI_DATA 5 + +#define PH_TYPE 0 /* ELF program header */ +#define PH_OFFSET 4 +#define PH_FILESZ 16 +#define PH_HSIZE 32 /* size of program header */ + +#define PT_NOTE 4 /* Program header type = note */ + +#define ELFCLASS32 1 +#define ELFDATA2MSB 2 + +unsigned char elf_magic[4] = { 0x7f, 'E', 'L', 'F' }; + +int +main(int ac, char **av) +{ + int fd, n, i; + int ph, ps, np; + int nnote, ns; + + if (ac != 2) { + fprintf(stderr, "Usage: %s elf-file\n", av[0]); + exit(1); + } + fd = open(av[1], O_RDWR); + if (fd < 0) { + perror(av[1]); + exit(1); + } + + nnote = strlen(arch) + 1 + (N_DESCR + 3) * 4; + + n = read(fd, buf, sizeof(buf)); + if (n < 0) { + perror("read"); + exit(1); + } + + if (n < E_HSIZE || memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0) + goto notelf; + + if (buf[E_IDENT+EI_CLASS] != ELFCLASS32 + || buf[E_IDENT+EI_DATA] != ELFDATA2MSB) { + fprintf(stderr, "%s is not a big-endian 32-bit ELF image\n", + av[1]); + exit(1); + } + + ph = GET_32BE(E_PHOFF); + ps = GET_16BE(E_PHENTSIZE); + np = GET_16BE(E_PHNUM); + if (ph < E_HSIZE || ps < PH_HSIZE || np < 1) + goto notelf; + if (ph + (np + 1) * ps + nnote > n) + goto nospace; + + for (i = 0; i < np; ++i) { + if (GET_32BE(ph + PH_TYPE) == PT_NOTE) { + fprintf(stderr, "%s already has a note entry\n", + av[1]); + exit(0); + } + ph += ps; + } + + /* XXX check that the area we want to use is all zeroes */ + for (i = 0; i < ps + nnote; ++i) + if (buf[ph + i] != 0) + goto nospace; + + /* fill in the program header entry */ + ns = ph + ps; + PUT_32BE(ph + PH_TYPE, PT_NOTE); + PUT_32BE(ph + PH_OFFSET, ns); + PUT_32BE(ph + PH_FILESZ, nnote); + + /* fill in the note area we point to */ + /* XXX we should probably make this a proper section */ + PUT_32BE(ns, strlen(arch) + 1); + PUT_32BE(ns + 4, N_DESCR * 4); + PUT_32BE(ns + 8, 0x1275); + strcpy(&buf[ns + 12], arch); + ns += 12 + strlen(arch) + 1; + for (i = 0; i < N_DESCR; ++i) + PUT_32BE(ns + i * 4, descr[i]); + + /* Update the number of program headers */ + PUT_16BE(E_PHNUM, np + 1); + + /* write back */ + lseek(fd, (long) 0, SEEK_SET); + i = write(fd, buf, n); + if (i < 0) { + perror("write"); + exit(1); + } + if (i < n) { + fprintf(stderr, "%s: write truncated\n", av[1]); + exit(1); + } + + exit(0); + + notelf: + fprintf(stderr, "%s does not appear to be an ELF file\n", av[0]); + exit(1); + + nospace: + fprintf(stderr, "sorry, I can't find space in %s to put the note\n", + av[0]); + exit(1); +} diff --git a/arch/ppc64/boot/crt0.S b/arch/ppc64/boot/crt0.S new file mode 100644 index 00000000000..04d3e74cd72 --- /dev/null +++ b/arch/ppc64/boot/crt0.S @@ -0,0 +1,48 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE: this code runs in 32 bit mode and is packaged as ELF32. + */ + +#include + + .text + .globl _start +_start: + lis r9,_start@h + lis r8,_etext@ha + addi r8,r8,_etext@l +1: dcbf r0,r9 + icbi r0,r9 + addi r9,r9,0x20 + cmplwi 0,r9,8 + blt 1b + sync + isync + + ## Clear out the BSS as per ANSI C requirements + + lis r7,_end@ha + addi r7,r7,_end@l # r7 = &_end + lis r8,__bss_start@ha # + addi r8,r8,__bss_start@l # r8 = &_bss_start + + ## Determine how large an area, in number of words, to clear + + subf r7,r8,r7 # r7 = &_end - &_bss_start + 1 + addi r7,r7,3 # r7 += 3 + srwi. r7,r7,2 # r7 = size in words. + beq 3f # If the size is zero, don't bother + addi r8,r8,-4 # r8 -= 4 + mtctr r7 # SPRN_CTR = number of words to clear + li r0,0 # r0 = 0 +2: stwu r0,4(r8) # Clear out a word + bdnz 2b # Keep clearing until done +3: + b start + diff --git a/arch/ppc64/boot/div64.S b/arch/ppc64/boot/div64.S new file mode 100644 index 00000000000..38f7e466d7d --- /dev/null +++ b/arch/ppc64/boot/div64.S @@ -0,0 +1,58 @@ +/* + * Divide a 64-bit unsigned number by a 32-bit unsigned number. + * This routine assumes that the top 32 bits of the dividend are + * non-zero to start with. + * On entry, r3 points to the dividend, which get overwritten with + * the 64-bit quotient, and r4 contains the divisor. + * On exit, r3 contains the remainder. + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + + .globl __div64_32 +__div64_32: + lwz r5,0(r3) # get the dividend into r5/r6 + lwz r6,4(r3) + cmplw r5,r4 + li r7,0 + li r8,0 + blt 1f + divwu r7,r5,r4 # if dividend.hi >= divisor, + mullw r0,r7,r4 # quotient.hi = dividend.hi / divisor + subf. r5,r0,r5 # dividend.hi %= divisor + beq 3f +1: mr r11,r5 # here dividend.hi != 0 + andis. r0,r5,0xc000 + bne 2f + cntlzw r0,r5 # we are shifting the dividend right + li r10,-1 # to make it < 2^32, and shifting + srw r10,r10,r0 # the divisor right the same amount, + add r9,r4,r10 # rounding up (so the estimate cannot + andc r11,r6,r10 # ever be too large, only too small) + andc r9,r9,r10 + or r11,r5,r11 + rotlw r9,r9,r0 + rotlw r11,r11,r0 + divwu r11,r11,r9 # then we divide the shifted quantities +2: mullw r10,r11,r4 # to get an estimate of the quotient, + mulhwu r9,r11,r4 # multiply the estimate by the divisor, + subfc r6,r10,r6 # take the product from the divisor, + add r8,r8,r11 # and add the estimate to the accumulated + subfe. r5,r9,r5 # quotient + bne 1b +3: cmplw r6,r4 + blt 4f + divwu r0,r6,r4 # perform the remaining 32-bit division + mullw r10,r0,r4 # and get the remainder + add r8,r8,r0 + subf r6,r10,r6 +4: stw r7,0(r3) # return the quotient in *r3 + stw r8,4(r3) + mr r3,r6 # return the remainder in r3 + blr diff --git a/arch/ppc64/boot/install.sh b/arch/ppc64/boot/install.sh new file mode 100644 index 00000000000..955c5681db6 --- /dev/null +++ b/arch/ppc64/boot/install.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# +# arch/ppc64/boot/install.sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Blatantly stolen from in arch/i386/boot/install.sh by Dave Hansen +# +# "make install" script for ppc64 architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# $5 - kernel boot file, the zImage +# + +# User may have a custom install script + +if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi +if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi + +# Default install + +# this should work for both the pSeries zImage and the iSeries vmlinux.sm +image_name=`basename $5` + +if [ -f $4/$image_name ]; then + mv $4/$image_name $4/$image_name.old +fi + +if [ -f $4/System.map ]; then + mv $4/System.map $4/System.old +fi + +cat $2 > $4/$image_name +cp $3 $4/System.map diff --git a/arch/ppc64/boot/main.c b/arch/ppc64/boot/main.c new file mode 100644 index 00000000000..b0fa86ad8b1 --- /dev/null +++ b/arch/ppc64/boot/main.c @@ -0,0 +1,331 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * Updates for PPC64 by Todd Inglett, Dave Engebretsen & Peter Bergner. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include "ppc32-types.h" +#include "zlib.h" +#include +#include +#include +#include +#include + +extern void *finddevice(const char *); +extern int getprop(void *, const char *, void *, int); +extern void printk(char *fmt, ...); +extern void printf(const char *fmt, ...); +extern int sprintf(char *buf, const char *fmt, ...); +void gunzip(void *, int, unsigned char *, int *); +void *claim(unsigned int, unsigned int, unsigned int); +void flush_cache(void *, unsigned long); +void pause(void); +extern void exit(void); + +unsigned long strlen(const char *s); +void *memmove(void *dest, const void *src, unsigned long n); +void *memcpy(void *dest, const void *src, unsigned long n); + +/* Value picked to match that used by yaboot */ +#define PROG_START 0x01400000 +#define RAM_END (256<<20) // Fixme: use OF */ + +char *avail_ram; +char *begin_avail, *end_avail; +char *avail_high; +unsigned int heap_use; +unsigned int heap_max; + +extern char _start[]; +extern char _vmlinux_start[]; +extern char _vmlinux_end[]; +extern char _initrd_start[]; +extern char _initrd_end[]; +extern unsigned long vmlinux_filesize; +extern unsigned long vmlinux_memsize; + +struct addr_range { + unsigned long addr; + unsigned long size; + unsigned long memsize; +}; +struct addr_range vmlinux = {0, 0, 0}; +struct addr_range vmlinuz = {0, 0, 0}; +struct addr_range initrd = {0, 0, 0}; + +static char scratch[128<<10]; /* 128kB of scratch space for gunzip */ + +typedef void (*kernel_entry_t)( unsigned long, + unsigned long, + void *, + void *); + + +int (*prom)(void *); + +void *chosen_handle; +void *stdin; +void *stdout; +void *stderr; + +#undef DEBUG + +static unsigned long claim_base = PROG_START; + +static unsigned long try_claim(unsigned long size) +{ + unsigned long addr = 0; + + for(; claim_base < RAM_END; claim_base += 0x100000) { +#ifdef DEBUG + printf(" trying: 0x%08lx\n\r", claim_base); +#endif + addr = (unsigned long)claim(claim_base, size, 0); + if ((void *)addr != (void *)-1) + break; + } + if (addr == 0) + return 0; + claim_base = PAGE_ALIGN(claim_base + size); + return addr; +} + +void start(unsigned long a1, unsigned long a2, void *promptr) +{ + unsigned long i; + kernel_entry_t kernel_entry; + Elf64_Ehdr *elf64; + Elf64_Phdr *elf64ph; + + prom = (int (*)(void *)) promptr; + chosen_handle = finddevice("/chosen"); + if (chosen_handle == (void *) -1) + exit(); + if (getprop(chosen_handle, "stdout", &stdout, sizeof(stdout)) != 4) + exit(); + stderr = stdout; + if (getprop(chosen_handle, "stdin", &stdin, sizeof(stdin)) != 4) + exit(); + + printf("\n\rzImage starting: loaded at 0x%x\n\r", (unsigned)_start); + + /* + * Now we try to claim some memory for the kernel itself + * our "vmlinux_memsize" is the memory footprint in RAM, _HOWEVER_, what + * our Makefile stuffs in is an image containing all sort of junk including + * an ELF header. We need to do some calculations here to find the right + * size... In practice we add 1Mb, that is enough, but we should really + * consider fixing the Makefile to put a _raw_ kernel in there ! + */ + vmlinux_memsize += 0x100000; + printf("Allocating 0x%lx bytes for kernel ...\n\r", vmlinux_memsize); + vmlinux.addr = try_claim(vmlinux_memsize); + if (vmlinux.addr == 0) { + printf("Can't allocate memory for kernel image !\n\r"); + exit(); + } + vmlinuz.addr = (unsigned long)_vmlinux_start; + vmlinuz.size = (unsigned long)(_vmlinux_end - _vmlinux_start); + vmlinux.size = PAGE_ALIGN(vmlinux_filesize); + vmlinux.memsize = vmlinux_memsize; + + /* + * Now we try to claim memory for the initrd (and copy it there) + */ + initrd.size = (unsigned long)(_initrd_end - _initrd_start); + initrd.memsize = initrd.size; + if ( initrd.size > 0 ) { + printf("Allocating 0x%lx bytes for initrd ...\n\r", initrd.size); + initrd.addr = try_claim(initrd.size); + if (initrd.addr == 0) { + printf("Can't allocate memory for initial ramdisk !\n\r"); + exit(); + } + a1 = initrd.addr; + a2 = initrd.size; + printf("initial ramdisk moving 0x%lx <- 0x%lx (%lx bytes)\n\r", + initrd.addr, (unsigned long)_initrd_start, initrd.size); + memmove((void *)initrd.addr, (void *)_initrd_start, initrd.size); + printf("initrd head: 0x%lx\n\r", *((u32 *)initrd.addr)); + } + + /* Eventually gunzip the kernel */ + if (*(unsigned short *)vmlinuz.addr == 0x1f8b) { + int len; + avail_ram = scratch; + begin_avail = avail_high = avail_ram; + end_avail = scratch + sizeof(scratch); + printf("gunzipping (0x%lx <- 0x%lx:0x%0lx)...", + vmlinux.addr, vmlinuz.addr, vmlinuz.addr+vmlinuz.size); + len = vmlinuz.size; + gunzip((void *)vmlinux.addr, vmlinux.size, + (unsigned char *)vmlinuz.addr, &len); + printf("done 0x%lx bytes\n\r", len); + printf("0x%x bytes of heap consumed, max in use 0x%x\n\r", + (unsigned)(avail_high - begin_avail), heap_max); + } else { + memmove((void *)vmlinux.addr,(void *)vmlinuz.addr,vmlinuz.size); + } + + /* Skip over the ELF header */ + elf64 = (Elf64_Ehdr *)vmlinux.addr; + if ( elf64->e_ident[EI_MAG0] != ELFMAG0 || + elf64->e_ident[EI_MAG1] != ELFMAG1 || + elf64->e_ident[EI_MAG2] != ELFMAG2 || + elf64->e_ident[EI_MAG3] != ELFMAG3 || + elf64->e_ident[EI_CLASS] != ELFCLASS64 || + elf64->e_ident[EI_DATA] != ELFDATA2MSB || + elf64->e_type != ET_EXEC || + elf64->e_machine != EM_PPC64 ) + { + printf("Error: not a valid PPC64 ELF file!\n\r"); + exit(); + } + + elf64ph = (Elf64_Phdr *)((unsigned long)elf64 + + (unsigned long)elf64->e_phoff); + for(i=0; i < (unsigned int)elf64->e_phnum ;i++,elf64ph++) { + if (elf64ph->p_type == PT_LOAD && elf64ph->p_offset != 0) + break; + } +#ifdef DEBUG + printf("... skipping 0x%lx bytes of ELF header\n\r", + (unsigned long)elf64ph->p_offset); +#endif + vmlinux.addr += (unsigned long)elf64ph->p_offset; + vmlinux.size -= (unsigned long)elf64ph->p_offset; + + flush_cache((void *)vmlinux.addr, vmlinux.size); + + if (a1) + printf("initrd head: 0x%lx\n\r", *((u32 *)initrd.addr)); + + kernel_entry = (kernel_entry_t)vmlinux.addr; +#ifdef DEBUG + printf( "kernel:\n\r" + " entry addr = 0x%lx\n\r" + " a1 = 0x%lx,\n\r" + " a2 = 0x%lx,\n\r" + " prom = 0x%lx,\n\r" + " bi_recs = 0x%lx,\n\r", + (unsigned long)kernel_entry, a1, a2, + (unsigned long)prom, NULL); +#endif + + kernel_entry( a1, a2, prom, NULL ); + + printf("Error: Linux kernel returned to zImage bootloader!\n\r"); + + exit(); +} + +struct memchunk { + unsigned int size; + unsigned int pad; + struct memchunk *next; +}; + +static struct memchunk *freechunks; + +void *zalloc(void *x, unsigned items, unsigned size) +{ + void *p; + struct memchunk **mpp, *mp; + + size *= items; + size = _ALIGN(size, sizeof(struct memchunk)); + heap_use += size; + if (heap_use > heap_max) + heap_max = heap_use; + for (mpp = &freechunks; (mp = *mpp) != 0; mpp = &mp->next) { + if (mp->size == size) { + *mpp = mp->next; + return mp; + } + } + p = avail_ram; + avail_ram += size; + if (avail_ram > avail_high) + avail_high = avail_ram; + if (avail_ram > end_avail) { + printf("oops... out of memory\n\r"); + pause(); + } + return p; +} + +void zfree(void *x, void *addr, unsigned nb) +{ + struct memchunk *mp = addr; + + nb = _ALIGN(nb, sizeof(struct memchunk)); + heap_use -= nb; + if (avail_ram == addr + nb) { + avail_ram = addr; + return; + } + mp->size = nb; + mp->next = freechunks; + freechunks = mp; +} + +#define HEAD_CRC 2 +#define EXTRA_FIELD 4 +#define ORIG_NAME 8 +#define COMMENT 0x10 +#define RESERVED 0xe0 + +#define DEFLATED 8 + +void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) +{ + z_stream s; + int r, i, flags; + + /* skip header */ + i = 10; + flags = src[3]; + if (src[2] != DEFLATED || (flags & RESERVED) != 0) { + printf("bad gzipped data\n\r"); + exit(); + } + if ((flags & EXTRA_FIELD) != 0) + i = 12 + src[10] + (src[11] << 8); + if ((flags & ORIG_NAME) != 0) + while (src[i++] != 0) + ; + if ((flags & COMMENT) != 0) + while (src[i++] != 0) + ; + if ((flags & HEAD_CRC) != 0) + i += 2; + if (i >= *lenp) { + printf("gunzip: ran out of data in header\n\r"); + exit(); + } + + s.zalloc = zalloc; + s.zfree = zfree; + r = inflateInit2(&s, -MAX_WBITS); + if (r != Z_OK) { + printf("inflateInit2 returned %d\n\r", r); + exit(); + } + s.next_in = src + i; + s.avail_in = *lenp - i; + s.next_out = dst; + s.avail_out = dstlen; + r = inflate(&s, Z_FINISH); + if (r != Z_OK && r != Z_STREAM_END) { + printf("inflate returned %d msg: %s\n\r", r, s.msg); + exit(); + } + *lenp = s.next_out - (unsigned char *) dst; + inflateEnd(&s); +} + diff --git a/arch/ppc64/boot/mknote.c b/arch/ppc64/boot/mknote.c new file mode 100644 index 00000000000..120cc1d8973 --- /dev/null +++ b/arch/ppc64/boot/mknote.c @@ -0,0 +1,43 @@ +/* + * Copyright (C) Cort Dougan 1999. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Generate a note section as per the CHRP specification. + * + */ + +#include + +#define PL(x) printf("%c%c%c%c", ((x)>>24)&0xff, ((x)>>16)&0xff, ((x)>>8)&0xff, (x)&0xff ); + +int main(void) +{ +/* header */ + /* namesz */ + PL(strlen("PowerPC")+1); + /* descrsz */ + PL(6*4); + /* type */ + PL(0x1275); + /* name */ + printf("PowerPC"); printf("%c", 0); + +/* descriptor */ + /* real-mode */ + PL(0xffffffff); + /* real-base */ + PL(0x00c00000); + /* real-size */ + PL(0xffffffff); + /* virt-base */ + PL(0xffffffff); + /* virt-size */ + PL(0xffffffff); + /* load-base */ + PL(0x4000); + return 0; +} diff --git a/arch/ppc64/boot/piggyback.c b/arch/ppc64/boot/piggyback.c new file mode 100644 index 00000000000..235c7a87269 --- /dev/null +++ b/arch/ppc64/boot/piggyback.c @@ -0,0 +1,83 @@ +/* + * Copyright 2001 IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +extern long ce_exec_config[]; + +int main(int argc, char *argv[]) +{ + int i, cnt, pos, len; + unsigned int cksum, val; + unsigned char *lp; + unsigned char buf[8192]; + char *varname; + if (argc != 2) + { + fprintf(stderr, "usage: %s name out-file\n", + argv[0]); + exit(1); + } + + varname = strrchr(argv[1], '/'); + if (varname) + varname++; + else + varname = argv[1]; + + fprintf(stdout, "#\n"); + fprintf(stdout, "# Miscellaneous data structures:\n"); + fprintf(stdout, "# WARNING - this file is automatically generated!\n"); + fprintf(stdout, "#\n"); + fprintf(stdout, "\n"); + fprintf(stdout, "\t.data\n"); + fprintf(stdout, "\t.globl %s_data\n", varname); + fprintf(stdout, "%s_data:\n", varname); + pos = 0; + cksum = 0; + while ((len = read(0, buf, sizeof(buf))) > 0) + { + cnt = 0; + lp = (unsigned char *)buf; + len = (len + 3) & ~3; /* Round up to longwords */ + for (i = 0; i < len; i += 4) + { + if (cnt == 0) + { + fprintf(stdout, "\t.long\t"); + } + fprintf(stdout, "0x%02X%02X%02X%02X", lp[0], lp[1], lp[2], lp[3]); + val = *(unsigned long *)lp; + cksum ^= val; + lp += 4; + if (++cnt == 4) + { + cnt = 0; + fprintf(stdout, " # %x \n", pos+i-12); + fflush(stdout); + } else + { + fprintf(stdout, ","); + } + } + if (cnt) + { + fprintf(stdout, "0\n"); + } + pos += len; + } + fprintf(stdout, "\t.globl %s_len\n", varname); + fprintf(stdout, "%s_len:\t.long\t0x%x\n", varname, pos); + fflush(stdout); + fclose(stdout); + fprintf(stderr, "cksum = %x\n", cksum); + exit(0); +} + diff --git a/arch/ppc64/boot/ppc32-types.h b/arch/ppc64/boot/ppc32-types.h new file mode 100644 index 00000000000..f7b8884f8f7 --- /dev/null +++ b/arch/ppc64/boot/ppc32-types.h @@ -0,0 +1,36 @@ +#ifndef _PPC64_TYPES_H +#define _PPC64_TYPES_H + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +typedef __signed__ long long __s64; +typedef unsigned long long __u64; + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +typedef signed long long s64; +typedef unsigned long long u64; + +typedef struct { + __u32 u[4]; +} __attribute((aligned(16))) __vector128; + +#define BITS_PER_LONG 32 + +typedef __vector128 vector128; + +#endif /* _PPC64_TYPES_H */ diff --git a/arch/ppc64/boot/prom.c b/arch/ppc64/boot/prom.c new file mode 100644 index 00000000000..7b607d1862c --- /dev/null +++ b/arch/ppc64/boot/prom.c @@ -0,0 +1,637 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include + +int (*prom)(void *); + +void *chosen_handle; +void *stdin; +void *stdout; +void *stderr; + +void exit(void); +void *finddevice(const char *name); +int getprop(void *phandle, const char *name, void *buf, int buflen); +void chrpboot(int a1, int a2, void *prom); /* in main.c */ + +void printk(char *fmt, ...); + +/* there is no convenient header to get this from... -- paulus */ +extern unsigned long strlen(const char *); + +int +write(void *handle, void *ptr, int nb) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *ihandle; + void *addr; + int len; + int actual; + } args; + + args.service = "write"; + args.nargs = 3; + args.nret = 1; + args.ihandle = handle; + args.addr = ptr; + args.len = nb; + args.actual = -1; + (*prom)(&args); + return args.actual; +} + +int +read(void *handle, void *ptr, int nb) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *ihandle; + void *addr; + int len; + int actual; + } args; + + args.service = "read"; + args.nargs = 3; + args.nret = 1; + args.ihandle = handle; + args.addr = ptr; + args.len = nb; + args.actual = -1; + (*prom)(&args); + return args.actual; +} + +void +exit() +{ + struct prom_args { + char *service; + } args; + + for (;;) { + args.service = "exit"; + (*prom)(&args); + } +} + +void +pause(void) +{ + struct prom_args { + char *service; + } args; + + args.service = "enter"; + (*prom)(&args); +} + +void * +finddevice(const char *name) +{ + struct prom_args { + char *service; + int nargs; + int nret; + const char *devspec; + void *phandle; + } args; + + args.service = "finddevice"; + args.nargs = 1; + args.nret = 1; + args.devspec = name; + args.phandle = (void *) -1; + (*prom)(&args); + return args.phandle; +} + +void * +claim(unsigned long virt, unsigned long size, unsigned long align) +{ + struct prom_args { + char *service; + int nargs; + int nret; + unsigned int virt; + unsigned int size; + unsigned int align; + void *ret; + } args; + + args.service = "claim"; + args.nargs = 3; + args.nret = 1; + args.virt = virt; + args.size = size; + args.align = align; + (*prom)(&args); + return args.ret; +} + +int +getprop(void *phandle, const char *name, void *buf, int buflen) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *phandle; + const char *name; + void *buf; + int buflen; + int size; + } args; + + args.service = "getprop"; + args.nargs = 4; + args.nret = 1; + args.phandle = phandle; + args.name = name; + args.buf = buf; + args.buflen = buflen; + args.size = -1; + (*prom)(&args); + return args.size; +} + +int +putc(int c, void *f) +{ + char ch = c; + + if (c == '\n') + putc('\r', f); + return write(f, &ch, 1) == 1? c: -1; +} + +int +putchar(int c) +{ + return putc(c, stdout); +} + +int +fputs(char *str, void *f) +{ + int n = strlen(str); + + return write(f, str, n) == n? 0: -1; +} + +int +readchar(void) +{ + char ch; + + for (;;) { + switch (read(stdin, &ch, 1)) { + case 1: + return ch; + case -1: + printk("read(stdin) returned -1\r\n"); + return -1; + } + } +} + +static char line[256]; +static char *lineptr; +static int lineleft; + +int +getchar(void) +{ + int c; + + if (lineleft == 0) { + lineptr = line; + for (;;) { + c = readchar(); + if (c == -1 || c == 4) + break; + if (c == '\r' || c == '\n') { + *lineptr++ = '\n'; + putchar('\n'); + break; + } + switch (c) { + case 0177: + case '\b': + if (lineptr > line) { + putchar('\b'); + putchar(' '); + putchar('\b'); + --lineptr; + } + break; + case 'U' & 0x1F: + while (lineptr > line) { + putchar('\b'); + putchar(' '); + putchar('\b'); + --lineptr; + } + break; + default: + if (lineptr >= &line[sizeof(line) - 1]) + putchar('\a'); + else { + putchar(c); + *lineptr++ = c; + } + } + } + lineleft = lineptr - line; + lineptr = line; + } + if (lineleft == 0) + return -1; + --lineleft; + return *lineptr++; +} + + + +/* String functions lifted from lib/vsprintf.c and lib/ctype.c */ +unsigned char _ctype[] = { +_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ +_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ +_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ +_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ +_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ +_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ +_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ +_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ +_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ +_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ +_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ +_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ +_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ +_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) +{ + unsigned long result = 0,value; + + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && + (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +long simple_strtol(const char *cp,char **endp,unsigned int base) +{ + if(*cp=='-') + return -simple_strtoul(cp+1,endp,base); + return simple_strtoul(cp,endp,base); +} + +static int skip_atoi(const char **s) +{ + int i=0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * str, long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) { + tmp[i++] = digits[num % base]; + num /= base; + } + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) + while(size-->0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base==8) + *str++ = '0'; + else if (base==16) { + *str++ = '0'; + *str++ = digits[33]; + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +/* Forward decl. needed for IP address printing stuff... */ +int sprintf(char * buf, const char *fmt, ...); + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long num; + int i, base; + char * str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + + for (str=buf ; *fmt ; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char) va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long) va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + + case 'n': + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str-buf; +} + +int sprintf(char * buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} + +static char sprint_buf[1024]; + +void +printk(char *fmt, ...) +{ + va_list args; + int n; + + va_start(args, fmt); + n = vsprintf(sprint_buf, fmt, args); + va_end(args); + write(stdout, sprint_buf, n); +} + +int +printf(char *fmt, ...) +{ + va_list args; + int n; + + va_start(args, fmt); + n = vsprintf(sprint_buf, fmt, args); + va_end(args); + write(stdout, sprint_buf, n); + return n; +} diff --git a/arch/ppc64/boot/start.c b/arch/ppc64/boot/start.c new file mode 100644 index 00000000000..ea247e79b55 --- /dev/null +++ b/arch/ppc64/boot/start.c @@ -0,0 +1,654 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include + +#include + +int (*prom)(void *); + +void *chosen_handle; +void *stdin; +void *stdout; +void *stderr; + +void exit(void); +void *finddevice(const char *name); +int getprop(void *phandle, const char *name, void *buf, int buflen); +void chrpboot(int a1, int a2, void *prom); /* in main.c */ + +void printk(char *fmt, ...); + +void +start(int a1, int a2, void *promptr) +{ + prom = (int (*)(void *)) promptr; + chosen_handle = finddevice("/chosen"); + if (chosen_handle == (void *) -1) + exit(); + if (getprop(chosen_handle, "stdout", &stdout, sizeof(stdout)) != 4) + exit(); + stderr = stdout; + if (getprop(chosen_handle, "stdin", &stdin, sizeof(stdin)) != 4) + exit(); + + chrpboot(a1, a2, promptr); + for (;;) + exit(); +} + +int +write(void *handle, void *ptr, int nb) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *ihandle; + void *addr; + int len; + int actual; + } args; + + args.service = "write"; + args.nargs = 3; + args.nret = 1; + args.ihandle = handle; + args.addr = ptr; + args.len = nb; + args.actual = -1; + (*prom)(&args); + return args.actual; +} + +int +read(void *handle, void *ptr, int nb) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *ihandle; + void *addr; + int len; + int actual; + } args; + + args.service = "read"; + args.nargs = 3; + args.nret = 1; + args.ihandle = handle; + args.addr = ptr; + args.len = nb; + args.actual = -1; + (*prom)(&args); + return args.actual; +} + +void +exit() +{ + struct prom_args { + char *service; + } args; + + for (;;) { + args.service = "exit"; + (*prom)(&args); + } +} + +void +pause(void) +{ + struct prom_args { + char *service; + } args; + + args.service = "enter"; + (*prom)(&args); +} + +void * +finddevice(const char *name) +{ + struct prom_args { + char *service; + int nargs; + int nret; + const char *devspec; + void *phandle; + } args; + + args.service = "finddevice"; + args.nargs = 1; + args.nret = 1; + args.devspec = name; + args.phandle = (void *) -1; + (*prom)(&args); + return args.phandle; +} + +void * +claim(unsigned long virt, unsigned long size, unsigned long align) +{ + struct prom_args { + char *service; + int nargs; + int nret; + unsigned int virt; + unsigned int size; + unsigned int align; + void *ret; + } args; + + args.service = "claim"; + args.nargs = 3; + args.nret = 1; + args.virt = virt; + args.size = size; + args.align = align; + (*prom)(&args); + return args.ret; +} + +int +getprop(void *phandle, const char *name, void *buf, int buflen) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *phandle; + const char *name; + void *buf; + int buflen; + int size; + } args; + + args.service = "getprop"; + args.nargs = 4; + args.nret = 1; + args.phandle = phandle; + args.name = name; + args.buf = buf; + args.buflen = buflen; + args.size = -1; + (*prom)(&args); + return args.size; +} + +int +putc(int c, void *f) +{ + char ch = c; + + if (c == '\n') + putc('\r', f); + return write(f, &ch, 1) == 1? c: -1; +} + +int +putchar(int c) +{ + return putc(c, stdout); +} + +int +fputs(char *str, void *f) +{ + int n = strlen(str); + + return write(f, str, n) == n? 0: -1; +} + +int +readchar(void) +{ + char ch; + + for (;;) { + switch (read(stdin, &ch, 1)) { + case 1: + return ch; + case -1: + printk("read(stdin) returned -1\r\n"); + return -1; + } + } +} + +static char line[256]; +static char *lineptr; +static int lineleft; + +int +getchar(void) +{ + int c; + + if (lineleft == 0) { + lineptr = line; + for (;;) { + c = readchar(); + if (c == -1 || c == 4) + break; + if (c == '\r' || c == '\n') { + *lineptr++ = '\n'; + putchar('\n'); + break; + } + switch (c) { + case 0177: + case '\b': + if (lineptr > line) { + putchar('\b'); + putchar(' '); + putchar('\b'); + --lineptr; + } + break; + case 'U' & 0x1F: + while (lineptr > line) { + putchar('\b'); + putchar(' '); + putchar('\b'); + --lineptr; + } + break; + default: + if (lineptr >= &line[sizeof(line) - 1]) + putchar('\a'); + else { + putchar(c); + *lineptr++ = c; + } + } + } + lineleft = lineptr - line; + lineptr = line; + } + if (lineleft == 0) + return -1; + --lineleft; + return *lineptr++; +} + + + +/* String functions lifted from lib/vsprintf.c and lib/ctype.c */ +unsigned char _ctype[] = { +_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ +_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ +_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ +_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ +_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ +_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ +_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ +_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ +_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ +_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ +_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ +_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ +_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ +_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) +{ + unsigned long result = 0,value; + + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && + (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +long simple_strtol(const char *cp,char **endp,unsigned int base) +{ + if(*cp=='-') + return -simple_strtoul(cp+1,endp,base); + return simple_strtoul(cp,endp,base); +} + +static int skip_atoi(const char **s) +{ + int i=0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * str, long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) + tmp[i++] = digits[do_div(num,base)]; + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) + while(size-->0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base==8) + *str++ = '0'; + else if (base==16) { + *str++ = '0'; + *str++ = digits[33]; + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +/* Forward decl. needed for IP address printing stuff... */ +int sprintf(char * buf, const char *fmt, ...); + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char * str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + + for (str=buf ; *fmt ; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char) va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long) va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + + case 'n': + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'L') + num = va_arg(args, long long); + else if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str-buf; +} + +int sprintf(char * buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} + +static char sprint_buf[1024]; + +void +printk(char *fmt, ...) +{ + va_list args; + int n; + + va_start(args, fmt); + n = vsprintf(sprint_buf, fmt, args); + va_end(args); + write(stdout, sprint_buf, n); +} + +int +printf(char *fmt, ...) +{ + va_list args; + int n; + + va_start(args, fmt); + n = vsprintf(sprint_buf, fmt, args); + va_end(args); + write(stdout, sprint_buf, n); + return n; +} diff --git a/arch/ppc64/boot/string.S b/arch/ppc64/boot/string.S new file mode 100644 index 00000000000..ba5f2d21c9e --- /dev/null +++ b/arch/ppc64/boot/string.S @@ -0,0 +1,216 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE: this code runs in 32 bit mode and is packaged as ELF32. + */ + +#include + + .text + .globl strcpy +strcpy: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + + .globl strncpy +strncpy: + cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + blr + + .globl strcat +strcat: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r5) + cmpwi 0,r0,0 + bne 1b + addi r5,r5,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + + .globl strcmp +strcmp: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r5) + cmpwi 1,r3,0 + lbzu r0,1(r4) + subf. r3,r0,r3 + beqlr 1 + beq 1b + blr + + .globl strlen +strlen: + addi r4,r3,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + bne 1b + subf r3,r3,r4 + blr + + .globl memset +memset: + rlwimi r4,r4,8,16,23 + rlwimi r4,r4,16,0,15 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + rlwinm r0,r5,32-2,2,31 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + + .globl memmove +memmove: + cmplw 0,r3,r4 + bgt backwards_memcpy + /* fall through */ + + .globl memcpy +memcpy: + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + addi r6,r3,-4 + addi r4,r4,-4 + beq 2f /* if less than 8 bytes to do */ + andi. r0,r6,3 /* get dest word aligned */ + mtctr r7 + bne 5f +1: lwz r7,4(r4) + lwzu r8,8(r4) + stw r7,4(r6) + stwu r8,8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,4(r4) + addi r5,r5,-4 + stwu r0,4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r4,r4,3 + addi r6,r6,3 +4: lbzu r0,1(r4) + stbu r0,1(r6) + bdnz 4b + blr +5: subfic r0,r0,4 + mtctr r0 +6: lbz r7,4(r4) + addi r4,r4,1 + stb r7,4(r6) + addi r6,r6,1 + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + + .globl backwards_memcpy +backwards_memcpy: + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + + .globl memcmp +memcmp: + cmpwi 0,r5,0 + blelr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r6) + lbzu r0,1(r4) + subf. r3,r0,r3 + bdnzt 2,1b + blr + + +/* + * Flush the dcache and invalidate the icache for a range of addresses. + * + * flush_cache(addr, len) + */ + .global flush_cache +flush_cache: + addi 4,4,0x1f /* len = (len + 0x1f) / 0x20 */ + rlwinm. 4,4,27,5,31 + mtctr 4 + beqlr +1: dcbf 0,3 + icbi 0,3 + addi 3,3,0x20 + bdnz 1b + sync + isync + blr + diff --git a/arch/ppc64/boot/zImage.lds b/arch/ppc64/boot/zImage.lds new file mode 100644 index 00000000000..8fe5e7071f5 --- /dev/null +++ b/arch/ppc64/boot/zImage.lds @@ -0,0 +1,90 @@ +OUTPUT_ARCH(powerpc:common) +SEARCH_DIR(/lib); SEARCH_DIR(/usr/lib); SEARCH_DIR(/usr/local/lib); SEARCH_DIR(/usr/local/powerpc-any-elf/lib); +/* Do we need any of these for elf? + __DYNAMIC = 0; */ +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + . = + SIZEOF_HEADERS; + .interp : { *(.interp) } + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .rel.text : { *(.rel.text) } + .rela.text : { *(.rela.text) } + .rel.data : { *(.rel.data) } + .rela.data : { *(.rela.data) } + .rel.rodata : { *(.rel.rodata) } + .rela.rodata : { *(.rela.rodata) } + .rel.got : { *(.rel.got) } + .rela.got : { *(.rela.got) } + .rel.ctors : { *(.rel.ctors) } + .rela.ctors : { *(.rela.ctors) } + .rel.dtors : { *(.rel.dtors) } + .rela.dtors : { *(.rela.dtors) } + .rel.bss : { *(.rel.bss) } + .rela.bss : { *(.rela.bss) } + .rel.plt : { *(.rel.plt) } + .rela.plt : { *(.rela.plt) } + .plt : { *(.plt) } + .text : + { + *(.text) + *(.fixup) + *(.got1) + } + . = ALIGN(4096); + _etext = .; + PROVIDE (etext = .); + .rodata : + { + *(.rodata) + *(.rodata1) + } + .kstrtab : { *(.kstrtab) } + __vermagic : { *(__vermagic) } + .fini : { *(.fini) } =0 + .ctors : { *(.ctors) } + .dtors : { *(.dtors) } + /* Read-write section, merged into data segment: */ + . = ALIGN(4096); + .data : + { + *(.data) + *(.data1) + *(.sdata) + *(.sdata2) + *(.got.plt) *(.got) + *(.dynamic) + CONSTRUCTORS + } + + . = ALIGN(4096); + _vmlinux_start = .; + .kernel:vmlinux.strip : { *(.kernel:vmlinux.strip) } + _vmlinux_end = .; + + . = ALIGN(4096); + _initrd_start = .; + .kernel:initrd : { *(.kernel:initrd) } + _initrd_end = .; + + . = ALIGN(4096); + _edata = .; + PROVIDE (edata = .); + + .fixup : { *(.fixup) } + + . = ALIGN(4096); + __bss_start = .; + .bss : + { + *(.sbss) *(.scommon) + *(.dynbss) + *(.bss) + *(COMMON) + } + . = ALIGN(4096); + _end = . ; + PROVIDE (end = .); +} diff --git a/arch/ppc64/boot/zlib.c b/arch/ppc64/boot/zlib.c new file mode 100644 index 00000000000..9d5e4e9832d --- /dev/null +++ b/arch/ppc64/boot/zlib.c @@ -0,0 +1,2194 @@ +/* + * This file is derived from various .h and .c files from the zlib-0.95 + * distribution by Jean-loup Gailly and Mark Adler, with some additions + * by Paul Mackerras to aid in implementing Deflate compression and + * decompression for PPP packets. See zlib.h for conditions of + * distribution and use. + * + * Changes that have been made include: + * - changed functions not used outside this file to "local" + * - added minCompression parameter to deflateInit2 + * - added Z_PACKET_FLUSH (see zlib.h for details) + * - added inflateIncomp + * + Copyright (C) 1995 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + gzip@prep.ai.mit.edu madler@alumni.caltech.edu + + * + * + */ + +/*+++++*/ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* From: zutil.h,v 1.9 1995/05/03 17:27:12 jloup Exp */ + +#define _Z_UTIL_H + +#include "zlib.h" + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + +#define FAR + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern char *z_errmsg[]; /* indexed by 1-zlib_error */ + +#define ERR_RETURN(strm,err) return (strm->msg=z_errmsg[1-err], err) +/* To be used only when the state is known to be valid */ + +#ifndef NULL +#define NULL ((void *) 0) +#endif + + /* common constants */ + +#define DEFLATED 8 + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + + /* functions */ + +extern void *memcpy(void *, const void *, unsigned long); +#define zmemcpy memcpy + +/* Diagnostic functions */ +#ifdef DEBUG_ZLIB +# include +# ifndef verbose +# define verbose 0 +# endif +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) fprintf x +# define Tracev(x) {if (verbose) fprintf x ;} +# define Tracevv(x) {if (verbose>1) fprintf x ;} +# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} +# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + + +typedef uLong (*check_func) OF((uLong check, Bytef *buf, uInt len)); + +/* voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); */ +/* void zcfree OF((voidpf opaque, voidpf ptr)); */ + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr, size) \ + (*((strm)->zfree))((strm)->opaque, (voidpf)(addr), (size)) +#define TRY_FREE(s, p, n) {if (p) ZFREE(s, p, n);} + +/* deflate.h -- internal compression state + * Copyright (C) 1995 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/*+++++*/ +/* infblock.h -- header to use infblock.c + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +struct inflate_blocks_state; +typedef struct inflate_blocks_state FAR inflate_blocks_statef; + +local inflate_blocks_statef * inflate_blocks_new OF(( + z_stream *z, + check_func c, /* check function */ + uInt w)); /* window size */ + +local int inflate_blocks OF(( + inflate_blocks_statef *, + z_stream *, + int)); /* initial return code */ + +local void inflate_blocks_reset OF(( + inflate_blocks_statef *, + z_stream *, + uLongf *)); /* check value on output */ + +local int inflate_blocks_free OF(( + inflate_blocks_statef *, + z_stream *, + uLongf *)); /* check value on output */ + +local int inflate_addhistory OF(( + inflate_blocks_statef *, + z_stream *)); + +local int inflate_packet_flush OF(( + inflate_blocks_statef *)); + +/*+++++*/ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Huffman code lookup table entry--this entry is four bytes for machines + that have 16-bit pointers (e.g. PC's in the small or medium model). */ + +typedef struct inflate_huft_s FAR inflate_huft; + +struct inflate_huft_s { + union { + struct { + Byte Exop; /* number of extra bits or operation */ + Byte Bits; /* number of bits in this code or subcode */ + } what; + uInt Nalloc; /* number of these allocated here */ + Bytef *pad; /* pad structure to a power of 2 (4 bytes for */ + } word; /* 16-bit, 8 bytes for 32-bit machines) */ + union { + uInt Base; /* literal, length base, or distance base */ + inflate_huft *Next; /* pointer to next level of table */ + } more; +}; + +#ifdef DEBUG_ZLIB + local uInt inflate_hufts; +#endif + +local int inflate_trees_bits OF(( + uIntf *, /* 19 code lengths */ + uIntf *, /* bits tree desired/actual depth */ + inflate_huft * FAR *, /* bits tree result */ + z_stream *)); /* for zalloc, zfree functions */ + +local int inflate_trees_dynamic OF(( + uInt, /* number of literal/length codes */ + uInt, /* number of distance codes */ + uIntf *, /* that many (total) code lengths */ + uIntf *, /* literal desired/actual bit depth */ + uIntf *, /* distance desired/actual bit depth */ + inflate_huft * FAR *, /* literal/length tree result */ + inflate_huft * FAR *, /* distance tree result */ + z_stream *)); /* for zalloc, zfree functions */ + +local int inflate_trees_fixed OF(( + uIntf *, /* literal desired/actual bit depth */ + uIntf *, /* distance desired/actual bit depth */ + inflate_huft * FAR *, /* literal/length tree result */ + inflate_huft * FAR *)); /* distance tree result */ + +local int inflate_trees_free OF(( + inflate_huft *, /* tables to free */ + z_stream *)); /* for zfree function */ + + +/*+++++*/ +/* infcodes.h -- header to use infcodes.c + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +struct inflate_codes_state; +typedef struct inflate_codes_state FAR inflate_codes_statef; + +local inflate_codes_statef *inflate_codes_new OF(( + uInt, uInt, + inflate_huft *, inflate_huft *, + z_stream *)); + +local int inflate_codes OF(( + inflate_blocks_statef *, + z_stream *, + int)); + +local void inflate_codes_free OF(( + inflate_codes_statef *, + z_stream *)); + + +/*+++++*/ +/* inflate.c -- zlib interface to inflate modules + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* inflate private state */ +struct internal_state { + + /* mode */ + enum { + METHOD, /* waiting for method byte */ + FLAG, /* waiting for flag byte */ + BLOCKS, /* decompressing blocks */ + CHECK4, /* four check bytes to go */ + CHECK3, /* three check bytes to go */ + CHECK2, /* two check bytes to go */ + CHECK1, /* one check byte to go */ + DONE, /* finished check, done */ + BAD} /* got an error--stay here */ + mode; /* current inflate mode */ + + /* mode dependent information */ + union { + uInt method; /* if FLAGS, method byte */ + struct { + uLong was; /* computed check value */ + uLong need; /* stream check value */ + } check; /* if CHECK, check values to compare */ + uInt marker; /* if BAD, inflateSync's marker bytes count */ + } sub; /* submode */ + + /* mode independent information */ + int nowrap; /* flag for no wrapper */ + uInt wbits; /* log2(window size) (8..15, defaults to 15) */ + inflate_blocks_statef + *blocks; /* current inflate_blocks state */ + +}; + + +int inflateReset( + z_stream *z +) +{ + uLong c; + + if (z == Z_NULL || z->state == Z_NULL) + return Z_STREAM_ERROR; + z->total_in = z->total_out = 0; + z->msg = Z_NULL; + z->state->mode = z->state->nowrap ? BLOCKS : METHOD; + inflate_blocks_reset(z->state->blocks, z, &c); + Trace((stderr, "inflate: reset\n")); + return Z_OK; +} + + +int inflateEnd( + z_stream *z +) +{ + uLong c; + + if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL) + return Z_STREAM_ERROR; + if (z->state->blocks != Z_NULL) + inflate_blocks_free(z->state->blocks, z, &c); + ZFREE(z, z->state, sizeof(struct internal_state)); + z->state = Z_NULL; + Trace((stderr, "inflate: end\n")); + return Z_OK; +} + + +int inflateInit2( + z_stream *z, + int w +) +{ + /* initialize state */ + if (z == Z_NULL) + return Z_STREAM_ERROR; +/* if (z->zalloc == Z_NULL) z->zalloc = zcalloc; */ +/* if (z->zfree == Z_NULL) z->zfree = zcfree; */ + if ((z->state = (struct internal_state FAR *) + ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL) + return Z_MEM_ERROR; + z->state->blocks = Z_NULL; + + /* handle undocumented nowrap option (no zlib header or check) */ + z->state->nowrap = 0; + if (w < 0) + { + w = - w; + z->state->nowrap = 1; + } + + /* set window size */ + if (w < 8 || w > 15) + { + inflateEnd(z); + return Z_STREAM_ERROR; + } + z->state->wbits = (uInt)w; + + /* create inflate_blocks state */ + if ((z->state->blocks = + inflate_blocks_new(z, z->state->nowrap ? Z_NULL : adler32, 1 << w)) + == Z_NULL) + { + inflateEnd(z); + return Z_MEM_ERROR; + } + Trace((stderr, "inflate: allocated\n")); + + /* reset state */ + inflateReset(z); + return Z_OK; +} + + +int inflateInit( + z_stream *z +) +{ + return inflateInit2(z, DEF_WBITS); +} + + +#define NEEDBYTE {if(z->avail_in==0)goto empty;r=Z_OK;} +#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++) + +int inflate( + z_stream *z, + int f +) +{ + int r; + uInt b; + + if (z == Z_NULL || z->next_in == Z_NULL) + return Z_STREAM_ERROR; + r = Z_BUF_ERROR; + while (1) switch (z->state->mode) + { + case METHOD: + NEEDBYTE + if (((z->state->sub.method = NEXTBYTE) & 0xf) != DEFLATED) + { + z->state->mode = BAD; + z->msg = "unknown compression method"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + if ((z->state->sub.method >> 4) + 8 > z->state->wbits) + { + z->state->mode = BAD; + z->msg = "invalid window size"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + z->state->mode = FLAG; + case FLAG: + NEEDBYTE + if ((b = NEXTBYTE) & 0x20) + { + z->state->mode = BAD; + z->msg = "invalid reserved bit"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + if (((z->state->sub.method << 8) + b) % 31) + { + z->state->mode = BAD; + z->msg = "incorrect header check"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + Trace((stderr, "inflate: zlib header ok\n")); + z->state->mode = BLOCKS; + case BLOCKS: + r = inflate_blocks(z->state->blocks, z, r); + if (f == Z_PACKET_FLUSH && z->avail_in == 0 && z->avail_out != 0) + r = inflate_packet_flush(z->state->blocks); + if (r == Z_DATA_ERROR) + { + z->state->mode = BAD; + z->state->sub.marker = 0; /* can try inflateSync */ + break; + } + if (r != Z_STREAM_END) + return r; + r = Z_OK; + inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was); + if (z->state->nowrap) + { + z->state->mode = DONE; + break; + } + z->state->mode = CHECK4; + case CHECK4: + NEEDBYTE + z->state->sub.check.need = (uLong)NEXTBYTE << 24; + z->state->mode = CHECK3; + case CHECK3: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE << 16; + z->state->mode = CHECK2; + case CHECK2: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE << 8; + z->state->mode = CHECK1; + case CHECK1: + NEEDBYTE + z->state->sub.check.need += (uLong)NEXTBYTE; + + if (z->state->sub.check.was != z->state->sub.check.need) + { + z->state->mode = BAD; + z->msg = "incorrect data check"; + z->state->sub.marker = 5; /* can't try inflateSync */ + break; + } + Trace((stderr, "inflate: zlib check ok\n")); + z->state->mode = DONE; + case DONE: + return Z_STREAM_END; + case BAD: + return Z_DATA_ERROR; + default: + return Z_STREAM_ERROR; + } + + empty: + if (f != Z_PACKET_FLUSH) + return r; + z->state->mode = BAD; + z->state->sub.marker = 0; /* can try inflateSync */ + return Z_DATA_ERROR; +} + +/* + * This subroutine adds the data at next_in/avail_in to the output history + * without performing any output. The output buffer must be "caught up"; + * i.e. no pending output (hence s->read equals s->write), and the state must + * be BLOCKS (i.e. we should be willing to see the start of a series of + * BLOCKS). On exit, the output will also be caught up, and the checksum + * will have been updated if need be. + */ + +int inflateIncomp( + z_stream *z +) +{ + if (z->state->mode != BLOCKS) + return Z_DATA_ERROR; + return inflate_addhistory(z->state->blocks, z); +} + + +int inflateSync( + z_stream *z +) +{ + uInt n; /* number of bytes to look at */ + Bytef *p; /* pointer to bytes */ + uInt m; /* number of marker bytes found in a row */ + uLong r, w; /* temporaries to save total_in and total_out */ + + /* set up */ + if (z == Z_NULL || z->state == Z_NULL) + return Z_STREAM_ERROR; + if (z->state->mode != BAD) + { + z->state->mode = BAD; + z->state->sub.marker = 0; + } + if ((n = z->avail_in) == 0) + return Z_BUF_ERROR; + p = z->next_in; + m = z->state->sub.marker; + + /* search */ + while (n && m < 4) + { + if (*p == (Byte)(m < 2 ? 0 : 0xff)) + m++; + else if (*p) + m = 0; + else + m = 4 - m; + p++, n--; + } + + /* restore */ + z->total_in += p - z->next_in; + z->next_in = p; + z->avail_in = n; + z->state->sub.marker = m; + + /* return no joy or set up to restart on a new block */ + if (m != 4) + return Z_DATA_ERROR; + r = z->total_in; w = z->total_out; + inflateReset(z); + z->total_in = r; z->total_out = w; + z->state->mode = BLOCKS; + return Z_OK; +} + +#undef NEEDBYTE +#undef NEXTBYTE + +/*+++++*/ +/* infutil.h -- types and macros common to blocks and codes + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* inflate blocks semi-private state */ +struct inflate_blocks_state { + + /* mode */ + enum { + TYPE, /* get type bits (3, including end bit) */ + LENS, /* get lengths for stored */ + STORED, /* processing stored block */ + TABLE, /* get table lengths */ + BTREE, /* get bit lengths tree for a dynamic block */ + DTREE, /* get length, distance trees for a dynamic block */ + CODES, /* processing fixed or dynamic block */ + DRY, /* output remaining window bytes */ + DONEB, /* finished last block, done */ + BADB} /* got a data error--stuck here */ + mode; /* current inflate_block mode */ + + /* mode dependent information */ + union { + uInt left; /* if STORED, bytes left to copy */ + struct { + uInt table; /* table lengths (14 bits) */ + uInt index; /* index into blens (or border) */ + uIntf *blens; /* bit lengths of codes */ + uInt bb; /* bit length tree depth */ + inflate_huft *tb; /* bit length decoding tree */ + int nblens; /* # elements allocated at blens */ + } trees; /* if DTREE, decoding info for trees */ + struct { + inflate_huft *tl, *td; /* trees to free */ + inflate_codes_statef + *codes; + } decode; /* if CODES, current state */ + } sub; /* submode */ + uInt last; /* true if this block is the last block */ + + /* mode independent information */ + uInt bitk; /* bits in bit buffer */ + uLong bitb; /* bit buffer */ + Bytef *window; /* sliding window */ + Bytef *end; /* one byte after sliding window */ + Bytef *read; /* window read pointer */ + Bytef *write; /* window write pointer */ + check_func checkfn; /* check function */ + uLong check; /* check on output */ + +}; + + +/* defines for inflate input/output */ +/* update pointers and return */ +#define UPDBITS {s->bitb=b;s->bitk=k;} +#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;} +#define UPDOUT {s->write=q;} +#define UPDATE {UPDBITS UPDIN UPDOUT} +#define LEAVE {UPDATE return inflate_flush(s,z,r);} +/* get bytes and bits */ +#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;} +#define NEEDBYTE {if(n)r=Z_OK;else LEAVE} +#define NEXTBYTE (n--,*p++) +#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<>=(j);k-=(j);} +/* output bytes */ +#define WAVAIL (qread?s->read-q-1:s->end-q) +#define LOADOUT {q=s->write;m=WAVAIL;} +#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=WAVAIL;}} +#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT} +#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;} +#define OUTBYTE(a) {*q++=(Byte)(a);m--;} +/* load local pointers */ +#define LOAD {LOADIN LOADOUT} + +/* And'ing with mask[n] masks the lower n bits */ +local uInt inflate_mask[] = { + 0x0000, + 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, + 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff +}; + +/* copy as much as possible from the sliding window to the output area */ +local int inflate_flush OF(( + inflate_blocks_statef *, + z_stream *, + int)); + +/*+++++*/ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +local int inflate_fast OF(( + uInt, + uInt, + inflate_huft *, + inflate_huft *, + inflate_blocks_statef *, + z_stream *)); + + +/*+++++*/ +/* infblock.c -- interpret and process block types to last block + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* Table for deflate from PKZIP's appnote.txt. */ +local uInt border[] = { /* Order of the bit length code lengths */ + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + +/* + Notes beyond the 1.93a appnote.txt: + + 1. Distance pointers never point before the beginning of the output + stream. + 2. Distance pointers can point back across blocks, up to 32k away. + 3. There is an implied maximum of 7 bits for the bit length table and + 15 bits for the actual data. + 4. If only one code exists, then it is encoded using one bit. (Zero + would be more efficient, but perhaps a little confusing.) If two + codes exist, they are coded using one bit each (0 and 1). + 5. There is no way of sending zero distance codes--a dummy must be + sent if there are none. (History: a pre 2.0 version of PKZIP would + store blocks with no distance codes, but this was discovered to be + too harsh a criterion.) Valid only for 1.93a. 2.04c does allow + zero distance codes, which is sent as one code of zero bits in + length. + 6. There are up to 286 literal/length codes. Code 256 represents the + end-of-block. Note however that the static length tree defines + 288 codes just to fill out the Huffman codes. Codes 286 and 287 + cannot be used though, since there is no length base or extra bits + defined for them. Similarily, there are up to 30 distance codes. + However, static trees define 32 codes (all 5 bits) to fill out the + Huffman codes, but the last two had better not show up in the data. + 7. Unzip can check dynamic Huffman blocks for complete code sets. + The exception is that a single code would not be complete (see #4). + 8. The five bits following the block type is really the number of + literal codes sent minus 257. + 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits + (1+6+6). Therefore, to output three times the length, you output + three codes (1+1+1), whereas to output four times the same length, + you only need two codes (1+3). Hmm. + 10. In the tree reconstruction algorithm, Code = Code + Increment + only if BitLength(i) is not zero. (Pretty obvious.) + 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) + 12. Note: length code 284 can represent 227-258, but length code 285 + really is 258. The last length deserves its own, short code + since it gets used a lot in very redundant files. The length + 258 is special since 258 - 3 (the min match length) is 255. + 13. The literal/length and distance code bit lengths are read as a + single stream of lengths. It is possible (and advantageous) for + a repeat code (16, 17, or 18) to go across the boundary between + the two sets of lengths. + */ + + +local void inflate_blocks_reset( + inflate_blocks_statef *s, + z_stream *z, + uLongf *c +) +{ + if (s->checkfn != Z_NULL) + *c = s->check; + if (s->mode == BTREE || s->mode == DTREE) + ZFREE(z, s->sub.trees.blens, s->sub.trees.nblens * sizeof(uInt)); + if (s->mode == CODES) + { + inflate_codes_free(s->sub.decode.codes, z); + inflate_trees_free(s->sub.decode.td, z); + inflate_trees_free(s->sub.decode.tl, z); + } + s->mode = TYPE; + s->bitk = 0; + s->bitb = 0; + s->read = s->write = s->window; + if (s->checkfn != Z_NULL) + s->check = (*s->checkfn)(0L, Z_NULL, 0); + Trace((stderr, "inflate: blocks reset\n")); +} + + +local inflate_blocks_statef *inflate_blocks_new( + z_stream *z, + check_func c, + uInt w +) +{ + inflate_blocks_statef *s; + + if ((s = (inflate_blocks_statef *)ZALLOC + (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL) + return s; + if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL) + { + ZFREE(z, s, sizeof(struct inflate_blocks_state)); + return Z_NULL; + } + s->end = s->window + w; + s->checkfn = c; + s->mode = TYPE; + Trace((stderr, "inflate: blocks allocated\n")); + inflate_blocks_reset(s, z, &s->check); + return s; +} + + +local int inflate_blocks( + inflate_blocks_statef *s, + z_stream *z, + int r +) +{ + uInt t; /* temporary storage */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + + /* copy input/output information to locals (UPDATE macro restores) */ + LOAD + + /* process input based on current state */ + while (1) switch (s->mode) + { + case TYPE: + NEEDBITS(3) + t = (uInt)b & 7; + s->last = t & 1; + switch (t >> 1) + { + case 0: /* stored */ + Trace((stderr, "inflate: stored block%s\n", + s->last ? " (last)" : "")); + DUMPBITS(3) + t = k & 7; /* go to byte boundary */ + DUMPBITS(t) + s->mode = LENS; /* get length of stored block */ + break; + case 1: /* fixed */ + Trace((stderr, "inflate: fixed codes block%s\n", + s->last ? " (last)" : "")); + { + uInt bl, bd; + inflate_huft *tl, *td; + + inflate_trees_fixed(&bl, &bd, &tl, &td); + s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z); + if (s->sub.decode.codes == Z_NULL) + { + r = Z_MEM_ERROR; + LEAVE + } + s->sub.decode.tl = Z_NULL; /* don't try to free these */ + s->sub.decode.td = Z_NULL; + } + DUMPBITS(3) + s->mode = CODES; + break; + case 2: /* dynamic */ + Trace((stderr, "inflate: dynamic codes block%s\n", + s->last ? " (last)" : "")); + DUMPBITS(3) + s->mode = TABLE; + break; + case 3: /* illegal */ + DUMPBITS(3) + s->mode = BADB; + z->msg = "invalid block type"; + r = Z_DATA_ERROR; + LEAVE + } + break; + case LENS: + NEEDBITS(32) + if (((~b) >> 16) != (b & 0xffff)) + { + s->mode = BADB; + z->msg = "invalid stored block lengths"; + r = Z_DATA_ERROR; + LEAVE + } + s->sub.left = (uInt)b & 0xffff; + b = k = 0; /* dump bits */ + Tracev((stderr, "inflate: stored length %u\n", s->sub.left)); + s->mode = s->sub.left ? STORED : TYPE; + break; + case STORED: + if (n == 0) + LEAVE + NEEDOUT + t = s->sub.left; + if (t > n) t = n; + if (t > m) t = m; + zmemcpy(q, p, t); + p += t; n -= t; + q += t; m -= t; + if ((s->sub.left -= t) != 0) + break; + Tracev((stderr, "inflate: stored end, %lu total out\n", + z->total_out + (q >= s->read ? q - s->read : + (s->end - s->read) + (q - s->window)))); + s->mode = s->last ? DRY : TYPE; + break; + case TABLE: + NEEDBITS(14) + s->sub.trees.table = t = (uInt)b & 0x3fff; +#ifndef PKZIP_BUG_WORKAROUND + if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29) + { + s->mode = BADB; + z->msg = "too many length or distance symbols"; + r = Z_DATA_ERROR; + LEAVE + } +#endif + t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); + if (t < 19) + t = 19; + if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL) + { + r = Z_MEM_ERROR; + LEAVE + } + s->sub.trees.nblens = t; + DUMPBITS(14) + s->sub.trees.index = 0; + Tracev((stderr, "inflate: table sizes ok\n")); + s->mode = BTREE; + case BTREE: + while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10)) + { + NEEDBITS(3) + s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7; + DUMPBITS(3) + } + while (s->sub.trees.index < 19) + s->sub.trees.blens[border[s->sub.trees.index++]] = 0; + s->sub.trees.bb = 7; + t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb, + &s->sub.trees.tb, z); + if (t != Z_OK) + { + r = t; + if (r == Z_DATA_ERROR) + s->mode = BADB; + LEAVE + } + s->sub.trees.index = 0; + Tracev((stderr, "inflate: bits tree ok\n")); + s->mode = DTREE; + case DTREE: + while (t = s->sub.trees.table, + s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f)) + { + inflate_huft *h; + uInt i, j, c; + + t = s->sub.trees.bb; + NEEDBITS(t) + h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]); + t = h->word.what.Bits; + c = h->more.Base; + if (c < 16) + { + DUMPBITS(t) + s->sub.trees.blens[s->sub.trees.index++] = c; + } + else /* c == 16..18 */ + { + i = c == 18 ? 7 : c - 14; + j = c == 18 ? 11 : 3; + NEEDBITS(t + i) + DUMPBITS(t) + j += (uInt)b & inflate_mask[i]; + DUMPBITS(i) + i = s->sub.trees.index; + t = s->sub.trees.table; + if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) || + (c == 16 && i < 1)) + { + s->mode = BADB; + z->msg = "invalid bit length repeat"; + r = Z_DATA_ERROR; + LEAVE + } + c = c == 16 ? s->sub.trees.blens[i - 1] : 0; + do { + s->sub.trees.blens[i++] = c; + } while (--j); + s->sub.trees.index = i; + } + } + inflate_trees_free(s->sub.trees.tb, z); + s->sub.trees.tb = Z_NULL; + { + uInt bl, bd; + inflate_huft *tl, *td; + inflate_codes_statef *c; + + bl = 9; /* must be <= 9 for lookahead assumptions */ + bd = 6; /* must be <= 9 for lookahead assumptions */ + t = s->sub.trees.table; + t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f), + s->sub.trees.blens, &bl, &bd, &tl, &td, z); + if (t != Z_OK) + { + if (t == (uInt)Z_DATA_ERROR) + s->mode = BADB; + r = t; + LEAVE + } + Tracev((stderr, "inflate: trees ok\n")); + if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL) + { + inflate_trees_free(td, z); + inflate_trees_free(tl, z); + r = Z_MEM_ERROR; + LEAVE + } + ZFREE(z, s->sub.trees.blens, s->sub.trees.nblens * sizeof(uInt)); + s->sub.decode.codes = c; + s->sub.decode.tl = tl; + s->sub.decode.td = td; + } + s->mode = CODES; + case CODES: + UPDATE + if ((r = inflate_codes(s, z, r)) != Z_STREAM_END) + return inflate_flush(s, z, r); + r = Z_OK; + inflate_codes_free(s->sub.decode.codes, z); + inflate_trees_free(s->sub.decode.td, z); + inflate_trees_free(s->sub.decode.tl, z); + LOAD + Tracev((stderr, "inflate: codes end, %lu total out\n", + z->total_out + (q >= s->read ? q - s->read : + (s->end - s->read) + (q - s->window)))); + if (!s->last) + { + s->mode = TYPE; + break; + } + if (k > 7) /* return unused byte, if any */ + { + Assert(k < 16, "inflate_codes grabbed too many bytes") + k -= 8; + n++; + p--; /* can always return one */ + } + s->mode = DRY; + case DRY: + FLUSH + if (s->read != s->write) + LEAVE + s->mode = DONEB; + case DONEB: + r = Z_STREAM_END; + LEAVE + case BADB: + r = Z_DATA_ERROR; + LEAVE + default: + r = Z_STREAM_ERROR; + LEAVE + } +} + + +local int inflate_blocks_free( + inflate_blocks_statef *s, + z_stream *z, + uLongf *c +) +{ + inflate_blocks_reset(s, z, c); + ZFREE(z, s->window, s->end - s->window); + ZFREE(z, s, sizeof(struct inflate_blocks_state)); + Trace((stderr, "inflate: blocks freed\n")); + return Z_OK; +} + +/* + * This subroutine adds the data at next_in/avail_in to the output history + * without performing any output. The output buffer must be "caught up"; + * i.e. no pending output (hence s->read equals s->write), and the state must + * be BLOCKS (i.e. we should be willing to see the start of a series of + * BLOCKS). On exit, the output will also be caught up, and the checksum + * will have been updated if need be. + */ +local int inflate_addhistory( + inflate_blocks_statef *s, + z_stream *z +) +{ + uLong b; /* bit buffer */ /* NOT USED HERE */ + uInt k; /* bits in bit buffer */ /* NOT USED HERE */ + uInt t; /* temporary storage */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + + if (s->read != s->write) + return Z_STREAM_ERROR; + if (s->mode != TYPE) + return Z_DATA_ERROR; + + /* we're ready to rock */ + LOAD + /* while there is input ready, copy to output buffer, moving + * pointers as needed. + */ + while (n) { + t = n; /* how many to do */ + /* is there room until end of buffer? */ + if (t > m) t = m; + /* update check information */ + if (s->checkfn != Z_NULL) + s->check = (*s->checkfn)(s->check, q, t); + zmemcpy(q, p, t); + q += t; + p += t; + n -= t; + z->total_out += t; + s->read = q; /* drag read pointer forward */ +/* WRAP */ /* expand WRAP macro by hand to handle s->read */ + if (q == s->end) { + s->read = q = s->window; + m = WAVAIL; + } + } + UPDATE + return Z_OK; +} + + +/* + * At the end of a Deflate-compressed PPP packet, we expect to have seen + * a `stored' block type value but not the (zero) length bytes. + */ +local int inflate_packet_flush( + inflate_blocks_statef *s +) +{ + if (s->mode != LENS) + return Z_DATA_ERROR; + s->mode = TYPE; + return Z_OK; +} + + +/*+++++*/ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* simplify the use of the inflate_huft type with some defines */ +#define base more.Base +#define next more.Next +#define exop word.what.Exop +#define bits word.what.Bits + + +local int huft_build OF(( + uIntf *, /* code lengths in bits */ + uInt, /* number of codes */ + uInt, /* number of "simple" codes */ + uIntf *, /* list of base values for non-simple codes */ + uIntf *, /* list of extra bits for non-simple codes */ + inflate_huft * FAR*,/* result: starting table */ + uIntf *, /* maximum lookup bits (returns actual) */ + z_stream *)); /* for zalloc function */ + +local voidpf falloc OF(( + voidpf, /* opaque pointer (not used) */ + uInt, /* number of items */ + uInt)); /* size of item */ + +local void ffree OF(( + voidpf q, /* opaque pointer (not used) */ + voidpf p, /* what to free (not used) */ + uInt n)); /* number of bytes (not used) */ + +/* Tables for deflate from PKZIP's appnote.txt. */ +local uInt cplens[] = { /* Copy lengths for literal codes 257..285 */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + /* actually lengths - 2; also see note #13 above about 258 */ +local uInt cplext[] = { /* Extra bits for literal codes 257..285 */ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 192, 192}; /* 192==invalid */ +local uInt cpdist[] = { /* Copy offsets for distance codes 0..29 */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577}; +local uInt cpdext[] = { /* Extra bits for distance codes */ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13}; + +/* + Huffman code decoding is performed using a multi-level table lookup. + The fastest way to decode is to simply build a lookup table whose + size is determined by the longest code. However, the time it takes + to build this table can also be a factor if the data being decoded + is not very long. The most common codes are necessarily the + shortest codes, so those codes dominate the decoding time, and hence + the speed. The idea is you can have a shorter table that decodes the + shorter, more probable codes, and then point to subsidiary tables for + the longer codes. The time it costs to decode the longer codes is + then traded against the time it takes to make longer tables. + + This results of this trade are in the variables lbits and dbits + below. lbits is the number of bits the first level table for literal/ + length codes can decode in one step, and dbits is the same thing for + the distance codes. Subsequent tables are also less than or equal to + those sizes. These values may be adjusted either when all of the + codes are shorter than that, in which case the longest code length in + bits is used, or when the shortest code is *longer* than the requested + table size, in which case the length of the shortest code in bits is + used. + + There are two different values for the two tables, since they code a + different number of possibilities each. The literal/length table + codes 286 possible values, or in a flat code, a little over eight + bits. The distance table codes 30 possible values, or a little less + than five bits, flat. The optimum values for speed end up being + about one bit more than those, so lbits is 8+1 and dbits is 5+1. + The optimum values may differ though from machine to machine, and + possibly even between compilers. Your mileage may vary. + */ + + +/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */ +#define BMAX 15 /* maximum bit length of any code */ +#define N_MAX 288 /* maximum number of codes in any set */ + +#ifdef DEBUG_ZLIB + uInt inflate_hufts; +#endif + +local int huft_build( + uIntf *b, /* code lengths in bits (all assumed <= BMAX) */ + uInt n, /* number of codes (assumed <= N_MAX) */ + uInt s, /* number of simple-valued codes (0..s-1) */ + uIntf *d, /* list of base values for non-simple codes */ + uIntf *e, /* list of extra bits for non-simple codes */ + inflate_huft * FAR *t, /* result: starting table */ + uIntf *m, /* maximum lookup bits, returns actual */ + z_stream *zs /* for zalloc function */ +) +/* Given a list of code lengths and a maximum table size, make a set of + tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR + if the given code set is incomplete (the tables are still built in this + case), Z_DATA_ERROR if the input is invalid (all zero length codes or an + over-subscribed set of lengths), or Z_MEM_ERROR if not enough memory. */ +{ + + uInt a; /* counter for codes of length k */ + uInt c[BMAX+1]; /* bit length count table */ + uInt f; /* i repeats in table every f entries */ + int g; /* maximum code length */ + int h; /* table level */ + register uInt i; /* counter, current code */ + register uInt j; /* counter */ + register int k; /* number of bits in current code */ + int l; /* bits per table (returned in m) */ + register uIntf *p; /* pointer into c[], b[], or v[] */ + inflate_huft *q; /* points to current table */ + struct inflate_huft_s r; /* table entry for structure assignment */ + inflate_huft *u[BMAX]; /* table stack */ + uInt v[N_MAX]; /* values in order of bit length */ + register int w; /* bits before this table == (l * h) */ + uInt x[BMAX+1]; /* bit offsets, then code stack */ + uIntf *xp; /* pointer into x */ + int y; /* number of dummy codes added */ + uInt z; /* number of entries in current table */ + + + /* Generate counts for each bit length */ + p = c; +#define C0 *p++ = 0; +#define C2 C0 C0 C0 C0 +#define C4 C2 C2 C2 C2 + C4 /* clear c[]--assume BMAX+1 is 16 */ + p = b; i = n; + do { + c[*p++]++; /* assume all entries <= BMAX */ + } while (--i); + if (c[0] == n) /* null input--all zero length codes */ + { + *t = (inflate_huft *)Z_NULL; + *m = 0; + return Z_OK; + } + + + /* Find minimum and maximum length, bound *m by those */ + l = *m; + for (j = 1; j <= BMAX; j++) + if (c[j]) + break; + k = j; /* minimum code length */ + if ((uInt)l < j) + l = j; + for (i = BMAX; i; i--) + if (c[i]) + break; + g = i; /* maximum code length */ + if ((uInt)l > i) + l = i; + *m = l; + + + /* Adjust last length count to fill out codes, if needed */ + for (y = 1 << j; j < i; j++, y <<= 1) + if ((y -= c[j]) < 0) + return Z_DATA_ERROR; + if ((y -= c[i]) < 0) + return Z_DATA_ERROR; + c[i] += y; + + + /* Generate starting offsets into the value table for each length */ + x[1] = j = 0; + p = c + 1; xp = x + 2; + while (--i) { /* note that i == g from above */ + *xp++ = (j += *p++); + } + + + /* Make a table of values in order of bit lengths */ + p = b; i = 0; + do { + if ((j = *p++) != 0) + v[x[j]++] = i; + } while (++i < n); + + + /* Generate the Huffman codes and for each, make the table entries */ + x[0] = i = 0; /* first Huffman code is zero */ + p = v; /* grab values in bit order */ + h = -1; /* no tables yet--level -1 */ + w = -l; /* bits decoded == (l * h) */ + u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */ + q = (inflate_huft *)Z_NULL; /* ditto */ + z = 0; /* ditto */ + + /* go through the bit lengths (k already is bits in shortest code) */ + for (; k <= g; k++) + { + a = c[k]; + while (a--) + { + /* here i is the Huffman code of length k bits for value *p */ + /* make tables up to required level */ + while (k > w + l) + { + h++; + w += l; /* previous table always l bits */ + + /* compute minimum size table less than or equal to l bits */ + z = (z = g - w) > (uInt)l ? l : z; /* table size upper limit */ + if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ + { /* too few codes for k-w bit table */ + f -= a + 1; /* deduct codes from patterns left */ + xp = c + k; + if (j < z) + while (++j < z) /* try smaller tables up to z bits */ + { + if ((f <<= 1) <= *++xp) + break; /* enough codes to use up j bits */ + f -= *xp; /* else deduct codes from patterns */ + } + } + z = 1 << j; /* table entries for j-bit table */ + + /* allocate and link in new table */ + if ((q = (inflate_huft *)ZALLOC + (zs,z + 1,sizeof(inflate_huft))) == Z_NULL) + { + if (h) + inflate_trees_free(u[0], zs); + return Z_MEM_ERROR; /* not enough memory */ + } + q->word.Nalloc = z + 1; +#ifdef DEBUG_ZLIB + inflate_hufts += z + 1; +#endif + *t = q + 1; /* link to list for huft_free() */ + *(t = &(q->next)) = Z_NULL; + u[h] = ++q; /* table starts after link */ + + /* connect to last table, if there is one */ + if (h) + { + x[h] = i; /* save pattern for backing up */ + r.bits = (Byte)l; /* bits to dump before this table */ + r.exop = (Byte)j; /* bits in this table */ + r.next = q; /* pointer to this table */ + j = i >> (w - l); /* (get around Turbo C bug) */ + u[h-1][j] = r; /* connect to last table */ + } + } + + /* set up table entry in r */ + r.bits = (Byte)(k - w); + if (p >= v + n) + r.exop = 128 + 64; /* out of values--invalid code */ + else if (*p < s) + { + r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */ + r.base = *p++; /* simple code is just the value */ + } + else + { + r.exop = (Byte)e[*p - s] + 16 + 64; /* non-simple--look up in lists */ + r.base = d[*p++ - s]; + } + + /* fill code-like entries with r */ + f = 1 << (k - w); + for (j = i >> w; j < z; j += f) + q[j] = r; + + /* backwards increment the k-bit code i */ + for (j = 1 << (k - 1); i & j; j >>= 1) + i ^= j; + i ^= j; + + /* backup over finished tables */ + while ((i & ((1 << w) - 1)) != x[h]) + { + h--; /* don't need to update q */ + w -= l; + } + } + } + + + /* Return Z_BUF_ERROR if we were given an incomplete table */ + return y != 0 && g != 1 ? Z_BUF_ERROR : Z_OK; +} + + +local int inflate_trees_bits( + uIntf *c, /* 19 code lengths */ + uIntf *bb, /* bits tree desired/actual depth */ + inflate_huft * FAR *tb, /* bits tree result */ + z_stream *z /* for zfree function */ +) +{ + int r; + + r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL, tb, bb, z); + if (r == Z_DATA_ERROR) + z->msg = "oversubscribed dynamic bit lengths tree"; + else if (r == Z_BUF_ERROR) + { + inflate_trees_free(*tb, z); + z->msg = "incomplete dynamic bit lengths tree"; + r = Z_DATA_ERROR; + } + return r; +} + + +local int inflate_trees_dynamic( + uInt nl, /* number of literal/length codes */ + uInt nd, /* number of distance codes */ + uIntf *c, /* that many (total) code lengths */ + uIntf *bl, /* literal desired/actual bit depth */ + uIntf *bd, /* distance desired/actual bit depth */ + inflate_huft * FAR *tl, /* literal/length tree result */ + inflate_huft * FAR *td, /* distance tree result */ + z_stream *z /* for zfree function */ +) +{ + int r; + + /* build literal/length tree */ + if ((r = huft_build(c, nl, 257, cplens, cplext, tl, bl, z)) != Z_OK) + { + if (r == Z_DATA_ERROR) + z->msg = "oversubscribed literal/length tree"; + else if (r == Z_BUF_ERROR) + { + inflate_trees_free(*tl, z); + z->msg = "incomplete literal/length tree"; + r = Z_DATA_ERROR; + } + return r; + } + + /* build distance tree */ + if ((r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, z)) != Z_OK) + { + if (r == Z_DATA_ERROR) + z->msg = "oversubscribed literal/length tree"; + else if (r == Z_BUF_ERROR) { +#ifdef PKZIP_BUG_WORKAROUND + r = Z_OK; + } +#else + inflate_trees_free(*td, z); + z->msg = "incomplete literal/length tree"; + r = Z_DATA_ERROR; + } + inflate_trees_free(*tl, z); + return r; +#endif + } + + /* done */ + return Z_OK; +} + + +/* build fixed tables only once--keep them here */ +local int fixed_lock = 0; +local int fixed_built = 0; +#define FIXEDH 530 /* number of hufts used by fixed tables */ +local uInt fixed_left = FIXEDH; +local inflate_huft fixed_mem[FIXEDH]; +local uInt fixed_bl; +local uInt fixed_bd; +local inflate_huft *fixed_tl; +local inflate_huft *fixed_td; + + +local voidpf falloc( + voidpf q, /* opaque pointer (not used) */ + uInt n, /* number of items */ + uInt s /* size of item */ +) +{ + Assert(s == sizeof(inflate_huft) && n <= fixed_left, + "inflate_trees falloc overflow"); + if (q) s++; /* to make some compilers happy */ + fixed_left -= n; + return (voidpf)(fixed_mem + fixed_left); +} + + +local void ffree( + voidpf q, + voidpf p, + uInt n +) +{ + Assert(0, "inflate_trees ffree called!"); + if (q) q = p; /* to make some compilers happy */ +} + + +local int inflate_trees_fixed( + uIntf *bl, /* literal desired/actual bit depth */ + uIntf *bd, /* distance desired/actual bit depth */ + inflate_huft * FAR *tl, /* literal/length tree result */ + inflate_huft * FAR *td /* distance tree result */ +) +{ + /* build fixed tables if not built already--lock out other instances */ + while (++fixed_lock > 1) + fixed_lock--; + if (!fixed_built) + { + int k; /* temporary variable */ + unsigned c[288]; /* length list for huft_build */ + z_stream z; /* for falloc function */ + + /* set up fake z_stream for memory routines */ + z.zalloc = falloc; + z.zfree = ffree; + z.opaque = Z_NULL; + + /* literal table */ + for (k = 0; k < 144; k++) + c[k] = 8; + for (; k < 256; k++) + c[k] = 9; + for (; k < 280; k++) + c[k] = 7; + for (; k < 288; k++) + c[k] = 8; + fixed_bl = 7; + huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, &z); + + /* distance table */ + for (k = 0; k < 30; k++) + c[k] = 5; + fixed_bd = 5; + huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, &z); + + /* done */ + fixed_built = 1; + } + fixed_lock--; + *bl = fixed_bl; + *bd = fixed_bd; + *tl = fixed_tl; + *td = fixed_td; + return Z_OK; +} + + +local int inflate_trees_free( + inflate_huft *t, /* table to free */ + z_stream *z /* for zfree function */ +) +/* Free the malloc'ed tables built by huft_build(), which makes a linked + list of the tables it made, with the links in a dummy first entry of + each table. */ +{ + register inflate_huft *p, *q; + + /* Go through linked list, freeing from the malloced (t[-1]) address. */ + p = t; + while (p != Z_NULL) + { + q = (--p)->next; + ZFREE(z, p, p->word.Nalloc * sizeof(inflate_huft)); + p = q; + } + return Z_OK; +} + +/*+++++*/ +/* infcodes.c -- process literals and length/distance pairs + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* simplify the use of the inflate_huft type with some defines */ +#define base more.Base +#define next more.Next +#define exop word.what.Exop +#define bits word.what.Bits + +/* inflate codes private state */ +struct inflate_codes_state { + + /* mode */ + enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ + START, /* x: set up for LEN */ + LEN, /* i: get length/literal/eob next */ + LENEXT, /* i: getting length extra (have base) */ + DIST, /* i: get distance next */ + DISTEXT, /* i: getting distance extra */ + COPY, /* o: copying bytes in window, waiting for space */ + LIT, /* o: got literal, waiting for output space */ + WASH, /* o: got eob, possibly still output waiting */ + END, /* x: got eob and all data flushed */ + BADCODE} /* x: got error */ + mode; /* current inflate_codes mode */ + + /* mode dependent information */ + uInt len; + union { + struct { + inflate_huft *tree; /* pointer into tree */ + uInt need; /* bits needed */ + } code; /* if LEN or DIST, where in tree */ + uInt lit; /* if LIT, literal */ + struct { + uInt get; /* bits to get for extra */ + uInt dist; /* distance back to copy from */ + } copy; /* if EXT or COPY, where and how much */ + } sub; /* submode */ + + /* mode independent information */ + Byte lbits; /* ltree bits decoded per branch */ + Byte dbits; /* dtree bits decoder per branch */ + inflate_huft *ltree; /* literal/length/eob tree */ + inflate_huft *dtree; /* distance tree */ + +}; + + +local inflate_codes_statef *inflate_codes_new( + uInt bl, + uInt bd, + inflate_huft *tl, + inflate_huft *td, + z_stream *z +) +{ + inflate_codes_statef *c; + + if ((c = (inflate_codes_statef *) + ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL) + { + c->mode = START; + c->lbits = (Byte)bl; + c->dbits = (Byte)bd; + c->ltree = tl; + c->dtree = td; + Tracev((stderr, "inflate: codes new\n")); + } + return c; +} + + +local int inflate_codes( + inflate_blocks_statef *s, + z_stream *z, + int r +) +{ + uInt j; /* temporary storage */ + inflate_huft *t; /* temporary pointer */ + uInt e; /* extra bits or operation */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + Bytef *f; /* pointer to copy strings from */ + inflate_codes_statef *c = s->sub.decode.codes; /* codes state */ + + /* copy input/output information to locals (UPDATE macro restores) */ + LOAD + + /* process input and output based on current state */ + while (1) switch (c->mode) + { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ + case START: /* x: set up for LEN */ +#ifndef SLOW + if (m >= 258 && n >= 10) + { + UPDATE + r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z); + LOAD + if (r != Z_OK) + { + c->mode = r == Z_STREAM_END ? WASH : BADCODE; + break; + } + } +#endif /* !SLOW */ + c->sub.code.need = c->lbits; + c->sub.code.tree = c->ltree; + c->mode = LEN; + case LEN: /* i: get length/literal/eob next */ + j = c->sub.code.need; + NEEDBITS(j) + t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); + DUMPBITS(t->bits) + e = (uInt)(t->exop); + if (e == 0) /* literal */ + { + c->sub.lit = t->base; + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", t->base)); + c->mode = LIT; + break; + } + if (e & 16) /* length */ + { + c->sub.copy.get = e & 15; + c->len = t->base; + c->mode = LENEXT; + break; + } + if ((e & 64) == 0) /* next table */ + { + c->sub.code.need = e; + c->sub.code.tree = t->next; + break; + } + if (e & 32) /* end of block */ + { + Tracevv((stderr, "inflate: end of block\n")); + c->mode = WASH; + break; + } + c->mode = BADCODE; /* invalid code */ + z->msg = "invalid literal/length code"; + r = Z_DATA_ERROR; + LEAVE + case LENEXT: /* i: getting length extra (have base) */ + j = c->sub.copy.get; + NEEDBITS(j) + c->len += (uInt)b & inflate_mask[j]; + DUMPBITS(j) + c->sub.code.need = c->dbits; + c->sub.code.tree = c->dtree; + Tracevv((stderr, "inflate: length %u\n", c->len)); + c->mode = DIST; + case DIST: /* i: get distance next */ + j = c->sub.code.need; + NEEDBITS(j) + t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); + DUMPBITS(t->bits) + e = (uInt)(t->exop); + if (e & 16) /* distance */ + { + c->sub.copy.get = e & 15; + c->sub.copy.dist = t->base; + c->mode = DISTEXT; + break; + } + if ((e & 64) == 0) /* next table */ + { + c->sub.code.need = e; + c->sub.code.tree = t->next; + break; + } + c->mode = BADCODE; /* invalid code */ + z->msg = "invalid distance code"; + r = Z_DATA_ERROR; + LEAVE + case DISTEXT: /* i: getting distance extra */ + j = c->sub.copy.get; + NEEDBITS(j) + c->sub.copy.dist += (uInt)b & inflate_mask[j]; + DUMPBITS(j) + Tracevv((stderr, "inflate: distance %u\n", c->sub.copy.dist)); + c->mode = COPY; + case COPY: /* o: copying bytes in window, waiting for space */ +#ifndef __TURBOC__ /* Turbo C bug for following expression */ + f = (uInt)(q - s->window) < c->sub.copy.dist ? + s->end - (c->sub.copy.dist - (q - s->window)) : + q - c->sub.copy.dist; +#else + f = q - c->sub.copy.dist; + if ((uInt)(q - s->window) < c->sub.copy.dist) + f = s->end - (c->sub.copy.dist - (q - s->window)); +#endif + while (c->len) + { + NEEDOUT + OUTBYTE(*f++) + if (f == s->end) + f = s->window; + c->len--; + } + c->mode = START; + break; + case LIT: /* o: got literal, waiting for output space */ + NEEDOUT + OUTBYTE(c->sub.lit) + c->mode = START; + break; + case WASH: /* o: got eob, possibly more output */ + FLUSH + if (s->read != s->write) + LEAVE + c->mode = END; + case END: + r = Z_STREAM_END; + LEAVE + case BADCODE: /* x: got error */ + r = Z_DATA_ERROR; + LEAVE + default: + r = Z_STREAM_ERROR; + LEAVE + } +} + + +local void inflate_codes_free( + inflate_codes_statef *c, + z_stream *z +) +{ + ZFREE(z, c, sizeof(struct inflate_codes_state)); + Tracev((stderr, "inflate: codes free\n")); +} + +/*+++++*/ +/* inflate_util.c -- data and routines common to blocks and codes + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* copy as much as possible from the sliding window to the output area */ +local int inflate_flush( + inflate_blocks_statef *s, + z_stream *z, + int r +) +{ + uInt n; + Bytef *p, *q; + + /* local copies of source and destination pointers */ + p = z->next_out; + q = s->read; + + /* compute number of bytes to copy as far as end of window */ + n = (uInt)((q <= s->write ? s->write : s->end) - q); + if (n > z->avail_out) n = z->avail_out; + if (n && r == Z_BUF_ERROR) r = Z_OK; + + /* update counters */ + z->avail_out -= n; + z->total_out += n; + + /* update check information */ + if (s->checkfn != Z_NULL) + s->check = (*s->checkfn)(s->check, q, n); + + /* copy as far as end of window */ + zmemcpy(p, q, n); + p += n; + q += n; + + /* see if more to copy at beginning of window */ + if (q == s->end) + { + /* wrap pointers */ + q = s->window; + if (s->write == s->end) + s->write = s->window; + + /* compute bytes to copy */ + n = (uInt)(s->write - q); + if (n > z->avail_out) n = z->avail_out; + if (n && r == Z_BUF_ERROR) r = Z_OK; + + /* update counters */ + z->avail_out -= n; + z->total_out += n; + + /* update check information */ + if (s->checkfn != Z_NULL) + s->check = (*s->checkfn)(s->check, q, n); + + /* copy */ + zmemcpy(p, q, n); + p += n; + q += n; + } + + /* update pointers */ + z->next_out = p; + s->read = q; + + /* done */ + return r; +} + + +/*+++++*/ +/* inffast.c -- process literals and length/distance pairs fast + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* simplify the use of the inflate_huft type with some defines */ +#define base more.Base +#define next more.Next +#define exop word.what.Exop +#define bits word.what.Bits + +/* macros for bit input with no checking and for returning unused bytes */ +#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<>3);p-=c;k&=7;} + +/* Called with number of bytes left to write in window at least 258 + (the maximum string length) and number of input bytes available + at least ten. The ten bytes are six bytes for the longest length/ + distance pair plus four bytes for overloading the bit buffer. */ + +local int inflate_fast( + uInt bl, + uInt bd, + inflate_huft *tl, + inflate_huft *td, + inflate_blocks_statef *s, + z_stream *z +) +{ + inflate_huft *t; /* temporary pointer */ + uInt e; /* extra bits or operation */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + uInt ml; /* mask for literal/length tree */ + uInt md; /* mask for distance tree */ + uInt c; /* bytes to copy */ + uInt d; /* distance back to copy from */ + Bytef *r; /* copy source pointer */ + + /* load input, output, bit values */ + LOAD + + /* initialize masks */ + ml = inflate_mask[bl]; + md = inflate_mask[bd]; + + /* do until not enough input or output space for fast loop */ + do { /* assume called with m >= 258 && n >= 10 */ + /* get literal/length code */ + GRABBITS(20) /* max bits for literal/length code */ + if ((e = (t = tl + ((uInt)b & ml))->exop) == 0) + { + DUMPBITS(t->bits) + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? + "inflate: * literal '%c'\n" : + "inflate: * literal 0x%02x\n", t->base)); + *q++ = (Byte)t->base; + m--; + continue; + } + do { + DUMPBITS(t->bits) + if (e & 16) + { + /* get extra bits for length */ + e &= 15; + c = t->base + ((uInt)b & inflate_mask[e]); + DUMPBITS(e) + Tracevv((stderr, "inflate: * length %u\n", c)); + + /* decode distance base of block to copy */ + GRABBITS(15); /* max bits for distance code */ + e = (t = td + ((uInt)b & md))->exop; + do { + DUMPBITS(t->bits) + if (e & 16) + { + /* get extra bits to add to distance base */ + e &= 15; + GRABBITS(e) /* get extra bits (up to 13) */ + d = t->base + ((uInt)b & inflate_mask[e]); + DUMPBITS(e) + Tracevv((stderr, "inflate: * distance %u\n", d)); + + /* do the copy */ + m -= c; + if ((uInt)(q - s->window) >= d) /* offset before dest */ + { /* just copy */ + r = q - d; + *q++ = *r++; c--; /* minimum count is three, */ + *q++ = *r++; c--; /* so unroll loop a little */ + } + else /* else offset after destination */ + { + e = d - (q - s->window); /* bytes from offset to end */ + r = s->end - e; /* pointer to offset */ + if (c > e) /* if source crosses, */ + { + c -= e; /* copy to end of window */ + do { + *q++ = *r++; + } while (--e); + r = s->window; /* copy rest from start of window */ + } + } + do { /* copy all or what's left */ + *q++ = *r++; + } while (--c); + break; + } + else if ((e & 64) == 0) + e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop; + else + { + z->msg = "invalid distance code"; + UNGRAB + UPDATE + return Z_DATA_ERROR; + } + } while (1); + break; + } + if ((e & 64) == 0) + { + if ((e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop) == 0) + { + DUMPBITS(t->bits) + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? + "inflate: * literal '%c'\n" : + "inflate: * literal 0x%02x\n", t->base)); + *q++ = (Byte)t->base; + m--; + break; + } + } + else if (e & 32) + { + Tracevv((stderr, "inflate: * end of block\n")); + UNGRAB + UPDATE + return Z_STREAM_END; + } + else + { + z->msg = "invalid literal/length code"; + UNGRAB + UPDATE + return Z_DATA_ERROR; + } + } while (1); + } while (m >= 258 && n >= 10); + + /* not enough input or output--restore pointers and return */ + UNGRAB + UPDATE + return Z_OK; +} + + +/*+++++*/ +/* zutil.c -- target dependent utility functions for the compression library + * Copyright (C) 1995 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* From: zutil.c,v 1.8 1995/05/03 17:27:12 jloup Exp */ + +char *zlib_version = ZLIB_VERSION; + +char *z_errmsg[] = { +"stream end", /* Z_STREAM_END 1 */ +"", /* Z_OK 0 */ +"file error", /* Z_ERRNO (-1) */ +"stream error", /* Z_STREAM_ERROR (-2) */ +"data error", /* Z_DATA_ERROR (-3) */ +"insufficient memory", /* Z_MEM_ERROR (-4) */ +"buffer error", /* Z_BUF_ERROR (-5) */ +""}; + + +/*+++++*/ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* From: adler32.c,v 1.6 1995/05/03 17:27:08 jloup Exp */ + +#define BASE 65521L /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf) {s1 += *buf++; s2 += s1;} +#define DO2(buf) DO1(buf); DO1(buf); +#define DO4(buf) DO2(buf); DO2(buf); +#define DO8(buf) DO4(buf); DO4(buf); +#define DO16(buf) DO8(buf); DO8(buf); + +/* ========================================================================= */ +uLong adler32( + uLong adler, + Bytef *buf, + uInt len +) +{ + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int k; + + if (buf == Z_NULL) return 1L; + + while (len > 0) { + k = len < NMAX ? len : NMAX; + len -= k; + while (k >= 16) { + DO16(buf); + k -= 16; + } + if (k != 0) do { + DO1(buf); + } while (--k); + s1 %= BASE; + s2 %= BASE; + } + return (s2 << 16) | s1; +} diff --git a/arch/ppc64/boot/zlib.h b/arch/ppc64/boot/zlib.h new file mode 100644 index 00000000000..f0b996c6864 --- /dev/null +++ b/arch/ppc64/boot/zlib.h @@ -0,0 +1,432 @@ +/* */ + +/* + * This file is derived from zlib.h and zconf.h from the zlib-0.95 + * distribution by Jean-loup Gailly and Mark Adler, with some additions + * by Paul Mackerras to aid in implementing Deflate compression and + * decompression for PPP packets. + */ + +/* + * ==FILEVERSION 960122== + * + * This marker is used by the Linux installation script to determine + * whether an up-to-date version of this file is already installed. + */ + +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 0.95, Aug 16th, 1995. + + Copyright (C) 1995 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + gzip@prep.ai.mit.edu madler@alumni.caltech.edu + */ + +#ifndef _ZLIB_H +#define _ZLIB_H + +/* #include "zconf.h" */ /* included directly here */ + +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* From: zconf.h,v 1.12 1995/05/03 17:27:12 jloup Exp */ + +/* + The library does not install any signal handler. It is recommended to + add at least a handler for SIGSEGV when decompressing; the library checks + the consistency of the input data whenever possible but may go nuts + for some forms of corrupted input. + */ + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + * Compile with -DUNALIGNED_OK if it is OK to access shorts or ints + * at addresses which are not a multiple of their size. + * Under DOS, -DFAR=far or -DFAR=__far may be needed. + */ + +#ifndef STDC +# if defined(MSDOS) || defined(__STDC__) || defined(__cplusplus) +# define STDC +# endif +#endif + +#ifdef __MWERKS__ /* Metrowerks CodeWarrior declares fileno() in unix.h */ +# include +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +#ifndef FAR +# define FAR +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2 */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + 1 << (windowBits+2) + 1 << (memLevel+9) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus a few kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +typedef unsigned char Byte; /* 8 bits */ +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +typedef Byte FAR Bytef; +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +/* end of original zconf.h */ + +#define ZLIB_VERSION "0.95P" + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed + data. This version of the library supports only one compression method + (deflation) but other algorithms may be added later and will have the same + stream interface. + + For compression the application must provide the output buffer and + may optionally provide the input buffer for optimization. For decompression, + the application must provide the input buffer and may optionally provide + the output buffer for optimization. + + Compression can be done in a single step if the buffers are large + enough (for example if an input file is mmap'ed), or can be done by + repeated calls of the compression function. In the latter case, the + application must provide more input and/or consume the output + (providing more output space) before each call. +*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address, uInt nbytes)); + +struct internal_state; + +typedef struct z_stream_s { + Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total nb of input bytes read so far */ + + Bytef *next_out; /* next output byte should be put there */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total nb of bytes output so far */ + + char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidp opaque; /* private data object passed to zalloc and zfree */ + + Byte data_type; /* best guess about the data type: ascii or binary */ + +} z_stream; + +/* + The application must update next_in and avail_in when avail_in has + dropped to zero. It must update next_out and avail_out when avail_out + has dropped to zero. The application must initialize zalloc, zfree and + opaque before calling the init function. All other fields are set by the + compression library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this + if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, + pointers returned by zalloc for objects of exactly 65536 bytes *must* + have their offset normalized to zero. The default allocation function + provided by this library ensures this (see zutil.c). To reduce memory + requirements and avoid any allocation of 64K objects, at the expense of + compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or + progress reports. After compression, total_in holds the total size of + the uncompressed data and may be saved for use in the decompressor + (particularly if the decompressor wants to decompress everything in + a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_FULL_FLUSH 2 +#define Z_SYNC_FLUSH 3 /* experimental: partial_flush + byte align */ +#define Z_FINISH 4 +#define Z_PACKET_FLUSH 5 +/* See deflate() below for the usage of these constants */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +/* error codes for the compression/decompression functions */ + +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_DEFAULT_STRATEGY 0 + +#define Z_BINARY 0 +#define Z_ASCII 1 +#define Z_UNKNOWN 2 +/* Used to set the data_type field */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +extern char *zlib_version; +/* The application can compare zlib_version and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is + not compatible with the zlib.h header file used by the application. + */ + + /* basic functions */ + +extern int inflateInit OF((z_stream *strm)); +/* + Initializes the internal stream state for decompression. The fields + zalloc and zfree must be initialized before by the caller. If zalloc and + zfree are set to Z_NULL, inflateInit updates them to use default allocation + functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory. msg is set to null if there is no error message. + inflateInit does not perform any decompression: this will be done by + inflate(). +*/ + + +extern int inflate OF((z_stream *strm, int flush)); +/* + Performs one or both of the following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in is updated and processing + will resume at this point for the next call of inflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. inflate() always provides as much output as possible + (until there is no more input data or no more space in the output buffer). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating the next_* and avail_* values accordingly. + The application can consume the uncompressed output when it wants, for + example when the output buffer is full (avail_out == 0), or after each + call of inflate(). + + If the parameter flush is set to Z_PARTIAL_FLUSH or Z_PACKET_FLUSH, + inflate flushes as much output as possible to the output buffer. The + flushing behavior of inflate is not specified for values of the flush + parameter other than Z_PARTIAL_FLUSH, Z_PACKET_FLUSH or Z_FINISH, but the + current implementation actually flushes as much output as possible + anyway. For Z_PACKET_FLUSH, inflate checks that once all the input data + has been consumed, it is expecting to see the length field of a stored + block; if not, it returns Z_DATA_ERROR. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step + (a single call of inflate), the parameter flush should be set to + Z_FINISH. In this case all pending input is processed and all pending + output is flushed; avail_out must be large enough to hold all the + uncompressed data. (The size of the uncompressed data may have been saved + by the compressor for this purpose.) The next operation on this stream must + be inflateEnd to deallocate the decompression state. The use of Z_FINISH + is never required, but can be used to inform inflate that a faster routine + may be used for the single inflate() call. + + inflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if the end of the + compressed data has been reached and all uncompressed output has been + produced, Z_DATA_ERROR if the input data was corrupted, Z_STREAM_ERROR if + the stream structure was inconsistent (for example if next_in or next_out + was NULL), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if no + progress is possible or if there was not enough room in the output buffer + when Z_FINISH is used. In the Z_DATA_ERROR case, the application may then + call inflateSync to look for a good compression block. */ + + +extern int inflateEnd OF((z_stream *strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. + + inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state + was inconsistent. In the error case, msg may be set but then points to a + static string (which must not be deallocated). +*/ + + /* advanced functions */ + +extern int inflateInit2 OF((z_stream *strm, + int windowBits)); +/* + This is another version of inflateInit with more compression options. The + fields next_out, zalloc and zfree must be initialized before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library (the value 16 will be allowed soon). The + default value is 15 if inflateInit is used instead. If a compressed stream + with a larger window size is given as input, inflate() will return with + the error code Z_DATA_ERROR instead of trying to allocate a larger window. + + If next_out is not null, the library will use this buffer for the history + buffer; the buffer must either be large enough to hold the entire output + data, or have at least 1< +#include +#include +#include +#include + + +void HvCall_writeLogBuffer(const void *buffer, u64 len) +{ + struct HvLpBufferList hv_buf; + u64 left_this_page; + u64 cur = virt_to_abs(buffer); + + while (len) { + hv_buf.addr = cur; + left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur; + if (left_this_page > len) + left_this_page = len; + hv_buf.len = left_this_page; + len -= left_this_page; + HvCall2(HvCallBaseWriteLogBuffer, + virt_to_abs(&hv_buf), + left_this_page); + cur = (cur & PAGE_MASK) + PAGE_SIZE; + } +} diff --git a/arch/ppc64/kernel/HvLpConfig.c b/arch/ppc64/kernel/HvLpConfig.c new file mode 100644 index 00000000000..cb1d6473203 --- /dev/null +++ b/arch/ppc64/kernel/HvLpConfig.c @@ -0,0 +1,27 @@ +/* + * HvLpConfig.c + * Copyright (C) 2001 Kyle A. Lucke, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include + +HvLpIndex HvLpConfig_getLpIndex_outline(void) +{ + return HvLpConfig_getLpIndex(); +} +EXPORT_SYMBOL(HvLpConfig_getLpIndex_outline); diff --git a/arch/ppc64/kernel/HvLpEvent.c b/arch/ppc64/kernel/HvLpEvent.c new file mode 100644 index 00000000000..9802beefa21 --- /dev/null +++ b/arch/ppc64/kernel/HvLpEvent.c @@ -0,0 +1,88 @@ +/* + * Copyright 2001 Mike Corrigan IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include + +/* Array of LpEvent handler functions */ +LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes]; +unsigned lpEventHandlerPaths[HvLpEvent_Type_NumTypes]; + +/* Register a handler for an LpEvent type */ + +int HvLpEvent_registerHandler( HvLpEvent_Type eventType, LpEventHandler handler ) +{ + int rc = 1; + if ( eventType < HvLpEvent_Type_NumTypes ) { + lpEventHandler[eventType] = handler; + rc = 0; + } + return rc; + +} + +int HvLpEvent_unregisterHandler( HvLpEvent_Type eventType ) +{ + int rc = 1; + + might_sleep(); + + if ( eventType < HvLpEvent_Type_NumTypes ) { + if ( !lpEventHandlerPaths[eventType] ) { + lpEventHandler[eventType] = NULL; + rc = 0; + + /* We now sleep until all other CPUs have scheduled. This ensures that + * the deletion is seen by all other CPUs, and that the deleted handler + * isn't still running on another CPU when we return. */ + synchronize_kernel(); + } + } + return rc; +} +EXPORT_SYMBOL(HvLpEvent_registerHandler); +EXPORT_SYMBOL(HvLpEvent_unregisterHandler); + +/* (lpIndex is the partition index of the target partition. + * needed only for VirtualIo, VirtualLan and SessionMgr. Zero + * indicates to use our partition index - for the other types) + */ +int HvLpEvent_openPath( HvLpEvent_Type eventType, HvLpIndex lpIndex ) +{ + int rc = 1; + if ( eventType < HvLpEvent_Type_NumTypes && + lpEventHandler[eventType] ) { + if ( lpIndex == 0 ) + lpIndex = itLpNaca.xLpIndex; + HvCallEvent_openLpEventPath( lpIndex, eventType ); + ++lpEventHandlerPaths[eventType]; + rc = 0; + } + return rc; +} + +int HvLpEvent_closePath( HvLpEvent_Type eventType, HvLpIndex lpIndex ) +{ + int rc = 1; + if ( eventType < HvLpEvent_Type_NumTypes && + lpEventHandler[eventType] && + lpEventHandlerPaths[eventType] ) { + if ( lpIndex == 0 ) + lpIndex = itLpNaca.xLpIndex; + HvCallEvent_closeLpEventPath( lpIndex, eventType ); + --lpEventHandlerPaths[eventType]; + rc = 0; + } + return rc; +} + diff --git a/arch/ppc64/kernel/ItLpQueue.c b/arch/ppc64/kernel/ItLpQueue.c new file mode 100644 index 00000000000..c923a815760 --- /dev/null +++ b/arch/ppc64/kernel/ItLpQueue.c @@ -0,0 +1,167 @@ +/* + * ItLpQueue.c + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static __inline__ int set_inUse( struct ItLpQueue * lpQueue ) +{ + int t; + u32 * inUseP = &(lpQueue->xInUseWord); + + __asm__ __volatile__("\n\ +1: lwarx %0,0,%2 \n\ + cmpwi 0,%0,0 \n\ + li %0,0 \n\ + bne- 2f \n\ + addi %0,%0,1 \n\ + stwcx. %0,0,%2 \n\ + bne- 1b \n\ +2: eieio" + : "=&r" (t), "=m" (lpQueue->xInUseWord) + : "r" (inUseP), "m" (lpQueue->xInUseWord) + : "cc"); + + return t; +} + +static __inline__ void clear_inUse( struct ItLpQueue * lpQueue ) +{ + lpQueue->xInUseWord = 0; +} + +/* Array of LpEvent handler functions */ +extern LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes]; +unsigned long ItLpQueueInProcess = 0; + +struct HvLpEvent * ItLpQueue_getNextLpEvent( struct ItLpQueue * lpQueue ) +{ + struct HvLpEvent * nextLpEvent = + (struct HvLpEvent *)lpQueue->xSlicCurEventPtr; + if ( nextLpEvent->xFlags.xValid ) { + /* rmb() needed only for weakly consistent machines (regatta) */ + rmb(); + /* Set pointer to next potential event */ + lpQueue->xSlicCurEventPtr += ((nextLpEvent->xSizeMinus1 + + LpEventAlign ) / + LpEventAlign ) * + LpEventAlign; + /* Wrap to beginning if no room at end */ + if (lpQueue->xSlicCurEventPtr > lpQueue->xSlicLastValidEventPtr) + lpQueue->xSlicCurEventPtr = lpQueue->xSlicEventStackPtr; + } + else + nextLpEvent = NULL; + + return nextLpEvent; +} + +int ItLpQueue_isLpIntPending( struct ItLpQueue * lpQueue ) +{ + int retval = 0; + struct HvLpEvent * nextLpEvent; + if ( lpQueue ) { + nextLpEvent = (struct HvLpEvent *)lpQueue->xSlicCurEventPtr; + retval = nextLpEvent->xFlags.xValid | lpQueue->xPlicOverflowIntPending; + } + return retval; +} + +void ItLpQueue_clearValid( struct HvLpEvent * event ) +{ + /* Clear the valid bit of the event + * Also clear bits within this event that might + * look like valid bits (on 64-byte boundaries) + */ + unsigned extra = (( event->xSizeMinus1 + LpEventAlign ) / + LpEventAlign ) - 1; + switch ( extra ) { + case 3: + ((struct HvLpEvent*)((char*)event+3*LpEventAlign))->xFlags.xValid=0; + case 2: + ((struct HvLpEvent*)((char*)event+2*LpEventAlign))->xFlags.xValid=0; + case 1: + ((struct HvLpEvent*)((char*)event+1*LpEventAlign))->xFlags.xValid=0; + case 0: + ; + } + mb(); + event->xFlags.xValid = 0; +} + +unsigned ItLpQueue_process( struct ItLpQueue * lpQueue, struct pt_regs *regs ) +{ + unsigned numIntsProcessed = 0; + struct HvLpEvent * nextLpEvent; + + /* If we have recursed, just return */ + if ( !set_inUse( lpQueue ) ) + return 0; + + if (ItLpQueueInProcess == 0) + ItLpQueueInProcess = 1; + else + BUG(); + + for (;;) { + nextLpEvent = ItLpQueue_getNextLpEvent( lpQueue ); + if ( nextLpEvent ) { + /* Count events to return to caller + * and count processed events in lpQueue + */ + ++numIntsProcessed; + lpQueue->xLpIntCount++; + /* Call appropriate handler here, passing + * a pointer to the LpEvent. The handler + * must make a copy of the LpEvent if it + * needs it in a bottom half. (perhaps for + * an ACK) + * + * Handlers are responsible for ACK processing + * + * The Hypervisor guarantees that LpEvents will + * only be delivered with types that we have + * registered for, so no type check is necessary + * here! + */ + if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes ) + lpQueue->xLpIntCountByType[nextLpEvent->xType]++; + if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes && + lpEventHandler[nextLpEvent->xType] ) + lpEventHandler[nextLpEvent->xType](nextLpEvent, regs); + else + printk(KERN_INFO "Unexpected Lp Event type=%d\n", nextLpEvent->xType ); + + ItLpQueue_clearValid( nextLpEvent ); + } else if ( lpQueue->xPlicOverflowIntPending ) + /* + * No more valid events. If overflow events are + * pending process them + */ + HvCallEvent_getOverflowLpEvents( lpQueue->xIndex); + else + break; + } + + ItLpQueueInProcess = 0; + mb(); + clear_inUse( lpQueue ); + + get_paca()->lpevent_count += numIntsProcessed; + + return numIntsProcessed; +} diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c new file mode 100644 index 00000000000..badc5a44361 --- /dev/null +++ b/arch/ppc64/kernel/LparData.c @@ -0,0 +1,250 @@ +/* + * Copyright 2001 Mike Corrigan, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* The LpQueue is used to pass event data from the hypervisor to + * the partition. This is where I/O interrupt events are communicated. + */ + +/* May be filled in by the hypervisor so cannot end up in the BSS */ +struct ItLpQueue xItLpQueue __attribute__((__section__(".data"))); + + +/* The HvReleaseData is the root of the information shared between + * the hypervisor and Linux. + */ + +struct HvReleaseData hvReleaseData = { + .xDesc = 0xc8a5d9c4, /* "HvRD" ebcdic */ + .xSize = sizeof(struct HvReleaseData), + .xVpdAreasPtrOffset = offsetof(struct naca_struct, xItVpdAreas), + .xSlicNacaAddr = &naca, /* 64-bit Naca address */ + .xMsNucDataOffset = 0x4800, /* offset of LparMap within loadarea (see head.S) */ + .xTagsMode = 1, /* tags inactive */ + .xAddressSize = 0, /* 64 bit */ + .xNoSharedProcs = 0, /* shared processors */ + .xNoHMT = 0, /* HMT allowed */ + .xRsvd2 = 6, /* TEMP: This allows non-GA driver */ + .xVrmIndex = 4, /* We are v5r2m0 */ + .xMinSupportedPlicVrmIndex = 3, /* v5r1m0 */ + .xMinCompatablePlicVrmIndex = 3, /* v5r1m0 */ + .xVrmName = { 0xd3, 0x89, 0x95, 0xa4, /* "Linux 2.4.64" ebcdic */ + 0xa7, 0x40, 0xf2, 0x4b, + 0xf4, 0x4b, 0xf6, 0xf4 }, +}; + +extern void system_reset_iSeries(void); +extern void machine_check_iSeries(void); +extern void data_access_iSeries(void); +extern void instruction_access_iSeries(void); +extern void hardware_interrupt_iSeries(void); +extern void alignment_iSeries(void); +extern void program_check_iSeries(void); +extern void fp_unavailable_iSeries(void); +extern void decrementer_iSeries(void); +extern void trap_0a_iSeries(void); +extern void trap_0b_iSeries(void); +extern void system_call_iSeries(void); +extern void single_step_iSeries(void); +extern void trap_0e_iSeries(void); +extern void performance_monitor_iSeries(void); +extern void data_access_slb_iSeries(void); +extern void instruction_access_slb_iSeries(void); + +struct ItLpNaca itLpNaca = { + .xDesc = 0xd397d581, /* "LpNa" ebcdic */ + .xSize = 0x0400, /* size of ItLpNaca */ + .xIntHdlrOffset = 0x0300, /* offset to int array */ + .xMaxIntHdlrEntries = 19, /* # ents */ + .xPrimaryLpIndex = 0, /* Part # of primary */ + .xServiceLpIndex = 0, /* Part # of serv */ + .xLpIndex = 0, /* Part # of me */ + .xMaxLpQueues = 0, /* # of LP queues */ + .xLpQueueOffset = 0x100, /* offset of start of LP queues */ + .xPirEnvironMode = 0, /* Piranha stuff */ + .xPirConsoleMode = 0, + .xPirDasdMode = 0, + .xLparInstalled = 0, + .xSysPartitioned = 0, + .xHwSyncedTBs = 0, + .xIntProcUtilHmt = 0, + .xSpVpdFormat = 0, + .xIntProcRatio = 0, + .xPlicVrmIndex = 0, /* VRM index of PLIC */ + .xMinSupportedSlicVrmInd = 0, /* min supported SLIC */ + .xMinCompatableSlicVrmInd = 0, /* min compat SLIC */ + .xLoadAreaAddr = 0, /* 64-bit addr of load area */ + .xLoadAreaChunks = 0, /* chunks for load area */ + .xPaseSysCallCRMask = 0, /* PASE mask */ + .xSlicSegmentTablePtr = 0, /* seg table */ + .xOldLpQueue = { 0 }, /* Old LP Queue */ + .xInterruptHdlr = { + (u64)system_reset_iSeries, /* 0x100 System Reset */ + (u64)machine_check_iSeries, /* 0x200 Machine Check */ + (u64)data_access_iSeries, /* 0x300 Data Access */ + (u64)instruction_access_iSeries, /* 0x400 Instruction Access */ + (u64)hardware_interrupt_iSeries, /* 0x500 External */ + (u64)alignment_iSeries, /* 0x600 Alignment */ + (u64)program_check_iSeries, /* 0x700 Program Check */ + (u64)fp_unavailable_iSeries, /* 0x800 FP Unavailable */ + (u64)decrementer_iSeries, /* 0x900 Decrementer */ + (u64)trap_0a_iSeries, /* 0xa00 Trap 0A */ + (u64)trap_0b_iSeries, /* 0xb00 Trap 0B */ + (u64)system_call_iSeries, /* 0xc00 System Call */ + (u64)single_step_iSeries, /* 0xd00 Single Step */ + (u64)trap_0e_iSeries, /* 0xe00 Trap 0E */ + (u64)performance_monitor_iSeries,/* 0xf00 Performance Monitor */ + 0, /* int 0x1000 */ + 0, /* int 0x1010 */ + 0, /* int 0x1020 CPU ctls */ + (u64)hardware_interrupt_iSeries, /* SC Ret Hdlr */ + (u64)data_access_slb_iSeries, /* 0x380 D-SLB */ + (u64)instruction_access_slb_iSeries /* 0x480 I-SLB */ + } +}; +EXPORT_SYMBOL(itLpNaca); + +/* May be filled in by the hypervisor so cannot end up in the BSS */ +struct ItIplParmsReal xItIplParmsReal __attribute__((__section__(".data"))); + +/* May be filled in by the hypervisor so cannot end up in the BSS */ +struct ItExtVpdPanel xItExtVpdPanel __attribute__((__section__(".data"))); +EXPORT_SYMBOL(xItExtVpdPanel); + +#define maxPhysicalProcessors 32 + +struct IoHriProcessorVpd xIoHriProcessorVpd[maxPhysicalProcessors] = { + { + .xInstCacheOperandSize = 32, + .xDataCacheOperandSize = 32, + .xProcFreq = 50000000, + .xTimeBaseFreq = 50000000, + .xPVR = 0x3600 + } +}; + +/* Space for Main Store Vpd 27,200 bytes */ +/* May be filled in by the hypervisor so cannot end up in the BSS */ +u64 xMsVpd[3400] __attribute__((__section__(".data"))); + +/* Space for Recovery Log Buffer */ +/* May be filled in by the hypervisor so cannot end up in the BSS */ +u64 xRecoveryLogBuffer[32] __attribute__((__section__(".data"))); + +struct SpCommArea xSpCommArea = { + .xDesc = 0xE2D7C3C2, + .xFormat = 1, +}; + +/* The LparMap data is now located at offset 0x6000 in head.S + * It was put there so that the HvReleaseData could address it + * with a 32-bit offset as required by the iSeries hypervisor + * + * The Naca has a pointer to the ItVpdAreas. The hypervisor finds + * the Naca via the HvReleaseData area. The HvReleaseData has the + * offset into the Naca of the pointer to the ItVpdAreas. + */ +struct ItVpdAreas itVpdAreas = { + .xSlicDesc = 0xc9a3e5c1, /* "ItVA" */ + .xSlicSize = sizeof(struct ItVpdAreas), + .xSlicVpdEntries = ItVpdMaxEntries, /* # VPD array entries */ + .xSlicDmaEntries = ItDmaMaxEntries, /* # DMA array entries */ + .xSlicMaxLogicalProcs = NR_CPUS * 2, /* Max logical procs */ + .xSlicMaxPhysicalProcs = maxPhysicalProcessors, /* Max physical procs */ + .xSlicDmaToksOffset = offsetof(struct ItVpdAreas, xPlicDmaToks), + .xSlicVpdAdrsOffset = offsetof(struct ItVpdAreas, xSlicVpdAdrs), + .xSlicDmaLensOffset = offsetof(struct ItVpdAreas, xPlicDmaLens), + .xSlicVpdLensOffset = offsetof(struct ItVpdAreas, xSlicVpdLens), + .xSlicMaxSlotLabels = 0, /* max slot labels */ + .xSlicMaxLpQueues = 1, /* max LP queues */ + .xPlicDmaLens = { 0 }, /* DMA lengths */ + .xPlicDmaToks = { 0 }, /* DMA tokens */ + .xSlicVpdLens = { /* VPD lengths */ + 0,0,0, /* 0 - 2 */ + sizeof(xItExtVpdPanel), /* 3 Extended VPD */ + sizeof(struct paca_struct), /* 4 length of Paca */ + 0, /* 5 */ + sizeof(struct ItIplParmsReal),/* 6 length of IPL parms */ + 26992, /* 7 length of MS VPD */ + 0, /* 8 */ + sizeof(struct ItLpNaca),/* 9 length of LP Naca */ + 0, /* 10 */ + 256, /* 11 length of Recovery Log Buf */ + sizeof(struct SpCommArea), /* 12 length of SP Comm Area */ + 0,0,0, /* 13 - 15 */ + sizeof(struct IoHriProcessorVpd),/* 16 length of Proc Vpd */ + 0,0,0,0,0,0, /* 17 - 22 */ + sizeof(struct ItLpQueue),/* 23 length of Lp Queue */ + 0,0 /* 24 - 25 */ + }, + .xSlicVpdAdrs = { /* VPD addresses */ + 0,0,0, /* 0 - 2 */ + &xItExtVpdPanel, /* 3 Extended VPD */ + &paca[0], /* 4 first Paca */ + 0, /* 5 */ + &xItIplParmsReal, /* 6 IPL parms */ + &xMsVpd, /* 7 MS Vpd */ + 0, /* 8 */ + &itLpNaca, /* 9 LpNaca */ + 0, /* 10 */ + &xRecoveryLogBuffer, /* 11 Recovery Log Buffer */ + &xSpCommArea, /* 12 SP Comm Area */ + 0,0,0, /* 13 - 15 */ + &xIoHriProcessorVpd, /* 16 Proc Vpd */ + 0,0,0,0,0,0, /* 17 - 22 */ + &xItLpQueue, /* 23 Lp Queue */ + 0,0 + } +}; + +struct msChunks msChunks; +EXPORT_SYMBOL(msChunks); + +/* Depending on whether this is called from iSeries or pSeries setup + * code, the location of the msChunks struct may or may not have + * to be reloc'd, so we force the caller to do that for us by passing + * in a pointer to the structure. + */ +unsigned long +msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size) +{ + unsigned long offset = reloc_offset(); + struct msChunks *_msChunks = PTRRELOC(&msChunks); + + _msChunks->num_chunks = num_chunks; + _msChunks->chunk_size = chunk_size; + _msChunks->chunk_shift = __ilog2(chunk_size); + _msChunks->chunk_mask = (1UL<<_msChunks->chunk_shift)-1; + + mem = _ALIGN(mem, sizeof(msChunks_entry)); + _msChunks->abs = (msChunks_entry *)(mem + offset); + mem += num_chunks * sizeof(msChunks_entry); + + return mem; +} diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile new file mode 100644 index 00000000000..96d90b0c511 --- /dev/null +++ b/arch/ppc64/kernel/Makefile @@ -0,0 +1,68 @@ +# +# Makefile for the linux ppc64 kernel. +# + +EXTRA_CFLAGS += -mno-minimal-toc +extra-y := head.o vmlinux.lds + +obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ + time.o process.o signal.o syscalls.o misc.o ptrace.o \ + align.o semaphore.o bitops.o pacaData.o \ + udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ + ptrace32.o signal32.o rtc.o init_task.o \ + lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ + iommu.o sysfs.o vdso.o pmc.o +obj-y += vdso32/ vdso64/ + +obj-$(CONFIG_PPC_OF) += of_device.o + +pci-obj-$(CONFIG_PPC_ISERIES) += iSeries_pci.o iSeries_pci_reset.o +pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o + +obj-$(CONFIG_PCI) += pci.o pci_iommu.o iomap.o $(pci-obj-y) + +obj-$(CONFIG_PPC_ISERIES) += iSeries_irq.o \ + iSeries_VpdInfo.o XmPciLpEvent.o \ + HvCall.o HvLpConfig.o LparData.o \ + iSeries_setup.o ItLpQueue.o hvCall.o \ + mf.o HvLpEvent.o iSeries_proc.o iSeries_htab.o \ + iSeries_iommu.o + +obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o i8259.o prom_init.o prom.o mpic.o + +obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \ + pSeries_nvram.o rtasd.o ras.o pSeries_reconfig.o \ + xics.o rtas.o pSeries_setup.o pSeries_iommu.o + +obj-$(CONFIG_EEH) += eeh.o +obj-$(CONFIG_PROC_FS) += proc_ppc64.o +obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o +obj-$(CONFIG_RTAS_PROC) += rtas-proc.o +obj-$(CONFIG_SCANLOG) += scanlog.o +obj-$(CONFIG_VIOPATH) += viopath.o +obj-$(CONFIG_LPARCFG) += lparcfg.o +obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o +obj-$(CONFIG_BOOTX_TEXT) += btext.o +obj-$(CONFIG_HVCS) += hvcserver.o +obj-$(CONFIG_IBMVIO) += vio.o + +obj-$(CONFIG_PPC_PMAC) += pmac_setup.o pmac_feature.o pmac_pci.o \ + pmac_time.o pmac_nvram.o pmac_low_i2c.o + +obj-$(CONFIG_PPC_MAPLE) += maple_setup.o maple_pci.o maple_time.o + +obj-$(CONFIG_U3_DART) += u3_iommu.o + +ifdef CONFIG_SMP +obj-$(CONFIG_PPC_PMAC) += pmac_smp.o smp-tbsync.o +obj-$(CONFIG_PPC_ISERIES) += iSeries_smp.o +obj-$(CONFIG_PPC_PSERIES) += pSeries_smp.o +obj-$(CONFIG_PPC_MAPLE) += smp-tbsync.o +endif + +obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o +obj-$(CONFIG_KPROBES) += kprobes.o + +CFLAGS_ioctl32.o += -Ifs/ diff --git a/arch/ppc64/kernel/XmPciLpEvent.c b/arch/ppc64/kernel/XmPciLpEvent.c new file mode 100644 index 00000000000..809c9bc6678 --- /dev/null +++ b/arch/ppc64/kernel/XmPciLpEvent.c @@ -0,0 +1,190 @@ +/* + * File XmPciLpEvent.h created by Wayne Holm on Mon Jan 15 2001. + * + * This module handles PCI interrupt events sent by the iSeries Hypervisor. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static long Pci_Interrupt_Count; +static long Pci_Event_Count; + +enum XmPciLpEvent_Subtype { + XmPciLpEvent_BusCreated = 0, // PHB has been created + XmPciLpEvent_BusError = 1, // PHB has failed + XmPciLpEvent_BusFailed = 2, // Msg to Secondary, Primary failed bus + XmPciLpEvent_NodeFailed = 4, // Multi-adapter bridge has failed + XmPciLpEvent_NodeRecovered = 5, // Multi-adapter bridge has recovered + XmPciLpEvent_BusRecovered = 12, // PHB has been recovered + XmPciLpEvent_UnQuiesceBus = 18, // Secondary bus unqiescing + XmPciLpEvent_BridgeError = 21, // Bridge Error + XmPciLpEvent_SlotInterrupt = 22 // Slot interrupt +}; + +struct XmPciLpEvent_BusInterrupt { + HvBusNumber busNumber; + HvSubBusNumber subBusNumber; +}; + +struct XmPciLpEvent_NodeInterrupt { + HvBusNumber busNumber; + HvSubBusNumber subBusNumber; + HvAgentId deviceId; +}; + +struct XmPciLpEvent { + struct HvLpEvent hvLpEvent; + + union { + u64 alignData; // Align on an 8-byte boundary + + struct { + u32 fisr; + HvBusNumber busNumber; + HvSubBusNumber subBusNumber; + HvAgentId deviceId; + } slotInterrupt; + + struct XmPciLpEvent_BusInterrupt busFailed; + struct XmPciLpEvent_BusInterrupt busRecovered; + struct XmPciLpEvent_BusInterrupt busCreated; + + struct XmPciLpEvent_NodeInterrupt nodeFailed; + struct XmPciLpEvent_NodeInterrupt nodeRecovered; + + } eventData; + +}; + +static void intReceived(struct XmPciLpEvent *eventParm, + struct pt_regs *regsParm); + +static void XmPciLpEvent_handler(struct HvLpEvent *eventParm, + struct pt_regs *regsParm) +{ +#ifdef CONFIG_PCI +#if 0 + PPCDBG(PPCDBG_BUSWALK, "XmPciLpEvent_handler, type 0x%x\n", + eventParm->xType); +#endif + ++Pci_Event_Count; + + if (eventParm && (eventParm->xType == HvLpEvent_Type_PciIo)) { + switch (eventParm->xFlags.xFunction) { + case HvLpEvent_Function_Int: + intReceived((struct XmPciLpEvent *)eventParm, regsParm); + break; + case HvLpEvent_Function_Ack: + printk(KERN_ERR + "XmPciLpEvent.c: unexpected ack received\n"); + break; + default: + printk(KERN_ERR + "XmPciLpEvent.c: unexpected event function %d\n", + (int)eventParm->xFlags.xFunction); + break; + } + } else if (eventParm) + printk(KERN_ERR + "XmPciLpEvent.c: Unrecognized PCI event type 0x%x\n", + (int)eventParm->xType); + else + printk(KERN_ERR "XmPciLpEvent.c: NULL event received\n"); +#endif +} + +static void intReceived(struct XmPciLpEvent *eventParm, + struct pt_regs *regsParm) +{ + int irq; + + ++Pci_Interrupt_Count; +#if 0 + PPCDBG(PPCDBG_BUSWALK, "PCI: XmPciLpEvent.c: intReceived\n"); +#endif + + switch (eventParm->hvLpEvent.xSubtype) { + case XmPciLpEvent_SlotInterrupt: + irq = eventParm->hvLpEvent.xCorrelationToken; + /* Dispatch the interrupt handlers for this irq */ + ppc_irq_dispatch_handler(regsParm, irq); + HvCallPci_eoi(eventParm->eventData.slotInterrupt.busNumber, + eventParm->eventData.slotInterrupt.subBusNumber, + eventParm->eventData.slotInterrupt.deviceId); + break; + /* Ignore error recovery events for now */ + case XmPciLpEvent_BusCreated: + printk(KERN_INFO "XmPciLpEvent.c: system bus %d created\n", + eventParm->eventData.busCreated.busNumber); + break; + case XmPciLpEvent_BusError: + case XmPciLpEvent_BusFailed: + printk(KERN_INFO "XmPciLpEvent.c: system bus %d failed\n", + eventParm->eventData.busFailed.busNumber); + break; + case XmPciLpEvent_BusRecovered: + case XmPciLpEvent_UnQuiesceBus: + printk(KERN_INFO "XmPciLpEvent.c: system bus %d recovered\n", + eventParm->eventData.busRecovered.busNumber); + break; + case XmPciLpEvent_NodeFailed: + case XmPciLpEvent_BridgeError: + printk(KERN_INFO + "XmPciLpEvent.c: multi-adapter bridge %d/%d/%d failed\n", + eventParm->eventData.nodeFailed.busNumber, + eventParm->eventData.nodeFailed.subBusNumber, + eventParm->eventData.nodeFailed.deviceId); + break; + case XmPciLpEvent_NodeRecovered: + printk(KERN_INFO + "XmPciLpEvent.c: multi-adapter bridge %d/%d/%d recovered\n", + eventParm->eventData.nodeRecovered.busNumber, + eventParm->eventData.nodeRecovered.subBusNumber, + eventParm->eventData.nodeRecovered.deviceId); + break; + default: + printk(KERN_ERR + "XmPciLpEvent.c: unrecognized event subtype 0x%x\n", + eventParm->hvLpEvent.xSubtype); + break; + } +} + + +/* This should be called sometime prior to buswalk (init_IRQ would be good) */ +int XmPciLpEvent_init() +{ + int xRc; + + PPCDBG(PPCDBG_BUSWALK, + "XmPciLpEvent_init, Register Event type 0x%04X\n", + HvLpEvent_Type_PciIo); + + xRc = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo, + &XmPciLpEvent_handler); + if (xRc == 0) { + xRc = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0); + if (xRc != 0) + printk(KERN_ERR + "XmPciLpEvent.c: open event path failed with rc 0x%x\n", + xRc); + } else + printk(KERN_ERR + "XmPciLpEvent.c: register handler failed with rc 0x%x\n", + xRc); + return xRc; +} diff --git a/arch/ppc64/kernel/align.c b/arch/ppc64/kernel/align.c new file mode 100644 index 00000000000..330e7ef8142 --- /dev/null +++ b/arch/ppc64/kernel/align.c @@ -0,0 +1,396 @@ +/* align.c - handle alignment exceptions for the Power PC. + * + * Copyright (c) 1996 Paul Mackerras + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson + * PowerPC 403GCX/405GP modifications. + * Copyright (c) 2001-2002 PPC64 team, IBM Corp + * 64-bit and Power4 support + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +struct aligninfo { + unsigned char len; + unsigned char flags; +}; + +#define IS_XFORM(inst) (((inst) >> 26) == 31) +#define IS_DSFORM(inst) (((inst) >> 26) >= 56) + +#define INVALID { 0, 0 } + +#define LD 1 /* load */ +#define ST 2 /* store */ +#define SE 4 /* sign-extend value */ +#define F 8 /* to/from fp regs */ +#define U 0x10 /* update index register */ +#define M 0x20 /* multiple load/store */ +#define SW 0x40 /* byte swap */ + +#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */ + +/* + * The PowerPC stores certain bits of the instruction that caused the + * alignment exception in the DSISR register. This array maps those + * bits to information about the operand length and what the + * instruction would do. + */ +static struct aligninfo aligninfo[128] = { + { 4, LD }, /* 00 0 0000: lwz / lwarx */ + INVALID, /* 00 0 0001 */ + { 4, ST }, /* 00 0 0010: stw */ + INVALID, /* 00 0 0011 */ + { 2, LD }, /* 00 0 0100: lhz */ + { 2, LD+SE }, /* 00 0 0101: lha */ + { 2, ST }, /* 00 0 0110: sth */ + { 4, LD+M }, /* 00 0 0111: lmw */ + { 4, LD+F }, /* 00 0 1000: lfs */ + { 8, LD+F }, /* 00 0 1001: lfd */ + { 4, ST+F }, /* 00 0 1010: stfs */ + { 8, ST+F }, /* 00 0 1011: stfd */ + INVALID, /* 00 0 1100 */ + { 8, LD }, /* 00 0 1101: ld */ + INVALID, /* 00 0 1110 */ + { 8, ST }, /* 00 0 1111: std */ + { 4, LD+U }, /* 00 1 0000: lwzu */ + INVALID, /* 00 1 0001 */ + { 4, ST+U }, /* 00 1 0010: stwu */ + INVALID, /* 00 1 0011 */ + { 2, LD+U }, /* 00 1 0100: lhzu */ + { 2, LD+SE+U }, /* 00 1 0101: lhau */ + { 2, ST+U }, /* 00 1 0110: sthu */ + { 4, ST+M }, /* 00 1 0111: stmw */ + { 4, LD+F+U }, /* 00 1 1000: lfsu */ + { 8, LD+F+U }, /* 00 1 1001: lfdu */ + { 4, ST+F+U }, /* 00 1 1010: stfsu */ + { 8, ST+F+U }, /* 00 1 1011: stfdu */ + INVALID, /* 00 1 1100 */ + INVALID, /* 00 1 1101 */ + INVALID, /* 00 1 1110 */ + INVALID, /* 00 1 1111 */ + { 8, LD }, /* 01 0 0000: ldx */ + INVALID, /* 01 0 0001 */ + { 8, ST }, /* 01 0 0010: stdx */ + INVALID, /* 01 0 0011 */ + INVALID, /* 01 0 0100 */ + { 4, LD+SE }, /* 01 0 0101: lwax */ + INVALID, /* 01 0 0110 */ + INVALID, /* 01 0 0111 */ + { 0, LD }, /* 01 0 1000: lswx */ + { 0, LD }, /* 01 0 1001: lswi */ + { 0, ST }, /* 01 0 1010: stswx */ + { 0, ST }, /* 01 0 1011: stswi */ + INVALID, /* 01 0 1100 */ + { 8, LD+U }, /* 01 0 1101: ldu */ + INVALID, /* 01 0 1110 */ + { 8, ST+U }, /* 01 0 1111: stdu */ + { 8, LD+U }, /* 01 1 0000: ldux */ + INVALID, /* 01 1 0001 */ + { 8, ST+U }, /* 01 1 0010: stdux */ + INVALID, /* 01 1 0011 */ + INVALID, /* 01 1 0100 */ + { 4, LD+SE+U }, /* 01 1 0101: lwaux */ + INVALID, /* 01 1 0110 */ + INVALID, /* 01 1 0111 */ + INVALID, /* 01 1 1000 */ + INVALID, /* 01 1 1001 */ + INVALID, /* 01 1 1010 */ + INVALID, /* 01 1 1011 */ + INVALID, /* 01 1 1100 */ + INVALID, /* 01 1 1101 */ + INVALID, /* 01 1 1110 */ + INVALID, /* 01 1 1111 */ + INVALID, /* 10 0 0000 */ + INVALID, /* 10 0 0001 */ + { 0, ST }, /* 10 0 0010: stwcx. */ + INVALID, /* 10 0 0011 */ + INVALID, /* 10 0 0100 */ + INVALID, /* 10 0 0101 */ + INVALID, /* 10 0 0110 */ + INVALID, /* 10 0 0111 */ + { 4, LD+SW }, /* 10 0 1000: lwbrx */ + INVALID, /* 10 0 1001 */ + { 4, ST+SW }, /* 10 0 1010: stwbrx */ + INVALID, /* 10 0 1011 */ + { 2, LD+SW }, /* 10 0 1100: lhbrx */ + { 4, LD+SE }, /* 10 0 1101 lwa */ + { 2, ST+SW }, /* 10 0 1110: sthbrx */ + INVALID, /* 10 0 1111 */ + INVALID, /* 10 1 0000 */ + INVALID, /* 10 1 0001 */ + INVALID, /* 10 1 0010 */ + INVALID, /* 10 1 0011 */ + INVALID, /* 10 1 0100 */ + INVALID, /* 10 1 0101 */ + INVALID, /* 10 1 0110 */ + INVALID, /* 10 1 0111 */ + INVALID, /* 10 1 1000 */ + INVALID, /* 10 1 1001 */ + INVALID, /* 10 1 1010 */ + INVALID, /* 10 1 1011 */ + INVALID, /* 10 1 1100 */ + INVALID, /* 10 1 1101 */ + INVALID, /* 10 1 1110 */ + { L1_CACHE_BYTES, ST }, /* 10 1 1111: dcbz */ + { 4, LD }, /* 11 0 0000: lwzx */ + INVALID, /* 11 0 0001 */ + { 4, ST }, /* 11 0 0010: stwx */ + INVALID, /* 11 0 0011 */ + { 2, LD }, /* 11 0 0100: lhzx */ + { 2, LD+SE }, /* 11 0 0101: lhax */ + { 2, ST }, /* 11 0 0110: sthx */ + INVALID, /* 11 0 0111 */ + { 4, LD+F }, /* 11 0 1000: lfsx */ + { 8, LD+F }, /* 11 0 1001: lfdx */ + { 4, ST+F }, /* 11 0 1010: stfsx */ + { 8, ST+F }, /* 11 0 1011: stfdx */ + INVALID, /* 11 0 1100 */ + { 8, LD+M }, /* 11 0 1101: lmd */ + INVALID, /* 11 0 1110 */ + { 8, ST+M }, /* 11 0 1111: stmd */ + { 4, LD+U }, /* 11 1 0000: lwzux */ + INVALID, /* 11 1 0001 */ + { 4, ST+U }, /* 11 1 0010: stwux */ + INVALID, /* 11 1 0011 */ + { 2, LD+U }, /* 11 1 0100: lhzux */ + { 2, LD+SE+U }, /* 11 1 0101: lhaux */ + { 2, ST+U }, /* 11 1 0110: sthux */ + INVALID, /* 11 1 0111 */ + { 4, LD+F+U }, /* 11 1 1000: lfsux */ + { 8, LD+F+U }, /* 11 1 1001: lfdux */ + { 4, ST+F+U }, /* 11 1 1010: stfsux */ + { 8, ST+F+U }, /* 11 1 1011: stfdux */ + INVALID, /* 11 1 1100 */ + INVALID, /* 11 1 1101 */ + INVALID, /* 11 1 1110 */ + INVALID, /* 11 1 1111 */ +}; + +#define SWAP(a, b) (t = (a), (a) = (b), (b) = t) + +static inline unsigned make_dsisr(unsigned instr) +{ + unsigned dsisr; + + /* create a DSISR value from the instruction */ + dsisr = (instr & 0x03ff0000) >> 16; /* bits 6:15 --> 22:31 */ + + if ( IS_XFORM(instr) ) { + dsisr |= (instr & 0x00000006) << 14; /* bits 29:30 --> 15:16 */ + dsisr |= (instr & 0x00000040) << 8; /* bit 25 --> 17 */ + dsisr |= (instr & 0x00000780) << 3; /* bits 21:24 --> 18:21 */ + } + else { + dsisr |= (instr & 0x04000000) >> 12; /* bit 5 --> 17 */ + dsisr |= (instr & 0x78000000) >> 17; /* bits 1: 4 --> 18:21 */ + if ( IS_DSFORM(instr) ) { + dsisr |= (instr & 0x00000003) << 18; /* bits 30:31 --> 12:13 */ + } + } + + return dsisr; +} + +int +fix_alignment(struct pt_regs *regs) +{ + unsigned int instr, nb, flags; + int t; + unsigned long reg, areg; + unsigned long i; + int ret; + unsigned dsisr; + unsigned char __user *addr; + unsigned char __user *p; + unsigned long __user *lp; + union { + long ll; + double dd; + unsigned char v[8]; + struct { + unsigned hi32; + int low32; + } x32; + struct { + unsigned char hi48[6]; + short low16; + } x16; + } data; + + /* + * Return 1 on success + * Return 0 if unable to handle the interrupt + * Return -EFAULT if data address is bad + */ + + dsisr = regs->dsisr; + + if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) { + unsigned int real_instr; + if (__get_user(real_instr, (unsigned int __user *)regs->nip)) + return 0; + dsisr = make_dsisr(real_instr); + } + + /* extract the operation and registers from the dsisr */ + reg = (dsisr >> 5) & 0x1f; /* source/dest register */ + areg = dsisr & 0x1f; /* register to update */ + instr = (dsisr >> 10) & 0x7f; + instr |= (dsisr >> 13) & 0x60; + + /* Lookup the operation in our table */ + nb = aligninfo[instr].len; + flags = aligninfo[instr].flags; + + /* DAR has the operand effective address */ + addr = (unsigned char __user *)regs->dar; + + /* A size of 0 indicates an instruction we don't support */ + /* we also don't support the multiples (lmw, stmw, lmd, stmd) */ + if ((nb == 0) || (flags & M)) + return 0; /* too hard or invalid instruction */ + + /* + * Special handling for dcbz + * dcbz may give an alignment exception for accesses to caching inhibited + * storage + */ + if (instr == DCBZ) + addr = (unsigned char __user *) ((unsigned long)addr & -L1_CACHE_BYTES); + + /* Verify the address of the operand */ + if (user_mode(regs)) { + if (!access_ok((flags & ST? VERIFY_WRITE: VERIFY_READ), addr, nb)) + return -EFAULT; /* bad address */ + } + + /* Force the fprs into the save area so we can reference them */ + if (flags & F) { + if (!user_mode(regs)) + return 0; + flush_fp_to_thread(current); + } + + /* If we are loading, get the data from user space */ + if (flags & LD) { + data.ll = 0; + ret = 0; + p = addr; + switch (nb) { + case 8: + ret |= __get_user(data.v[0], p++); + ret |= __get_user(data.v[1], p++); + ret |= __get_user(data.v[2], p++); + ret |= __get_user(data.v[3], p++); + case 4: + ret |= __get_user(data.v[4], p++); + ret |= __get_user(data.v[5], p++); + case 2: + ret |= __get_user(data.v[6], p++); + ret |= __get_user(data.v[7], p++); + if (ret) + return -EFAULT; + } + } + + /* If we are storing, get the data from the saved gpr or fpr */ + if (flags & ST) { + if (flags & F) { + if (nb == 4) { + /* Doing stfs, have to convert to single */ + preempt_disable(); + enable_kernel_fp(); + cvt_df(¤t->thread.fpr[reg], (float *)&data.v[4], ¤t->thread.fpscr); + disable_kernel_fp(); + preempt_enable(); + } + else + data.dd = current->thread.fpr[reg]; + } + else + data.ll = regs->gpr[reg]; + } + + /* Swap bytes as needed */ + if (flags & SW) { + if (nb == 2) + SWAP(data.v[6], data.v[7]); + else { /* nb must be 4 */ + SWAP(data.v[4], data.v[7]); + SWAP(data.v[5], data.v[6]); + } + } + + /* Sign extend as needed */ + if (flags & SE) { + if ( nb == 2 ) + data.ll = data.x16.low16; + else /* nb must be 4 */ + data.ll = data.x32.low32; + } + + /* If we are loading, move the data to the gpr or fpr */ + if (flags & LD) { + if (flags & F) { + if (nb == 4) { + /* Doing lfs, have to convert to double */ + preempt_disable(); + enable_kernel_fp(); + cvt_fd((float *)&data.v[4], ¤t->thread.fpr[reg], ¤t->thread.fpscr); + disable_kernel_fp(); + preempt_enable(); + } + else + current->thread.fpr[reg] = data.dd; + } + else + regs->gpr[reg] = data.ll; + } + + /* If we are storing, copy the data to the user */ + if (flags & ST) { + ret = 0; + p = addr; + switch (nb) { + case 128: /* Special case - must be dcbz */ + lp = (unsigned long __user *)p; + for (i = 0; i < L1_CACHE_BYTES / sizeof(long); ++i) + ret |= __put_user(0, lp++); + break; + case 8: + ret |= __put_user(data.v[0], p++); + ret |= __put_user(data.v[1], p++); + ret |= __put_user(data.v[2], p++); + ret |= __put_user(data.v[3], p++); + case 4: + ret |= __put_user(data.v[4], p++); + ret |= __put_user(data.v[5], p++); + case 2: + ret |= __put_user(data.v[6], p++); + ret |= __put_user(data.v[7], p++); + } + if (ret) + return -EFAULT; + } + + /* Update RA as needed */ + if (flags & U) { + regs->gpr[areg] = regs->dar; + } + + return 1; +} + diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c new file mode 100644 index 00000000000..0094ac79a18 --- /dev/null +++ b/arch/ppc64/kernel/asm-offsets.c @@ -0,0 +1,193 @@ +/* + * This program is used to generate definitions needed by + * assembly language modules. + * + * We use the technique used in the OSF Mach kernel code: + * generate asm statements containing #defines, + * compile this file to assembler, and then extract the + * #defines from the assembly-language output. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +int main(void) +{ + /* thread struct on stack */ + DEFINE(THREAD_SHIFT, THREAD_SHIFT); + DEFINE(THREAD_SIZE, THREAD_SIZE); + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_SC_NOERR, offsetof(struct thread_info, syscall_noerror)); + + /* task_struct->thread */ + DEFINE(THREAD, offsetof(struct task_struct, thread)); + DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); + DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode)); + DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0])); + DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr)); + DEFINE(KSP, offsetof(struct thread_struct, ksp)); + DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); + +#ifdef CONFIG_ALTIVEC + DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0])); + DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); + DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr)); + DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); +#endif /* CONFIG_ALTIVEC */ + DEFINE(MM, offsetof(struct task_struct, mm)); + + DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); + DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size)); + DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page)); + DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); + DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); + DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); + DEFINE(PLATFORM, offsetof(struct systemcfg, platform)); + + /* paca */ + DEFINE(PACA_SIZE, sizeof(struct paca_struct)); + DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); + DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); + DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); + DEFINE(PACACURRENT, offsetof(struct paca_struct, __current)); + DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr)); + DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real)); + DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr)); + DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr)); + DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1)); + DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc)); + DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, proc_enabled)); + DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); + DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); + DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); +#ifdef CONFIG_HUGETLB_PAGE + DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs)); +#endif /* CONFIG_HUGETLB_PAGE */ + DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); + DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); + DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); + DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); + DEFINE(PACA_EXDSI, offsetof(struct paca_struct, exdsi)); + DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); + DEFINE(PACALPPACA, offsetof(struct paca_struct, lppaca)); + DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); + DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); + DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); + DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); + DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); + + /* RTAS */ + DEFINE(RTASBASE, offsetof(struct rtas_t, base)); + DEFINE(RTASENTRY, offsetof(struct rtas_t, entry)); + + /* Interrupt register frame */ + DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); + + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + + /* 288 = # of volatile regs, int & fp, for leaf routines */ + /* which do not stack a frame. See the PPC64 ABI. */ + DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288); + /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ + DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); + DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); + DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); + DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); + DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2])); + DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3])); + DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4])); + DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5])); + DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6])); + DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7])); + DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8])); + DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9])); + DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10])); + DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11])); + DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12])); + DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13])); + /* + * Note: these symbols include _ because they overlap with special + * register names + */ + DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip)); + DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr)); + DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr)); + DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link)); + DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr)); + DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer)); + DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); + DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); + DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3)); + DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result)); + DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); + DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe)); + + /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */ + DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)); + DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8); + + DEFINE(CLONE_VM, CLONE_VM); + DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + + /* About the CPU features table */ + DEFINE(CPU_SPEC_ENTRY_SIZE, sizeof(struct cpu_spec)); + DEFINE(CPU_SPEC_PVR_MASK, offsetof(struct cpu_spec, pvr_mask)); + DEFINE(CPU_SPEC_PVR_VALUE, offsetof(struct cpu_spec, pvr_value)); + DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); + DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); + + /* systemcfg offsets for use by vdso */ + DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp)); + DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec)); + DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs)); + DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec)); + DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count)); + DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest)); + DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime)); + DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32)); + DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64)); + + /* timeval/timezone offsets for use by vdso */ + DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec)); + DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec)); + DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec)); + DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec)); + DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); + DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + + return 0; +} diff --git a/arch/ppc64/kernel/binfmt_elf32.c b/arch/ppc64/kernel/binfmt_elf32.c new file mode 100644 index 00000000000..fadc699a049 --- /dev/null +++ b/arch/ppc64/kernel/binfmt_elf32.c @@ -0,0 +1,78 @@ +/* + * binfmt_elf32.c: Support 32-bit PPC ELF binaries on Power3 and followons. + * based on the SPARC64 version. + * Copyright (C) 1995, 1996, 1997, 1998 David S. Miller (davem@redhat.com) + * Copyright (C) 1995, 1996, 1997, 1998 Jakub Jelinek (jj@ultra.linux.cz) + * + * Copyright (C) 2000,2001 Ken Aaker (kdaaker@rchland.vnet.ibm.com), IBM Corp + * Copyright (C) 2001 Anton Blanchard (anton@au.ibm.com), IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define ELF_ARCH EM_PPC +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2MSB; + +#include +#include +#include +#include +#include + +#define elf_prstatus elf_prstatus32 +struct elf_prstatus32 +{ + struct elf_siginfo pr_info; /* Info associated with signal */ + short pr_cursig; /* Current signal */ + unsigned int pr_sigpend; /* Set of pending signals */ + unsigned int pr_sighold; /* Set of held signals */ + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct compat_timeval pr_utime; /* User time */ + struct compat_timeval pr_stime; /* System time */ + struct compat_timeval pr_cutime; /* Cumulative user time */ + struct compat_timeval pr_cstime; /* Cumulative system time */ + elf_gregset_t pr_reg; /* General purpose registers. */ + int pr_fpvalid; /* True if math co-processor being used. */ +}; + +#define elf_prpsinfo elf_prpsinfo32 +struct elf_prpsinfo32 +{ + char pr_state; /* numeric process state */ + char pr_sname; /* char for pr_state */ + char pr_zomb; /* zombie */ + char pr_nice; /* nice val */ + unsigned int pr_flag; /* flags */ + u32 pr_uid; + u32 pr_gid; + pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; + /* Lots missing */ + char pr_fname[16]; /* filename of executable */ + char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ +}; + +#include + +#undef cputime_to_timeval +#define cputime_to_timeval cputime_to_compat_timeval +static __inline__ void +cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value) +{ + unsigned long jiffies = cputime_to_jiffies(cputime); + value->tv_usec = (jiffies % HZ) * (1000000L / HZ); + value->tv_sec = jiffies / HZ; +} + +extern void start_thread32(struct pt_regs *, unsigned long, unsigned long); +#undef start_thread +#define start_thread start_thread32 +#define init_elf_binfmt init_elf32_binfmt + +#include "../../../fs/binfmt_elf.c" diff --git a/arch/ppc64/kernel/bitops.c b/arch/ppc64/kernel/bitops.c new file mode 100644 index 00000000000..ae329e8b4ac --- /dev/null +++ b/arch/ppc64/kernel/bitops.c @@ -0,0 +1,147 @@ +/* + * These are too big to be inlined. + */ + +#include +#include +#include +#include + +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + (offset >> 6); + unsigned long result = offset & ~63UL; + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 63UL; + if (offset) { + tmp = *(p++); + tmp |= ~0UL >> (64 - offset); + if (size < 64) + goto found_first; + if (~tmp) + goto found_middle; + size -= 64; + result += 64; + } + while (size & ~63UL) { + if (~(tmp = *(p++))) + goto found_middle; + result += 64; + size -= 64; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ +found_middle: + return result + ffz(tmp); +} + +EXPORT_SYMBOL(find_next_zero_bit); + +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + (offset >> 6); + unsigned long result = offset & ~63UL; + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 63UL; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < 64) + goto found_first; + if (tmp) + goto found_middle; + size -= 64; + result += 64; + } + while (size & ~63UL) { + if ((tmp = *(p++))) + goto found_middle; + result += 64; + size -= 64; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (64 - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); +} + +EXPORT_SYMBOL(find_next_bit); + +static inline unsigned int ext2_ilog2(unsigned int x) +{ + int lz; + + asm("cntlzw %0,%1": "=r"(lz):"r"(x)); + return 31 - lz; +} + +static inline unsigned int ext2_ffz(unsigned int x) +{ + u32 rc; + if ((x = ~x) == 0) + return 32; + rc = ext2_ilog2(x & -x); + return rc; +} + +unsigned long find_next_zero_le_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned int *p = ((const unsigned int *)addr) + (offset >> 5); + unsigned int result = offset & ~31; + unsigned int tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 31; + if (offset) { + tmp = cpu_to_le32p(p++); + tmp |= ~0U >> (32 - offset); /* bug or feature ? */ + if (size < 32) + goto found_first; + if (tmp != ~0) + goto found_middle; + size -= 32; + result += 32; + } + while (size >= 32) { + if ((tmp = cpu_to_le32p(p++)) != ~0) + goto found_middle; + result += 32; + size -= 32; + } + if (!size) + return result; + tmp = cpu_to_le32p(p); +found_first: + tmp |= ~0 << size; + if (tmp == ~0) /* Are any bits zero? */ + return result + size; /* Nope. */ +found_middle: + return result + ext2_ffz(tmp); +} + +EXPORT_SYMBOL(find_next_zero_le_bit); diff --git a/arch/ppc64/kernel/btext.c b/arch/ppc64/kernel/btext.c new file mode 100644 index 00000000000..c53f079e9b7 --- /dev/null +++ b/arch/ppc64/kernel/btext.c @@ -0,0 +1,751 @@ +/* + * Procedures for drawing on the screen early on in the boot process. + * + * Benjamin Herrenschmidt + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef NO_SCROLL + +#ifndef NO_SCROLL +static void scrollscreen(void); +#endif + +static void draw_byte(unsigned char c, long locX, long locY); +static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb); +static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb); +static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb); + +static int g_loc_X; +static int g_loc_Y; +static int g_max_loc_X; +static int g_max_loc_Y; + +static int dispDeviceRowBytes; +static int dispDeviceDepth; +static int dispDeviceRect[4]; +static unsigned char *dispDeviceBase, *logicalDisplayBase; + +unsigned long disp_BAT[2] __initdata = {0, 0}; + +#define cmapsz (16*256) + +static unsigned char vga_font[cmapsz]; + +int boot_text_mapped; +int force_printk_to_btext = 0; + + +/* Here's a small text engine to use during early boot + * or for debugging purposes + * + * todo: + * + * - build some kind of vgacon with it to enable early printk + * - move to a separate file + * - add a few video driver hooks to keep in sync with display + * changes. + */ + +void map_boot_text(void) +{ + unsigned long base, offset, size; + unsigned char *vbase; + + /* By default, we are no longer mapped */ + boot_text_mapped = 0; + if (dispDeviceBase == 0) + return; + base = ((unsigned long) dispDeviceBase) & 0xFFFFF000UL; + offset = ((unsigned long) dispDeviceBase) - base; + size = dispDeviceRowBytes * dispDeviceRect[3] + offset + + dispDeviceRect[0]; + vbase = __ioremap(base, size, _PAGE_NO_CACHE); + if (vbase == 0) + return; + logicalDisplayBase = vbase + offset; + boot_text_mapped = 1; +} + +int btext_initialize(struct device_node *np) +{ + unsigned int width, height, depth, pitch; + unsigned long address = 0; + u32 *prop; + + prop = (u32 *)get_property(np, "width", NULL); + if (prop == NULL) + return -EINVAL; + width = *prop; + prop = (u32 *)get_property(np, "height", NULL); + if (prop == NULL) + return -EINVAL; + height = *prop; + prop = (u32 *)get_property(np, "depth", NULL); + if (prop == NULL) + return -EINVAL; + depth = *prop; + pitch = width * ((depth + 7) / 8); + prop = (u32 *)get_property(np, "linebytes", NULL); + if (prop) + pitch = *prop; + if (pitch == 1) + pitch = 0x1000; + prop = (u32 *)get_property(np, "address", NULL); + if (prop) + address = *prop; + + /* FIXME: Add support for PCI reg properties */ + + if (address == 0) + return -EINVAL; + + g_loc_X = 0; + g_loc_Y = 0; + g_max_loc_X = width / 8; + g_max_loc_Y = height / 16; + logicalDisplayBase = (unsigned char *)address; + dispDeviceBase = (unsigned char *)address; + dispDeviceRowBytes = pitch; + dispDeviceDepth = depth; + dispDeviceRect[0] = dispDeviceRect[1] = 0; + dispDeviceRect[2] = width; + dispDeviceRect[3] = height; + + map_boot_text(); + + return 0; +} + + +/* Calc the base address of a given point (x,y) */ +static unsigned char * calc_base(int x, int y) +{ + unsigned char *base; + + base = logicalDisplayBase; + if (base == 0) + base = dispDeviceBase; + base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3); + base += (y + dispDeviceRect[1]) * dispDeviceRowBytes; + return base; +} + +/* Adjust the display to a new resolution */ +void btext_update_display(unsigned long phys, int width, int height, + int depth, int pitch) +{ + if (dispDeviceBase == 0) + return; + + /* check it's the same frame buffer (within 256MB) */ + if ((phys ^ (unsigned long)dispDeviceBase) & 0xf0000000) + return; + + dispDeviceBase = (__u8 *) phys; + dispDeviceRect[0] = 0; + dispDeviceRect[1] = 0; + dispDeviceRect[2] = width; + dispDeviceRect[3] = height; + dispDeviceDepth = depth; + dispDeviceRowBytes = pitch; + if (boot_text_mapped) { + iounmap(logicalDisplayBase); + boot_text_mapped = 0; + } + map_boot_text(); + g_loc_X = 0; + g_loc_Y = 0; + g_max_loc_X = width / 8; + g_max_loc_Y = height / 16; +} + +void btext_clearscreen(void) +{ + unsigned long *base = (unsigned long *)calc_base(0, 0); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 3; + int i,j; + + for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++) + { + unsigned long *ptr = base; + for(j=width; j; --j) + *(ptr++) = 0; + base += (dispDeviceRowBytes >> 3); + } +} + +#ifndef NO_SCROLL +static void scrollscreen(void) +{ + unsigned long *src = (unsigned long *)calc_base(0,16); + unsigned long *dst = (unsigned long *)calc_base(0,0); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 3; + int i,j; + + for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++) + { + unsigned long *src_ptr = src; + unsigned long *dst_ptr = dst; + for(j=width; j; --j) + *(dst_ptr++) = *(src_ptr++); + src += (dispDeviceRowBytes >> 3); + dst += (dispDeviceRowBytes >> 3); + } + for (i=0; i<16; i++) + { + unsigned long *dst_ptr = dst; + for(j=width; j; --j) + *(dst_ptr++) = 0; + dst += (dispDeviceRowBytes >> 3); + } +} +#endif /* ndef NO_SCROLL */ + +void btext_drawchar(char c) +{ + int cline = 0; +#ifdef NO_SCROLL + int x; +#endif + if (!boot_text_mapped) + return; + + switch (c) { + case '\b': + if (g_loc_X > 0) + --g_loc_X; + break; + case '\t': + g_loc_X = (g_loc_X & -8) + 8; + break; + case '\r': + g_loc_X = 0; + break; + case '\n': + g_loc_X = 0; + g_loc_Y++; + cline = 1; + break; + default: + draw_byte(c, g_loc_X++, g_loc_Y); + } + if (g_loc_X >= g_max_loc_X) { + g_loc_X = 0; + g_loc_Y++; + cline = 1; + } +#ifndef NO_SCROLL + while (g_loc_Y >= g_max_loc_Y) { + scrollscreen(); + g_loc_Y--; + } +#else + /* wrap around from bottom to top of screen so we don't + waste time scrolling each line. -- paulus. */ + if (g_loc_Y >= g_max_loc_Y) + g_loc_Y = 0; + if (cline) { + for (x = 0; x < g_max_loc_X; ++x) + draw_byte(' ', x, g_loc_Y); + } +#endif +} + +void btext_drawstring(const char *c) +{ + if (!boot_text_mapped) + return; + while (*c) + btext_drawchar(*c++); +} + +void btext_drawhex(unsigned long v) +{ + char *hex_table = "0123456789abcdef"; + + if (!boot_text_mapped) + return; + btext_drawchar(hex_table[(v >> 60) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 56) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 52) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 48) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 44) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 40) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 36) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 32) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 28) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 24) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 20) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 16) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 12) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 8) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 4) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 0) & 0x0000000FUL]); + btext_drawchar(' '); +} + +static void draw_byte(unsigned char c, long locX, long locY) +{ + unsigned char *base = calc_base(locX << 3, locY << 4); + unsigned char *font = &vga_font[((unsigned int)c) * 16]; + int rb = dispDeviceRowBytes; + + switch(dispDeviceDepth) { + case 24: + case 32: + draw_byte_32(font, (unsigned int *)base, rb); + break; + case 15: + case 16: + draw_byte_16(font, (unsigned int *)base, rb); + break; + case 8: + draw_byte_8(font, (unsigned int *)base, rb); + break; + } +} + +static unsigned int expand_bits_8[16] = { + 0x00000000, + 0x000000ff, + 0x0000ff00, + 0x0000ffff, + 0x00ff0000, + 0x00ff00ff, + 0x00ffff00, + 0x00ffffff, + 0xff000000, + 0xff0000ff, + 0xff00ff00, + 0xff00ffff, + 0xffff0000, + 0xffff00ff, + 0xffffff00, + 0xffffffff +}; + +static unsigned int expand_bits_16[4] = { + 0x00000000, + 0x0000ffff, + 0xffff0000, + 0xffffffff +}; + + +static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (-(bits >> 7) & fg) ^ bg; + base[1] = (-((bits >> 6) & 1) & fg) ^ bg; + base[2] = (-((bits >> 5) & 1) & fg) ^ bg; + base[3] = (-((bits >> 4) & 1) & fg) ^ bg; + base[4] = (-((bits >> 3) & 1) & fg) ^ bg; + base[5] = (-((bits >> 2) & 1) & fg) ^ bg; + base[6] = (-((bits >> 1) & 1) & fg) ^ bg; + base[7] = (-(bits & 1) & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static void draw_byte_16(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + unsigned int *eb = (int *)expand_bits_16; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 6] & fg) ^ bg; + base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg; + base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg; + base[3] = (eb[bits & 3] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static void draw_byte_8(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0x0F0F0F0FUL; + int bg = 0x00000000UL; + unsigned int *eb = (int *)expand_bits_8; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 4] & fg) ^ bg; + base[1] = (eb[bits & 0xf] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static unsigned char vga_font[cmapsz] = { +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, +0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff, +0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, +0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, +0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, +0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, +0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, +0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, +0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e, +0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, +0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63, +0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, +0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e, +0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, +0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb, +0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, +0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, +0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, +0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, +0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, +0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, +0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, +0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, +0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, +0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, +0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18, +0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, +0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, +0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, +0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, +0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, +0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, +0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, +0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde, +0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, +0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, +0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c, +0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, +0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, +0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c, +0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, +0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7, +0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, +0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, +0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, +0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, +0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, +0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, +0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, +0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, +0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, +0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, +0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, +0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, +0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, +0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30, +0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, +0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18, +0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, +0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, +0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00, +0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, +0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, +0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, +0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, +0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, +0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, +0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, +0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66, +0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, +0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00, +0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, +0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c, +0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00, +0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, +0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, +0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00, +0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, +0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, +0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, +0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00, +0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, +0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, +0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, +0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, +0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, +0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, +0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, +0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, +0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, +0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, +0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44, +0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, +0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, +0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, +0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, +0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0, +0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, +0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, +0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, +0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, +0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, +0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, +0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, +0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, +0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, +0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, +0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, +0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, +0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, +0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c, +0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00, +0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, +}; diff --git a/arch/ppc64/kernel/cpu_setup_power4.S b/arch/ppc64/kernel/cpu_setup_power4.S new file mode 100644 index 00000000000..3bd95182085 --- /dev/null +++ b/arch/ppc64/kernel/cpu_setup_power4.S @@ -0,0 +1,214 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +_GLOBAL(__970_cpu_preinit) + /* + * Do nothing if not running in HV mode + */ + mfmsr r0 + rldicl. r0,r0,4,63 + beqlr + + /* + * Deal only with PPC970 and PPC970FX. + */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi cr0,r0,0x39 + cmpwi cr1,r0,0x3c + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + bnelr + + /* Make sure HID4:rm_ci is off before MMU is turned off, that large + * pages are enabled with HID4:61 and clear HID5:DCBZ_size and + * HID5:DCBZ32_ill + */ + li r0,0 + mfspr r3,SPRN_HID4 + rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */ + rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */ + sync + mtspr SPRN_HID4,r3 + isync + sync + mfspr r3,SPRN_HID5 + rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */ + sync + mtspr SPRN_HID5,r3 + isync + sync + + /* Setup some basic HID1 features */ + mfspr r0,SPRN_HID1 + li r3,0x1200 /* enable i-fetch cacheability */ + sldi r3,r3,44 /* and prefetch */ + or r0,r0,r3 + mtspr SPRN_HID1,r0 + mtspr SPRN_HID1,r0 + isync + + /* Clear HIOR */ + li r0,0 + sync + mtspr SPRN_HIOR,0 /* Clear interrupt prefix */ + isync + blr + +_GLOBAL(__setup_cpu_power4) + blr + +_GLOBAL(__setup_cpu_ppc970) + mfspr r0,SPRN_HID0 + li r11,5 /* clear DOZE and SLEEP */ + rldimi r0,r11,52,8 /* set NAP and DPM */ + mtspr SPRN_HID0,r0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + sync + isync + blr + +/* Definitions for the table use to save CPU states */ +#define CS_HID0 0 +#define CS_HID1 8 +#define CS_HID4 16 +#define CS_HID5 24 +#define CS_SIZE 32 + + .data + .balign L1_CACHE_BYTES,0 +cpu_state_storage: + .space CS_SIZE + .balign L1_CACHE_BYTES,0 + .text + +/* Called in normal context to backup CPU 0 state. This + * does not include cache settings. This function is also + * called for machine sleep. This does not include the MMU + * setup, BATs, etc... but rather the "special" registers + * like HID0, HID1, HID4, etc... + */ +_GLOBAL(__save_cpu_setup) + /* Some CR fields are volatile, we back it up all */ + mfcr r7 + + /* Get storage ptr */ + LOADADDR(r5,cpu_state_storage) + + /* We only deal with 970 for now */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi cr0,r0,0x39 + cmpwi cr1,r0,0x3c + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + bne 1f + + /* Save HID0,1,4 and 5 */ + mfspr r3,SPRN_HID0 + std r3,CS_HID0(r5) + mfspr r3,SPRN_HID1 + std r3,CS_HID1(r5) + mfspr r3,SPRN_HID4 + std r3,CS_HID4(r5) + mfspr r3,SPRN_HID5 + std r3,CS_HID5(r5) + +1: + mtcr r7 + blr + +/* Called with no MMU context (typically MSR:IR/DR off) to + * restore CPU state as backed up by the previous + * function. This does not include cache setting + */ +_GLOBAL(__restore_cpu_setup) + /* Get storage ptr (FIXME when using anton reloc as we + * are running with translation disabled here + */ + LOADADDR(r5,cpu_state_storage) + + /* We only deal with 970 for now */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi cr0,r0,0x39 + cmpwi cr1,r0,0x3c + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + bne 1f + + /* Before accessing memory, we make sure rm_ci is clear */ + li r0,0 + mfspr r3,SPRN_HID4 + rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */ + sync + mtspr SPRN_HID4,r3 + isync + sync + + /* Clear interrupt prefix */ + li r0,0 + sync + mtspr SPRN_HIOR,0 + isync + + /* Restore HID0 */ + ld r3,CS_HID0(r5) + sync + isync + mtspr SPRN_HID0,r3 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + sync + isync + + /* Restore HID1 */ + ld r3,CS_HID1(r5) + sync + isync + mtspr SPRN_HID1,r3 + mtspr SPRN_HID1,r3 + sync + isync + + /* Restore HID4 */ + ld r3,CS_HID4(r5) + sync + isync + mtspr SPRN_HID4,r3 + sync + isync + + /* Restore HID5 */ + ld r3,CS_HID5(r5) + sync + isync + mtspr SPRN_HID5,r3 + sync + isync +1: + blr + diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c new file mode 100644 index 00000000000..8644a864805 --- /dev/null +++ b/arch/ppc64/kernel/cputable.c @@ -0,0 +1,197 @@ +/* + * arch/ppc64/kernel/cputable.c + * + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include + +struct cpu_spec* cur_cpu_spec = NULL; +EXPORT_SYMBOL(cur_cpu_spec); + +/* NOTE: + * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's + * the responsibility of the appropriate CPU save/restore functions to + * eventually copy these settings over. Those save/restore aren't yet + * part of the cputable though. That has to be fixed for both ppc32 + * and ppc64 + */ +extern void __setup_cpu_power3(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_power4(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); + + +/* We only set the altivec features if the kernel was compiled with altivec + * support + */ +#ifdef CONFIG_ALTIVEC +#define CPU_FTR_ALTIVEC_COMP CPU_FTR_ALTIVEC +#define PPC_FEATURE_HAS_ALTIVEC_COMP PPC_FEATURE_HAS_ALTIVEC +#else +#define CPU_FTR_ALTIVEC_COMP 0 +#define PPC_FEATURE_HAS_ALTIVEC_COMP 0 +#endif + +struct cpu_spec cpu_specs[] = { + { /* Power3 */ + 0xffff0000, 0x00400000, "POWER3 (630)", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_IABR | CPU_FTR_PMC8, + COMMON_USER_PPC64, + 128, 128, + __setup_cpu_power3, + COMMON_PPC64_FW + }, + { /* Power3+ */ + 0xffff0000, 0x00410000, "POWER3 (630+)", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_IABR | CPU_FTR_PMC8, + COMMON_USER_PPC64, + 128, 128, + __setup_cpu_power3, + COMMON_PPC64_FW + }, + { /* Northstar */ + 0xffff0000, 0x00330000, "RS64-II (northstar)", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + COMMON_USER_PPC64, + 128, 128, + __setup_cpu_power3, + COMMON_PPC64_FW + }, + { /* Pulsar */ + 0xffff0000, 0x00340000, "RS64-III (pulsar)", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + COMMON_USER_PPC64, + 128, 128, + __setup_cpu_power3, + COMMON_PPC64_FW + }, + { /* I-star */ + 0xffff0000, 0x00360000, "RS64-III (icestar)", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + COMMON_USER_PPC64, + 128, 128, + __setup_cpu_power3, + COMMON_PPC64_FW + }, + { /* S-star */ + 0xffff0000, 0x00370000, "RS64-IV (sstar)", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + COMMON_USER_PPC64, + 128, 128, + __setup_cpu_power3, + COMMON_PPC64_FW + }, + { /* Power4 */ + 0xffff0000, 0x00350000, "POWER4 (gp)", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + COMMON_USER_PPC64, + 128, 128, + __setup_cpu_power4, + COMMON_PPC64_FW + }, + { /* Power4+ */ + 0xffff0000, 0x00380000, "POWER4+ (gq)", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + COMMON_USER_PPC64, + 128, 128, + __setup_cpu_power4, + COMMON_PPC64_FW + }, + { /* PPC970 */ + 0xffff0000, 0x00390000, "PPC970", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, + 128, 128, + __setup_cpu_ppc970, + COMMON_PPC64_FW + }, + { /* PPC970FX */ + 0xffff0000, 0x003c0000, "PPC970FX", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, + 128, 128, + __setup_cpu_ppc970, + COMMON_PPC64_FW + }, + { /* Power5 */ + 0xffff0000, 0x003a0000, "POWER5 (gr)", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | + CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | + CPU_FTR_MMCRA_SIHV, + COMMON_USER_PPC64, + 128, 128, + __setup_cpu_power4, + COMMON_PPC64_FW + }, + { /* Power5 */ + 0xffff0000, 0x003b0000, "POWER5 (gs)", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | + CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | + CPU_FTR_MMCRA_SIHV, + COMMON_USER_PPC64, + 128, 128, + __setup_cpu_power4, + COMMON_PPC64_FW + }, + { /* default match */ + 0x00000000, 0x00000000, "POWER4 (compatible)", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2, + COMMON_USER_PPC64, + 128, 128, + __setup_cpu_power4, + COMMON_PPC64_FW + } +}; + +firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { + {FW_FEATURE_PFT, "hcall-pft"}, + {FW_FEATURE_TCE, "hcall-tce"}, + {FW_FEATURE_SPRG0, "hcall-sprg0"}, + {FW_FEATURE_DABR, "hcall-dabr"}, + {FW_FEATURE_COPY, "hcall-copy"}, + {FW_FEATURE_ASR, "hcall-asr"}, + {FW_FEATURE_DEBUG, "hcall-debug"}, + {FW_FEATURE_PERF, "hcall-perf"}, + {FW_FEATURE_DUMP, "hcall-dump"}, + {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, + {FW_FEATURE_MIGRATE, "hcall-migrate"}, + {FW_FEATURE_PERFMON, "hcall-perfmon"}, + {FW_FEATURE_CRQ, "hcall-crq"}, + {FW_FEATURE_VIO, "hcall-vio"}, + {FW_FEATURE_RDMA, "hcall-rdma"}, + {FW_FEATURE_LLAN, "hcall-lLAN"}, + {FW_FEATURE_BULK, "hcall-bulk"}, + {FW_FEATURE_XDABR, "hcall-xdabr"}, + {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, + {FW_FEATURE_SPLPAR, "hcall-splpar"}, +}; diff --git a/arch/ppc64/kernel/dma.c b/arch/ppc64/kernel/dma.c new file mode 100644 index 00000000000..ce714c92713 --- /dev/null +++ b/arch/ppc64/kernel/dma.c @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Implements the generic device dma API for ppc64. Handles + * the pci and vio busses + */ + +#include +#include +/* Include the busses we support */ +#include +#include +#include +#include + +static struct dma_mapping_ops *get_dma_ops(struct device *dev) +{ + if (dev->bus == &pci_bus_type) + return &pci_dma_ops; +#ifdef CONFIG_IBMVIO + if (dev->bus == &vio_bus_type) + return &vio_dma_ops; +#endif + return NULL; +} + +int dma_supported(struct device *dev, u64 mask) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + return dma_ops->dma_supported(dev, mask); + BUG(); + return 0; +} +EXPORT_SYMBOL(dma_supported); + +int dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (dev->bus == &pci_bus_type) + return pci_set_dma_mask(to_pci_dev(dev), dma_mask); +#ifdef CONFIG_IBMVIO + if (dev->bus == &vio_bus_type) + return -EIO; +#endif /* CONFIG_IBMVIO */ + BUG(); + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, unsigned int __nocast flag) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + return dma_ops->alloc_coherent(dev, size, dma_handle, flag); + BUG(); + return NULL; +} +EXPORT_SYMBOL(dma_alloc_coherent); + +void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); + else + BUG(); +} +EXPORT_SYMBOL(dma_free_coherent); + +dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + return dma_ops->map_single(dev, cpu_addr, size, direction); + BUG(); + return (dma_addr_t)0; +} +EXPORT_SYMBOL(dma_map_single); + +void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + dma_ops->unmap_single(dev, dma_addr, size, direction); + else + BUG(); +} +EXPORT_SYMBOL(dma_unmap_single); + +dma_addr_t dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + return dma_ops->map_single(dev, + (page_address(page) + offset), size, direction); + BUG(); + return (dma_addr_t)0; +} +EXPORT_SYMBOL(dma_map_page); + +void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + dma_ops->unmap_single(dev, dma_address, size, direction); + else + BUG(); +} +EXPORT_SYMBOL(dma_unmap_page); + +int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + return dma_ops->map_sg(dev, sg, nents, direction); + BUG(); + return 0; +} +EXPORT_SYMBOL(dma_map_sg); + +void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + dma_ops->unmap_sg(dev, sg, nhwentries, direction); + else + BUG(); +} +EXPORT_SYMBOL(dma_unmap_sg); diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c new file mode 100644 index 00000000000..d63d41f3eec --- /dev/null +++ b/arch/ppc64/kernel/eeh.c @@ -0,0 +1,937 @@ +/* + * eeh.c + * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pci.h" + +#undef DEBUG + +/** Overview: + * EEH, or "Extended Error Handling" is a PCI bridge technology for + * dealing with PCI bus errors that can't be dealt with within the + * usual PCI framework, except by check-stopping the CPU. Systems + * that are designed for high-availability/reliability cannot afford + * to crash due to a "mere" PCI error, thus the need for EEH. + * An EEH-capable bridge operates by converting a detected error + * into a "slot freeze", taking the PCI adapter off-line, making + * the slot behave, from the OS'es point of view, as if the slot + * were "empty": all reads return 0xff's and all writes are silently + * ignored. EEH slot isolation events can be triggered by parity + * errors on the address or data busses (e.g. during posted writes), + * which in turn might be caused by dust, vibration, humidity, + * radioactivity or plain-old failed hardware. + * + * Note, however, that one of the leading causes of EEH slot + * freeze events are buggy device drivers, buggy device microcode, + * or buggy device hardware. This is because any attempt by the + * device to bus-master data to a memory address that is not + * assigned to the device will trigger a slot freeze. (The idea + * is to prevent devices-gone-wild from corrupting system memory). + * Buggy hardware/drivers will have a miserable time co-existing + * with EEH. + * + * Ideally, a PCI device driver, when suspecting that an isolation + * event has occured (e.g. by reading 0xff's), will then ask EEH + * whether this is the case, and then take appropriate steps to + * reset the PCI slot, the PCI device, and then resume operations. + * However, until that day, the checking is done here, with the + * eeh_check_failure() routine embedded in the MMIO macros. If + * the slot is found to be isolated, an "EEH Event" is synthesized + * and sent out for processing. + */ + +/** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */ +#define BUID_HI(buid) ((buid) >> 32) +#define BUID_LO(buid) ((buid) & 0xffffffff) + +/* EEH event workqueue setup. */ +static DEFINE_SPINLOCK(eeh_eventlist_lock); +LIST_HEAD(eeh_eventlist); +static void eeh_event_handler(void *); +DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL); + +static struct notifier_block *eeh_notifier_chain; + +/* + * If a device driver keeps reading an MMIO register in an interrupt + * handler after a slot isolation event has occurred, we assume it + * is broken and panic. This sets the threshold for how many read + * attempts we allow before panicking. + */ +#define EEH_MAX_FAILS 1000 +static atomic_t eeh_fail_count; + +/* RTAS tokens */ +static int ibm_set_eeh_option; +static int ibm_set_slot_reset; +static int ibm_read_slot_reset_state; +static int ibm_read_slot_reset_state2; +static int ibm_slot_error_detail; + +static int eeh_subsystem_enabled; + +/* Buffer for reporting slot-error-detail rtas calls */ +static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; +static DEFINE_SPINLOCK(slot_errbuf_lock); +static int eeh_error_buf_size; + +/* System monitoring statistics */ +static DEFINE_PER_CPU(unsigned long, total_mmio_ffs); +static DEFINE_PER_CPU(unsigned long, false_positives); +static DEFINE_PER_CPU(unsigned long, ignored_failures); +static DEFINE_PER_CPU(unsigned long, slot_resets); + +/** + * The pci address cache subsystem. This subsystem places + * PCI device address resources into a red-black tree, sorted + * according to the address range, so that given only an i/o + * address, the corresponding PCI device can be **quickly** + * found. It is safe to perform an address lookup in an interrupt + * context; this ability is an important feature. + * + * Currently, the only customer of this code is the EEH subsystem; + * thus, this code has been somewhat tailored to suit EEH better. + * In particular, the cache does *not* hold the addresses of devices + * for which EEH is not enabled. + * + * (Implementation Note: The RB tree seems to be better/faster + * than any hash algo I could think of for this problem, even + * with the penalty of slow pointer chases for d-cache misses). + */ +struct pci_io_addr_range +{ + struct rb_node rb_node; + unsigned long addr_lo; + unsigned long addr_hi; + struct pci_dev *pcidev; + unsigned int flags; +}; + +static struct pci_io_addr_cache +{ + struct rb_root rb_root; + spinlock_t piar_lock; +} pci_io_addr_cache_root; + +static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) +{ + struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; + + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + + if (addr < piar->addr_lo) { + n = n->rb_left; + } else { + if (addr > piar->addr_hi) { + n = n->rb_right; + } else { + pci_dev_get(piar->pcidev); + return piar->pcidev; + } + } + } + + return NULL; +} + +/** + * pci_get_device_by_addr - Get device, given only address + * @addr: mmio (PIO) phys address or i/o port number + * + * Given an mmio phys address, or a port number, find a pci device + * that implements this address. Be sure to pci_dev_put the device + * when finished. I/O port numbers are assumed to be offset + * from zero (that is, they do *not* have pci_io_addr added in). + * It is safe to call this function within an interrupt. + */ +static struct pci_dev *pci_get_device_by_addr(unsigned long addr) +{ + struct pci_dev *dev; + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + dev = __pci_get_device_by_addr(addr); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); + return dev; +} + +#ifdef DEBUG +/* + * Handy-dandy debug print routine, does nothing more + * than print out the contents of our addr cache. + */ +static void pci_addr_cache_print(struct pci_io_addr_cache *cache) +{ + struct rb_node *n; + int cnt = 0; + + n = rb_first(&cache->rb_root); + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s %s\n", + (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, + piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev), + pci_pretty_name(piar->pcidev)); + cnt++; + n = rb_next(n); + } +} +#endif + +/* Insert address range into the rb tree. */ +static struct pci_io_addr_range * +pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, + unsigned long ahi, unsigned int flags) +{ + struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; + struct rb_node *parent = NULL; + struct pci_io_addr_range *piar; + + /* Walk tree, find a place to insert into tree */ + while (*p) { + parent = *p; + piar = rb_entry(parent, struct pci_io_addr_range, rb_node); + if (alo < piar->addr_lo) { + p = &parent->rb_left; + } else if (ahi > piar->addr_hi) { + p = &parent->rb_right; + } else { + if (dev != piar->pcidev || + alo != piar->addr_lo || ahi != piar->addr_hi) { + printk(KERN_WARNING "PIAR: overlapping address range\n"); + } + return piar; + } + } + piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); + if (!piar) + return NULL; + + piar->addr_lo = alo; + piar->addr_hi = ahi; + piar->pcidev = dev; + piar->flags = flags; + + rb_link_node(&piar->rb_node, parent, p); + rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root); + + return piar; +} + +static void __pci_addr_cache_insert_device(struct pci_dev *dev) +{ + struct device_node *dn; + int i; + int inserted = 0; + + dn = pci_device_to_OF_node(dev); + if (!dn) { + printk(KERN_WARNING "PCI: no pci dn found for dev=%s %s\n", + pci_name(dev), pci_pretty_name(dev)); + return; + } + + /* Skip any devices for which EEH is not enabled. */ + if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) || + dn->eeh_mode & EEH_MODE_NOCHECK) { +#ifdef DEBUG + printk(KERN_INFO "PCI: skip building address cache for=%s %s\n", + pci_name(dev), pci_pretty_name(dev)); +#endif + return; + } + + /* The cache holds a reference to the device... */ + pci_dev_get(dev); + + /* Walk resources on this device, poke them into the tree */ + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + unsigned long start = pci_resource_start(dev,i); + unsigned long end = pci_resource_end(dev,i); + unsigned int flags = pci_resource_flags(dev,i); + + /* We are interested only bus addresses, not dma or other stuff */ + if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + if (start == 0 || ~start == 0 || end == 0 || ~end == 0) + continue; + pci_addr_cache_insert(dev, start, end, flags); + inserted = 1; + } + + /* If there was nothing to add, the cache has no reference... */ + if (!inserted) + pci_dev_put(dev); +} + +/** + * pci_addr_cache_insert_device - Add a device to the address cache + * @dev: PCI device whose I/O addresses we are interested in. + * + * In order to support the fast lookup of devices based on addresses, + * we maintain a cache of devices that can be quickly searched. + * This routine adds a device to that cache. + */ +void pci_addr_cache_insert_device(struct pci_dev *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + __pci_addr_cache_insert_device(dev); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); +} + +static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) +{ + struct rb_node *n; + int removed = 0; + +restart: + n = rb_first(&pci_io_addr_cache_root.rb_root); + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + + if (piar->pcidev == dev) { + rb_erase(n, &pci_io_addr_cache_root.rb_root); + removed = 1; + kfree(piar); + goto restart; + } + n = rb_next(n); + } + + /* The cache no longer holds its reference to this device... */ + if (removed) + pci_dev_put(dev); +} + +/** + * pci_addr_cache_remove_device - remove pci device from addr cache + * @dev: device to remove + * + * Remove a device from the addr-cache tree. + * This is potentially expensive, since it will walk + * the tree multiple times (once per resource). + * But so what; device removal doesn't need to be that fast. + */ +void pci_addr_cache_remove_device(struct pci_dev *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + __pci_addr_cache_remove_device(dev); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); +} + +/** + * pci_addr_cache_build - Build a cache of I/O addresses + * + * Build a cache of pci i/o addresses. This cache will be used to + * find the pci device that corresponds to a given address. + * This routine scans all pci busses to build the cache. + * Must be run late in boot process, after the pci controllers + * have been scaned for devices (after all device resources are known). + */ +void __init pci_addr_cache_build(void) +{ + struct pci_dev *dev = NULL; + + spin_lock_init(&pci_io_addr_cache_root.piar_lock); + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + /* Ignore PCI bridges ( XXX why ??) */ + if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) { + continue; + } + pci_addr_cache_insert_device(dev); + } + +#ifdef DEBUG + /* Verify tree built up above, echo back the list of addrs. */ + pci_addr_cache_print(&pci_io_addr_cache_root); +#endif +} + +/* --------------------------------------------------------------- */ +/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */ + +/** + * eeh_register_notifier - Register to find out about EEH events. + * @nb: notifier block to callback on events + */ +int eeh_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&eeh_notifier_chain, nb); +} + +/** + * eeh_unregister_notifier - Unregister to an EEH event notifier. + * @nb: notifier block to callback on events + */ +int eeh_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&eeh_notifier_chain, nb); +} + +/** + * read_slot_reset_state - Read the reset state of a device node's slot + * @dn: device node to read + * @rets: array to return results in + */ +static int read_slot_reset_state(struct device_node *dn, int rets[]) +{ + int token, outputs; + + if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { + token = ibm_read_slot_reset_state2; + outputs = 4; + } else { + token = ibm_read_slot_reset_state; + outputs = 3; + } + + return rtas_call(token, 3, outputs, rets, dn->eeh_config_addr, + BUID_HI(dn->phb->buid), BUID_LO(dn->phb->buid)); +} + +/** + * eeh_panic - call panic() for an eeh event that cannot be handled. + * The philosophy of this routine is that it is better to panic and + * halt the OS than it is to risk possible data corruption by + * oblivious device drivers that don't know better. + * + * @dev pci device that had an eeh event + * @reset_state current reset state of the device slot + */ +static void eeh_panic(struct pci_dev *dev, int reset_state) +{ + /* + * XXX We should create a separate sysctl for this. + * + * Since the panic_on_oops sysctl is used to halt the system + * in light of potential corruption, we can use it here. + */ + if (panic_on_oops) + panic("EEH: MMIO failure (%d) on device:%s %s\n", reset_state, + pci_name(dev), pci_pretty_name(dev)); + else { + __get_cpu_var(ignored_failures)++; + printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s %s\n", + reset_state, pci_name(dev), pci_pretty_name(dev)); + } +} + +/** + * eeh_event_handler - dispatch EEH events. The detection of a frozen + * slot can occur inside an interrupt, where it can be hard to do + * anything about it. The goal of this routine is to pull these + * detection events out of the context of the interrupt handler, and + * re-dispatch them for processing at a later time in a normal context. + * + * @dummy - unused + */ +static void eeh_event_handler(void *dummy) +{ + unsigned long flags; + struct eeh_event *event; + + while (1) { + spin_lock_irqsave(&eeh_eventlist_lock, flags); + event = NULL; + if (!list_empty(&eeh_eventlist)) { + event = list_entry(eeh_eventlist.next, struct eeh_event, list); + list_del(&event->list); + } + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + if (event == NULL) + break; + + printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device " + "%s %s\n", event->reset_state, + pci_name(event->dev), pci_pretty_name(event->dev)); + + atomic_set(&eeh_fail_count, 0); + notifier_call_chain (&eeh_notifier_chain, + EEH_NOTIFY_FREEZE, event); + + __get_cpu_var(slot_resets)++; + + pci_dev_put(event->dev); + kfree(event); + } +} + +/** + * eeh_token_to_phys - convert EEH address token to phys address + * @token i/o token, should be address in the form 0xE.... + */ +static inline unsigned long eeh_token_to_phys(unsigned long token) +{ + pte_t *ptep; + unsigned long pa; + + ptep = find_linux_pte(ioremap_mm.pgd, token); + if (!ptep) + return token; + pa = pte_pfn(*ptep) << PAGE_SHIFT; + + return pa | (token & (PAGE_SIZE-1)); +} + +/** + * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze + * @dn device node + * @dev pci device, if known + * + * Check for an EEH failure for the given device node. Call this + * routine if the result of a read was all 0xff's and you want to + * find out if this is due to an EEH slot freeze. This routine + * will query firmware for the EEH status. + * + * Returns 0 if there has not been an EEH error; otherwise returns + * a non-zero value and queues up a solt isolation event notification. + * + * It is safe to call this routine in an interrupt context. + */ +int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) +{ + int ret; + int rets[3]; + unsigned long flags; + int rc, reset_state; + struct eeh_event *event; + + __get_cpu_var(total_mmio_ffs)++; + + if (!eeh_subsystem_enabled) + return 0; + + if (!dn) + return 0; + + /* Access to IO BARs might get this far and still not want checking. */ + if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) || + dn->eeh_mode & EEH_MODE_NOCHECK) { + return 0; + } + + if (!dn->eeh_config_addr) { + return 0; + } + + /* + * If we already have a pending isolation event for this + * slot, we know it's bad already, we don't need to check... + */ + if (dn->eeh_mode & EEH_MODE_ISOLATED) { + atomic_inc(&eeh_fail_count); + if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) { + /* re-read the slot reset state */ + if (read_slot_reset_state(dn, rets) != 0) + rets[0] = -1; /* reset state unknown */ + eeh_panic(dev, rets[0]); + } + return 0; + } + + /* + * Now test for an EEH failure. This is VERY expensive. + * Note that the eeh_config_addr may be a parent device + * in the case of a device behind a bridge, or it may be + * function zero of a multi-function device. + * In any case they must share a common PHB. + */ + ret = read_slot_reset_state(dn, rets); + if (!(ret == 0 && rets[1] == 1 && (rets[0] == 2 || rets[0] == 4))) { + __get_cpu_var(false_positives)++; + return 0; + } + + /* prevent repeated reports of this failure */ + dn->eeh_mode |= EEH_MODE_ISOLATED; + + reset_state = rets[0]; + + spin_lock_irqsave(&slot_errbuf_lock, flags); + memset(slot_errbuf, 0, eeh_error_buf_size); + + rc = rtas_call(ibm_slot_error_detail, + 8, 1, NULL, dn->eeh_config_addr, + BUID_HI(dn->phb->buid), + BUID_LO(dn->phb->buid), NULL, 0, + virt_to_phys(slot_errbuf), + eeh_error_buf_size, + 1 /* Temporary Error */); + + if (rc == 0) + log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); + spin_unlock_irqrestore(&slot_errbuf_lock, flags); + + printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n", + rets[0], dn->name, dn->full_name); + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (event == NULL) { + eeh_panic(dev, reset_state); + return 1; + } + + event->dev = dev; + event->dn = dn; + event->reset_state = reset_state; + + /* We may or may not be called in an interrupt context */ + spin_lock_irqsave(&eeh_eventlist_lock, flags); + list_add(&event->list, &eeh_eventlist); + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + + /* Most EEH events are due to device driver bugs. Having + * a stack trace will help the device-driver authors figure + * out what happened. So print that out. */ + dump_stack(); + schedule_work(&eeh_event_wq); + + return 0; +} + +EXPORT_SYMBOL(eeh_dn_check_failure); + +/** + * eeh_check_failure - check if all 1's data is due to EEH slot freeze + * @token i/o token, should be address in the form 0xA.... + * @val value, should be all 1's (XXX why do we need this arg??) + * + * Check for an eeh failure at the given token address. + * Check for an EEH failure at the given token address. Call this + * routine if the result of a read was all 0xff's and you want to + * find out if this is due to an EEH slot freeze event. This routine + * will query firmware for the EEH status. + * + * Note this routine is safe to call in an interrupt context. + */ +unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +{ + unsigned long addr; + struct pci_dev *dev; + struct device_node *dn; + + /* Finding the phys addr + pci device; this is pretty quick. */ + addr = eeh_token_to_phys((unsigned long __force) token); + dev = pci_get_device_by_addr(addr); + if (!dev) + return val; + + dn = pci_device_to_OF_node(dev); + eeh_dn_check_failure (dn, dev); + + pci_dev_put(dev); + return val; +} + +EXPORT_SYMBOL(eeh_check_failure); + +struct eeh_early_enable_info { + unsigned int buid_hi; + unsigned int buid_lo; +}; + +/* Enable eeh for the given device node. */ +static void *early_enable_eeh(struct device_node *dn, void *data) +{ + struct eeh_early_enable_info *info = data; + int ret; + char *status = get_property(dn, "status", NULL); + u32 *class_code = (u32 *)get_property(dn, "class-code", NULL); + u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL); + u32 *device_id = (u32 *)get_property(dn, "device-id", NULL); + u32 *regs; + int enable; + + dn->eeh_mode = 0; + + if (status && strcmp(status, "ok") != 0) + return NULL; /* ignore devices with bad status */ + + /* Ignore bad nodes. */ + if (!class_code || !vendor_id || !device_id) + return NULL; + + /* There is nothing to check on PCI to ISA bridges */ + if (dn->type && !strcmp(dn->type, "isa")) { + dn->eeh_mode |= EEH_MODE_NOCHECK; + return NULL; + } + + /* + * Now decide if we are going to "Disable" EEH checking + * for this device. We still run with the EEH hardware active, + * but we won't be checking for ff's. This means a driver + * could return bad data (very bad!), an interrupt handler could + * hang waiting on status bits that won't change, etc. + * But there are a few cases like display devices that make sense. + */ + enable = 1; /* i.e. we will do checking */ + if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY) + enable = 0; + + if (!enable) + dn->eeh_mode |= EEH_MODE_NOCHECK; + + /* Ok... see if this device supports EEH. Some do, some don't, + * and the only way to find out is to check each and every one. */ + regs = (u32 *)get_property(dn, "reg", NULL); + if (regs) { + /* First register entry is addr (00BBSS00) */ + /* Try to enable eeh */ + ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, + regs[0], info->buid_hi, info->buid_lo, + EEH_ENABLE); + if (ret == 0) { + eeh_subsystem_enabled = 1; + dn->eeh_mode |= EEH_MODE_SUPPORTED; + dn->eeh_config_addr = regs[0]; +#ifdef DEBUG + printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name); +#endif + } else { + + /* This device doesn't support EEH, but it may have an + * EEH parent, in which case we mark it as supported. */ + if (dn->parent && (dn->parent->eeh_mode & EEH_MODE_SUPPORTED)) { + /* Parent supports EEH. */ + dn->eeh_mode |= EEH_MODE_SUPPORTED; + dn->eeh_config_addr = dn->parent->eeh_config_addr; + return NULL; + } + } + } else { + printk(KERN_WARNING "EEH: %s: unable to get reg property.\n", + dn->full_name); + } + + return NULL; +} + +/* + * Initialize EEH by trying to enable it for all of the adapters in the system. + * As a side effect we can determine here if eeh is supported at all. + * Note that we leave EEH on so failed config cycles won't cause a machine + * check. If a user turns off EEH for a particular adapter they are really + * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't + * grant access to a slot if EEH isn't enabled, and so we always enable + * EEH for all slots/all devices. + * + * The eeh-force-off option disables EEH checking globally, for all slots. + * Even if force-off is set, the EEH hardware is still enabled, so that + * newer systems can boot. + */ +void __init eeh_init(void) +{ + struct device_node *phb, *np; + struct eeh_early_enable_info info; + + np = of_find_node_by_path("/rtas"); + if (np == NULL) + return; + + ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); + ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); + ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); + ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); + + if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) + return; + + eeh_error_buf_size = rtas_token("rtas-error-log-max"); + if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { + eeh_error_buf_size = 1024; + } + if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { + printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated " + "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX); + eeh_error_buf_size = RTAS_ERROR_LOG_MAX; + } + + /* Enable EEH for all adapters. Note that eeh requires buid's */ + for (phb = of_find_node_by_name(NULL, "pci"); phb; + phb = of_find_node_by_name(phb, "pci")) { + unsigned long buid; + + buid = get_phb_buid(phb); + if (buid == 0) + continue; + + info.buid_lo = BUID_LO(buid); + info.buid_hi = BUID_HI(buid); + traverse_pci_devices(phb, early_enable_eeh, &info); + } + + if (eeh_subsystem_enabled) + printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n"); + else + printk(KERN_WARNING "EEH: No capable adapters found\n"); +} + +/** + * eeh_add_device_early - enable EEH for the indicated device_node + * @dn: device node for which to set up EEH + * + * This routine must be used to perform EEH initialization for PCI + * devices that were added after system boot (e.g. hotplug, dlpar). + * This routine must be called before any i/o is performed to the + * adapter (inluding any config-space i/o). + * Whether this actually enables EEH or not for this device depends + * on the CEC architecture, type of the device, on earlier boot + * command-line arguments & etc. + */ +void eeh_add_device_early(struct device_node *dn) +{ + struct pci_controller *phb; + struct eeh_early_enable_info info; + + if (!dn) + return; + phb = dn->phb; + if (NULL == phb || 0 == phb->buid) { + printk(KERN_WARNING "EEH: Expected buid but found none\n"); + return; + } + + info.buid_hi = BUID_HI(phb->buid); + info.buid_lo = BUID_LO(phb->buid); + early_enable_eeh(dn, &info); +} +EXPORT_SYMBOL(eeh_add_device_early); + +/** + * eeh_add_device_late - perform EEH initialization for the indicated pci device + * @dev: pci device for which to set up EEH + * + * This routine must be used to complete EEH initialization for PCI + * devices that were added after system boot (e.g. hotplug, dlpar). + */ +void eeh_add_device_late(struct pci_dev *dev) +{ + if (!dev || !eeh_subsystem_enabled) + return; + +#ifdef DEBUG + printk(KERN_DEBUG "EEH: adding device %s %s\n", pci_name(dev), + pci_pretty_name(dev)); +#endif + + pci_addr_cache_insert_device (dev); +} +EXPORT_SYMBOL(eeh_add_device_late); + +/** + * eeh_remove_device - undo EEH setup for the indicated pci device + * @dev: pci device to be removed + * + * This routine should be when a device is removed from a running + * system (e.g. by hotplug or dlpar). + */ +void eeh_remove_device(struct pci_dev *dev) +{ + if (!dev || !eeh_subsystem_enabled) + return; + + /* Unregister the device with the EEH/PCI address search system */ +#ifdef DEBUG + printk(KERN_DEBUG "EEH: remove device %s %s\n", pci_name(dev), + pci_pretty_name(dev)); +#endif + pci_addr_cache_remove_device(dev); +} +EXPORT_SYMBOL(eeh_remove_device); + +static int proc_eeh_show(struct seq_file *m, void *v) +{ + unsigned int cpu; + unsigned long ffs = 0, positives = 0, failures = 0; + unsigned long resets = 0; + + for_each_cpu(cpu) { + ffs += per_cpu(total_mmio_ffs, cpu); + positives += per_cpu(false_positives, cpu); + failures += per_cpu(ignored_failures, cpu); + resets += per_cpu(slot_resets, cpu); + } + + if (0 == eeh_subsystem_enabled) { + seq_printf(m, "EEH Subsystem is globally disabled\n"); + seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs); + } else { + seq_printf(m, "EEH Subsystem is enabled\n"); + seq_printf(m, "eeh_total_mmio_ffs=%ld\n" + "eeh_false_positives=%ld\n" + "eeh_ignored_failures=%ld\n" + "eeh_slot_resets=%ld\n" + "eeh_fail_count=%d\n", + ffs, positives, failures, resets, + eeh_fail_count.counter); + } + + return 0; +} + +static int proc_eeh_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_eeh_show, NULL); +} + +static struct file_operations proc_eeh_operations = { + .open = proc_eeh_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init eeh_init_proc(void) +{ + struct proc_dir_entry *e; + + if (systemcfg->platform & PLATFORM_PSERIES) { + e = create_proc_entry("ppc64/eeh", 0, NULL); + if (e) + e->proc_fops = &proc_eeh_operations; + } + + return 0; +} +__initcall(eeh_init_proc); diff --git a/arch/ppc64/kernel/entry.S b/arch/ppc64/kernel/entry.S new file mode 100644 index 00000000000..d3604056e1a --- /dev/null +++ b/arch/ppc64/kernel/entry.S @@ -0,0 +1,845 @@ +/* + * arch/ppc64/kernel/entry.S + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This file contains the system call entry code, context switch + * code, and exception/interrupt return code for PowerPC. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PPC_ISERIES +#define DO_SOFT_DISABLE +#endif + +/* + * System calls. + */ + .section ".toc","aw" +.SYS_CALL_TABLE: + .tc .sys_call_table[TC],.sys_call_table + +.SYS_CALL_TABLE32: + .tc .sys_call_table32[TC],.sys_call_table32 + +/* This value is used to mark exception frames on the stack. */ +exception_marker: + .tc ID_72656773_68657265[TC],0x7265677368657265 + + .section ".text" + .align 7 + +#undef SHOW_SYSCALLS + + .globl system_call_common +system_call_common: + andi. r10,r12,MSR_PR + mr r10,r1 + addi r1,r1,-INT_FRAME_SIZE + beq- 1f + ld r1,PACAKSAVE(r13) +1: std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + li r11,0 + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + crclr so + mfcr r9 + mflr r10 + li r11,0xc01 + std r9,_CCR(r1) + std r10,_LINK(r1) + std r11,_TRAP(r1) + mfxer r9 + mfctr r10 + std r9,_XER(r1) + std r10,_CTR(r1) + std r3,ORIG_GPR3(r1) + ld r2,PACATOC(r13) + addi r9,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r9) /* "regshere" marker */ +#ifdef CONFIG_PPC_ISERIES + /* Hack for handling interrupts when soft-enabling on iSeries */ + cmpdi cr1,r0,0x5555 /* syscall 0x5555 */ + andi. r10,r12,MSR_PR /* from kernel */ + crand 4*cr0+eq,4*cr1+eq,4*cr0+eq + beq hardware_interrupt_entry + lbz r10,PACAPROCENABLED(r13) + std r10,SOFTE(r1) +#endif + mfmsr r11 + ori r11,r11,MSR_EE + mtmsrd r11,1 + +#ifdef SHOW_SYSCALLS + bl .do_show_syscall + REST_GPR(0,r1) + REST_4GPRS(3,r1) + REST_2GPRS(7,r1) + addi r9,r1,STACK_FRAME_OVERHEAD +#endif + clrrdi r11,r1,THREAD_SHIFT + li r12,0 + ld r10,TI_FLAGS(r11) + stb r12,TI_SC_NOERR(r11) + andi. r11,r10,_TIF_SYSCALL_T_OR_A + bne- syscall_dotrace +syscall_dotrace_cont: + cmpldi 0,r0,NR_syscalls + bge- syscall_enosys + +system_call: /* label this so stack traces look sane */ +/* + * Need to vector to 32 Bit or default sys_call_table here, + * based on caller's run-mode / personality. + */ + ld r11,.SYS_CALL_TABLE@toc(2) + andi. r10,r10,_TIF_32BIT + beq 15f + ld r11,.SYS_CALL_TABLE32@toc(2) + clrldi r3,r3,32 + clrldi r4,r4,32 + clrldi r5,r5,32 + clrldi r6,r6,32 + clrldi r7,r7,32 + clrldi r8,r8,32 +15: + slwi r0,r0,3 + ldx r10,r11,r0 /* Fetch system call handler [ptr] */ + mtctr r10 + bctrl /* Call handler */ + +syscall_exit: +#ifdef SHOW_SYSCALLS + std r3,GPR3(r1) + bl .do_show_syscall_exit + ld r3,GPR3(r1) +#endif + std r3,RESULT(r1) + ld r5,_CCR(r1) + li r10,-_LAST_ERRNO + cmpld r3,r10 + clrrdi r12,r1,THREAD_SHIFT + bge- syscall_error +syscall_error_cont: + + /* check for syscall tracing or audit */ + ld r9,TI_FLAGS(r12) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + bne- syscall_exit_trace +syscall_exit_trace_cont: + + /* disable interrupts so current_thread_info()->flags can't change, + and so that we don't get interrupted after loading SRR0/1. */ + ld r8,_MSR(r1) + andi. r10,r8,MSR_RI + beq- unrecov_restore + mfmsr r10 + rldicl r10,r10,48,1 + rotldi r10,r10,16 + mtmsrd r10,1 + ld r9,TI_FLAGS(r12) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + bne- syscall_exit_work + ld r7,_NIP(r1) + stdcx. r0,0,r1 /* to clear the reservation */ + andi. r6,r8,MSR_PR + ld r4,_LINK(r1) + beq- 1f /* only restore r13 if */ + ld r13,GPR13(r1) /* returning to usermode */ +1: ld r2,GPR2(r1) + li r12,MSR_RI + andc r10,r10,r12 + mtmsrd r10,1 /* clear MSR.RI */ + ld r1,GPR1(r1) + mtlr r4 + mtcr r5 + mtspr SRR0,r7 + mtspr SRR1,r8 + rfid + b . /* prevent speculative execution */ + +syscall_enosys: + li r3,-ENOSYS + std r3,RESULT(r1) + clrrdi r12,r1,THREAD_SHIFT + ld r5,_CCR(r1) + +syscall_error: + lbz r11,TI_SC_NOERR(r12) + cmpwi 0,r11,0 + bne- syscall_error_cont + neg r3,r3 + oris r5,r5,0x1000 /* Set SO bit in CR */ + std r5,_CCR(r1) + b syscall_error_cont + +/* Traced system call support */ +syscall_dotrace: + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_syscall_trace_enter + ld r0,GPR0(r1) /* Restore original registers */ + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) + addi r9,r1,STACK_FRAME_OVERHEAD + clrrdi r10,r1,THREAD_SHIFT + ld r10,TI_FLAGS(r10) + b syscall_dotrace_cont + +syscall_exit_trace: + std r3,GPR3(r1) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_syscall_trace_leave + REST_NVGPRS(r1) + ld r3,GPR3(r1) + ld r5,_CCR(r1) + clrrdi r12,r1,THREAD_SHIFT + b syscall_exit_trace_cont + +/* Stuff to do on exit from a system call. */ +syscall_exit_work: + std r3,GPR3(r1) + std r5,_CCR(r1) + b .ret_from_except_lite + +/* Save non-volatile GPRs, if not already saved. */ +_GLOBAL(save_nvgprs) + ld r11,_TRAP(r1) + andi. r0,r11,1 + beqlr- + SAVE_NVGPRS(r1) + clrrdi r0,r11,1 + std r0,_TRAP(r1) + blr + +/* + * The sigsuspend and rt_sigsuspend system calls can call do_signal + * and thus put the process into the stopped state where we might + * want to examine its user state with ptrace. Therefore we need + * to save all the nonvolatile registers (r14 - r31) before calling + * the C code. Similarly, fork, vfork and clone need the full + * register state on the stack so that it can be copied to the child. + */ +_GLOBAL(ppc32_sigsuspend) + bl .save_nvgprs + bl .sys32_sigsuspend + b 70f + +_GLOBAL(ppc64_rt_sigsuspend) + bl .save_nvgprs + bl .sys_rt_sigsuspend + b 70f + +_GLOBAL(ppc32_rt_sigsuspend) + bl .save_nvgprs + bl .sys32_rt_sigsuspend + /* If sigsuspend() returns zero, we are going into a signal handler */ +70: cmpdi 0,r3,0 + beq .ret_from_except + /* If it returned -EINTR, we need to return via syscall_exit to set + the SO bit in cr0 and potentially stop for ptrace. */ + b syscall_exit + +_GLOBAL(ppc_fork) + bl .save_nvgprs + bl .sys_fork + b syscall_exit + +_GLOBAL(ppc_vfork) + bl .save_nvgprs + bl .sys_vfork + b syscall_exit + +_GLOBAL(ppc_clone) + bl .save_nvgprs + bl .sys_clone + b syscall_exit + +_GLOBAL(ppc32_swapcontext) + bl .save_nvgprs + bl .sys32_swapcontext + b 80f + +_GLOBAL(ppc64_swapcontext) + bl .save_nvgprs + bl .sys_swapcontext + b 80f + +_GLOBAL(ppc32_sigreturn) + bl .sys32_sigreturn + b 80f + +_GLOBAL(ppc32_rt_sigreturn) + bl .sys32_rt_sigreturn + b 80f + +_GLOBAL(ppc64_rt_sigreturn) + bl .sys_rt_sigreturn + +80: cmpdi 0,r3,0 + blt syscall_exit + clrrdi r4,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r4) + andi. r4,r4,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + beq+ 81f + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_syscall_trace_leave +81: b .ret_from_except + +_GLOBAL(ret_from_fork) + bl .schedule_tail + REST_NVGPRS(r1) + li r3,0 + b syscall_exit + +/* + * This routine switches between two different tasks. The process + * state of one is saved on its kernel stack. Then the state + * of the other is restored from its kernel stack. The memory + * management hardware is updated to the second process's state. + * Finally, we can return to the second process, via ret_from_except. + * On entry, r3 points to the THREAD for the current task, r4 + * points to the THREAD for the new task. + * + * Note: there are two ways to get to the "going out" portion + * of this code; either by coming in via the entry (_switch) + * or via "fork" which must set up an environment equivalent + * to the "_switch" path. If you change this you'll have to change + * the fork code also. + * + * The code which creates the new task context is in 'copy_thread' + * in arch/ppc64/kernel/process.c + */ + .align 7 +_GLOBAL(_switch) + mflr r0 + std r0,16(r1) + stdu r1,-SWITCH_FRAME_SIZE(r1) + /* r3-r13 are caller saved -- Cort */ + SAVE_8GPRS(14, r1) + SAVE_10GPRS(22, r1) + mflr r20 /* Return to switch caller */ + mfmsr r22 + li r0, MSR_FP +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + oris r0,r0,MSR_VEC@h /* Disable altivec */ + mfspr r24,SPRN_VRSAVE /* save vrsave register value */ + std r24,THREAD_VRSAVE(r3) +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif /* CONFIG_ALTIVEC */ + and. r0,r0,r22 + beq+ 1f + andc r22,r22,r0 + mtmsrd r22 + isync +1: std r20,_NIP(r1) + mfcr r23 + std r23,_CCR(r1) + std r1,KSP(r3) /* Set old stack pointer */ + +#ifdef CONFIG_SMP + /* We need a sync somewhere here to make sure that if the + * previous task gets rescheduled on another CPU, it sees all + * stores it has performed on this one. + */ + sync +#endif /* CONFIG_SMP */ + + addi r6,r4,-THREAD /* Convert THREAD to 'current' */ + std r6,PACACURRENT(r13) /* Set new 'current' */ + + ld r8,KSP(r4) /* new stack pointer */ +BEGIN_FTR_SECTION + clrrdi r6,r8,28 /* get its ESID */ + clrrdi r9,r1,28 /* get current sp ESID */ + clrldi. r0,r6,2 /* is new ESID c00000000? */ + cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */ + cror eq,4*cr1+eq,eq + beq 2f /* if yes, don't slbie it */ + oris r0,r6,0x0800 /* set C (class) bit */ + + /* Bolt in the new stack SLB entry */ + ld r7,KSP_VSID(r4) /* Get new stack's VSID */ + oris r6,r6,(SLB_ESID_V)@h + ori r6,r6,(SLB_NUM_BOLTED-1)@l + slbie r0 + slbie r0 /* Workaround POWER5 < DD2.1 issue */ + slbmte r7,r6 + isync + +2: +END_FTR_SECTION_IFSET(CPU_FTR_SLB) + clrrdi r7,r8,THREAD_SHIFT /* base of new stack */ + /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE + because we don't need to leave the 288-byte ABI gap at the + top of the kernel stack. */ + addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE + + mr r1,r8 /* start using new stack pointer */ + std r7,PACAKSAVE(r13) + + ld r6,_CCR(r1) + mtcrf 0xFF,r6 + +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + ld r0,THREAD_VRSAVE(r4) + mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif /* CONFIG_ALTIVEC */ + + /* r3-r13 are destroyed -- Cort */ + REST_8GPRS(14, r1) + REST_10GPRS(22, r1) + +#ifdef CONFIG_PPC_ISERIES + clrrdi r7,r1,THREAD_SHIFT /* get current_thread_info() */ + ld r7,TI_FLAGS(r7) /* Get run light flag */ + mfspr r9,CTRLF + srdi r7,r7,TIF_RUN_LIGHT + insrdi r9,r7,1,63 /* Insert run light into CTRL */ + mtspr CTRLT,r9 +#endif + + /* convert old thread to its task_struct for return value */ + addi r3,r3,-THREAD + ld r7,_NIP(r1) /* Return to _switch caller in new task */ + mtlr r7 + addi r1,r1,SWITCH_FRAME_SIZE + blr + + .align 7 +_GLOBAL(ret_from_except) + ld r11,_TRAP(r1) + andi. r0,r11,1 + bne .ret_from_except_lite + REST_NVGPRS(r1) + +_GLOBAL(ret_from_except_lite) + /* + * Disable interrupts so that current_thread_info()->flags + * can't change between when we test it and when we return + * from the interrupt. + */ + mfmsr r10 /* Get current interrupt state */ + rldicl r9,r10,48,1 /* clear MSR_EE */ + rotldi r9,r9,16 + mtmsrd r9,1 /* Update machine state */ + +#ifdef CONFIG_PREEMPT + clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ + li r0,_TIF_NEED_RESCHED /* bits to check */ + ld r3,_MSR(r1) + ld r4,TI_FLAGS(r9) + /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */ + rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING + and. r0,r4,r0 /* check NEED_RESCHED and maybe SIGPENDING */ + bne do_work + +#else /* !CONFIG_PREEMPT */ + ld r3,_MSR(r1) /* Returning to user mode? */ + andi. r3,r3,MSR_PR + beq restore /* if not, just restore regs and return */ + + /* Check current_thread_info()->flags */ + clrrdi r9,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r9) + andi. r0,r4,_TIF_USER_WORK_MASK + bne do_work +#endif + +restore: +#ifdef CONFIG_PPC_ISERIES + ld r5,SOFTE(r1) + cmpdi 0,r5,0 + beq 4f + /* Check for pending interrupts (iSeries) */ + ld r3,PACALPPACA+LPPACAANYINT(r13) + cmpdi r3,0 + beq+ 4f /* skip do_IRQ if no interrupts */ + + li r3,0 + stb r3,PACAPROCENABLED(r13) /* ensure we are soft-disabled */ + ori r10,r10,MSR_EE + mtmsrd r10 /* hard-enable again */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_IRQ + b .ret_from_except_lite /* loop back and handle more */ + +4: stb r5,PACAPROCENABLED(r13) +#endif + + ld r3,_MSR(r1) + andi. r0,r3,MSR_RI + beq- unrecov_restore + + andi. r0,r3,MSR_PR + + /* + * r13 is our per cpu area, only restore it if we are returning to + * userspace + */ + beq 1f + REST_GPR(13, r1) +1: + ld r3,_CTR(r1) + ld r0,_LINK(r1) + mtctr r3 + mtlr r0 + ld r3,_XER(r1) + mtspr XER,r3 + + REST_8GPRS(5, r1) + + stdcx. r0,0,r1 /* to clear the reservation */ + + mfmsr r0 + li r2, MSR_RI + andc r0,r0,r2 + mtmsrd r0,1 + + ld r0,_MSR(r1) + mtspr SRR1,r0 + + ld r2,_CCR(r1) + mtcrf 0xFF,r2 + ld r2,_NIP(r1) + mtspr SRR0,r2 + + ld r0,GPR0(r1) + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r1,GPR1(r1) + + rfid + b . /* prevent speculative execution */ + +/* Note: this must change if we start using the TIF_NOTIFY_RESUME bit */ +do_work: +#ifdef CONFIG_PREEMPT + andi. r0,r3,MSR_PR /* Returning to user mode? */ + bne user_work + /* Check that preempt_count() == 0 and interrupts are enabled */ + lwz r8,TI_PREEMPT(r9) + cmpwi cr1,r8,0 +#ifdef CONFIG_PPC_ISERIES + ld r0,SOFTE(r1) + cmpdi r0,0 +#else + andi. r0,r3,MSR_EE +#endif + crandc eq,cr1*4+eq,eq + bne restore + /* here we are preempting the current task */ +1: +#ifdef CONFIG_PPC_ISERIES + li r0,1 + stb r0,PACAPROCENABLED(r13) +#endif + ori r10,r10,MSR_EE + mtmsrd r10,1 /* reenable interrupts */ + bl .preempt_schedule + mfmsr r10 + clrrdi r9,r1,THREAD_SHIFT + rldicl r10,r10,48,1 /* disable interrupts again */ + rotldi r10,r10,16 + mtmsrd r10,1 + ld r4,TI_FLAGS(r9) + andi. r0,r4,_TIF_NEED_RESCHED + bne 1b + b restore + +user_work: +#endif + /* Enable interrupts */ + ori r10,r10,MSR_EE + mtmsrd r10,1 + + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + bl .schedule + b .ret_from_except_lite + +1: bl .save_nvgprs + li r3,0 + addi r4,r1,STACK_FRAME_OVERHEAD + bl .do_signal + b .ret_from_except + +unrecov_restore: + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b unrecov_restore + +#ifdef CONFIG_PPC_RTAS +/* + * On CHRP, the Run-Time Abstraction Services (RTAS) have to be + * called with the MMU off. + * + * In addition, we need to be in 32b mode, at least for now. + * + * Note: r3 is an input parameter to rtas, so don't trash it... + */ +_GLOBAL(enter_rtas) + mflr r0 + std r0,16(r1) + stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */ + + /* Because RTAS is running in 32b mode, it clobbers the high order half + * of all registers that it saves. We therefore save those registers + * RTAS might touch to the stack. (r0, r3-r13 are caller saved) + */ + SAVE_GPR(2, r1) /* Save the TOC */ + SAVE_GPR(13, r1) /* Save paca */ + SAVE_8GPRS(14, r1) /* Save the non-volatiles */ + SAVE_10GPRS(22, r1) /* ditto */ + + mfcr r4 + std r4,_CCR(r1) + mfctr r5 + std r5,_CTR(r1) + mfspr r6,XER + std r6,_XER(r1) + mfdar r7 + std r7,_DAR(r1) + mfdsisr r8 + std r8,_DSISR(r1) + mfsrr0 r9 + std r9,_SRR0(r1) + mfsrr1 r10 + std r10,_SRR1(r1) + + /* There is no way it is acceptable to get here with interrupts enabled, + * check it with the asm equivalent of WARN_ON + */ + mfmsr r6 + andi. r0,r6,MSR_EE +1: tdnei r0,0 +.section __bug_table,"a" + .llong 1b,__LINE__ + 0x1000000, 1f, 2f +.previous +.section .rodata,"a" +1: .asciz __FILE__ +2: .asciz "enter_rtas" +.previous + + /* Unfortunately, the stack pointer and the MSR are also clobbered, + * so they are saved in the PACA which allows us to restore + * our original state after RTAS returns. + */ + std r1,PACAR1(r13) + std r6,PACASAVEDMSR(r13) + + /* Setup our real return addr */ + SET_REG_TO_LABEL(r4,.rtas_return_loc) + SET_REG_TO_CONST(r9,KERNELBASE) + sub r4,r4,r9 + mtlr r4 + + li r0,0 + ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI + andc r0,r6,r0 + + li r9,1 + rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) + ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP + andc r6,r0,r9 + ori r6,r6,MSR_RI + sync /* disable interrupts so SRR0/1 */ + mtmsrd r0 /* don't get trashed */ + + SET_REG_TO_LABEL(r4,rtas) + ld r5,RTASENTRY(r4) /* get the rtas->entry value */ + ld r4,RTASBASE(r4) /* get the rtas->base value */ + + mtspr SRR0,r5 + mtspr SRR1,r6 + rfid + b . /* prevent speculative execution */ + +_STATIC(rtas_return_loc) + /* relocation is off at this point */ + mfspr r4,SPRG3 /* Get PACA */ + SET_REG_TO_CONST(r5, KERNELBASE) + sub r4,r4,r5 /* RELOC the PACA base pointer */ + + mfmsr r6 + li r0,MSR_RI + andc r6,r6,r0 + sync + mtmsrd r6 + + ld r1,PACAR1(r4) /* Restore our SP */ + LOADADDR(r3,.rtas_restore_regs) + ld r4,PACASAVEDMSR(r4) /* Restore our MSR */ + + mtspr SRR0,r3 + mtspr SRR1,r4 + rfid + b . /* prevent speculative execution */ + +_STATIC(rtas_restore_regs) + /* relocation is on at this point */ + REST_GPR(2, r1) /* Restore the TOC */ + REST_GPR(13, r1) /* Restore paca */ + REST_8GPRS(14, r1) /* Restore the non-volatiles */ + REST_10GPRS(22, r1) /* ditto */ + + mfspr r13,SPRG3 + + ld r4,_CCR(r1) + mtcr r4 + ld r5,_CTR(r1) + mtctr r5 + ld r6,_XER(r1) + mtspr XER,r6 + ld r7,_DAR(r1) + mtdar r7 + ld r8,_DSISR(r1) + mtdsisr r8 + ld r9,_SRR0(r1) + mtsrr0 r9 + ld r10,_SRR1(r1) + mtsrr1 r10 + + addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */ + ld r0,16(r1) /* get return address */ + + mtlr r0 + blr /* return to caller */ + +#endif /* CONFIG_PPC_RTAS */ + +#ifdef CONFIG_PPC_MULTIPLATFORM + +_GLOBAL(enter_prom) + mflr r0 + std r0,16(r1) + stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ + + /* Because PROM is running in 32b mode, it clobbers the high order half + * of all registers that it saves. We therefore save those registers + * PROM might touch to the stack. (r0, r3-r13 are caller saved) + */ + SAVE_8GPRS(2, r1) + SAVE_GPR(13, r1) + SAVE_8GPRS(14, r1) + SAVE_10GPRS(22, r1) + mfcr r4 + std r4,_CCR(r1) + mfctr r5 + std r5,_CTR(r1) + mfspr r6,XER + std r6,_XER(r1) + mfdar r7 + std r7,_DAR(r1) + mfdsisr r8 + std r8,_DSISR(r1) + mfsrr0 r9 + std r9,_SRR0(r1) + mfsrr1 r10 + std r10,_SRR1(r1) + mfmsr r11 + std r11,_MSR(r1) + + /* Get the PROM entrypoint */ + ld r0,GPR4(r1) + mtlr r0 + + /* Switch MSR to 32 bits mode + */ + mfmsr r11 + li r12,1 + rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) + andc r11,r11,r12 + li r12,1 + rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) + andc r11,r11,r12 + mtmsrd r11 + isync + + /* Restore arguments & enter PROM here... */ + ld r3,GPR3(r1) + blrl + + /* Just make sure that r1 top 32 bits didn't get + * corrupt by OF + */ + rldicl r1,r1,0,32 + + /* Restore the MSR (back to 64 bits) */ + ld r0,_MSR(r1) + mtmsrd r0 + isync + + /* Restore other registers */ + REST_GPR(2, r1) + REST_GPR(13, r1) + REST_8GPRS(14, r1) + REST_10GPRS(22, r1) + ld r4,_CCR(r1) + mtcr r4 + ld r5,_CTR(r1) + mtctr r5 + ld r6,_XER(r1) + mtspr XER,r6 + ld r7,_DAR(r1) + mtdar r7 + ld r8,_DSISR(r1) + mtdsisr r8 + ld r9,_SRR0(r1) + mtsrr0 r9 + ld r10,_SRR1(r1) + mtsrr1 r10 + + addi r1,r1,PROM_FRAME_SIZE + ld r0,16(r1) + mtlr r0 + blr + +#endif /* CONFIG_PPC_MULTIPLATFORM */ diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S new file mode 100644 index 00000000000..fe05f3fbf9d --- /dev/null +++ b/arch/ppc64/kernel/head.S @@ -0,0 +1,2139 @@ +/* + * arch/ppc64/kernel/head.S + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com + * + * This file contains the low-level support and setup for the + * PowerPC-64 platform, including trap and interrupt dispatch. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define SECONDARY_PROCESSORS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PPC_ISERIES +#define DO_SOFT_DISABLE +#endif + +/* + * hcall interface to pSeries LPAR + */ +#define H_SET_ASR 0x30 + +/* + * We layout physical memory as follows: + * 0x0000 - 0x00ff : Secondary processor spin code + * 0x0100 - 0x2fff : pSeries Interrupt prologs + * 0x3000 - 0x3fff : Interrupt support + * 0x4000 - 0x4fff : NACA + * 0x6000 : iSeries and common interrupt prologs + * 0x9000 - 0x9fff : Initial segment table + */ + +/* + * SPRG Usage + * + * Register Definition + * + * SPRG0 reserved for hypervisor + * SPRG1 temp - used to save gpr + * SPRG2 temp - used to save gpr + * SPRG3 virt addr of paca + */ + +/* + * Entering into this code we make the following assumptions: + * For pSeries: + * 1. The MMU is off & open firmware is running in real mode. + * 2. The kernel is entered at __start + * + * For iSeries: + * 1. The MMU is on (as it always is for iSeries) + * 2. The kernel is entered at system_reset_iSeries + */ + + .text + .globl _stext +_stext: +#ifdef CONFIG_PPC_MULTIPLATFORM +_GLOBAL(__start) + /* NOP this out unconditionally */ +BEGIN_FTR_SECTION + b .__start_initialization_multiplatform +END_FTR_SECTION(0, 1) +#endif /* CONFIG_PPC_MULTIPLATFORM */ + + /* Catch branch to 0 in real mode */ + trap +#ifdef CONFIG_PPC_ISERIES + /* + * At offset 0x20, there is a pointer to iSeries LPAR data. + * This is required by the hypervisor + */ + . = 0x20 + .llong hvReleaseData-KERNELBASE + + /* + * At offset 0x28 and 0x30 are offsets to the msChunks + * array (used by the iSeries LPAR debugger to do translation + * between physical addresses and absolute addresses) and + * to the pidhash table (also used by the debugger) + */ + .llong msChunks-KERNELBASE + .llong 0 /* pidhash-KERNELBASE SFRXXX */ + + /* Offset 0x38 - Pointer to start of embedded System.map */ + .globl embedded_sysmap_start +embedded_sysmap_start: + .llong 0 + /* Offset 0x40 - Pointer to end of embedded System.map */ + .globl embedded_sysmap_end +embedded_sysmap_end: + .llong 0 + +#else /* CONFIG_PPC_ISERIES */ + + /* Secondary processors spin on this value until it goes to 1. */ + .globl __secondary_hold_spinloop +__secondary_hold_spinloop: + .llong 0x0 + + /* Secondary processors write this value with their cpu # */ + /* after they enter the spin loop immediately below. */ + .globl __secondary_hold_acknowledge +__secondary_hold_acknowledge: + .llong 0x0 + + . = 0x60 +/* + * The following code is used on pSeries to hold secondary processors + * in a spin loop after they have been freed from OpenFirmware, but + * before the bulk of the kernel has been relocated. This code + * is relocated to physical address 0x60 before prom_init is run. + * All of it must fit below the first exception vector at 0x100. + */ +_GLOBAL(__secondary_hold) + mfmsr r24 + ori r24,r24,MSR_RI + mtmsrd r24 /* RI on */ + + /* Grab our linux cpu number */ + mr r24,r3 + + /* Tell the master cpu we're here */ + /* Relocation is off & we are located at an address less */ + /* than 0x100, so only need to grab low order offset. */ + std r24,__secondary_hold_acknowledge@l(0) + sync + + /* All secondary cpu's wait here until told to start. */ +100: ld r4,__secondary_hold_spinloop@l(0) + cmpdi 0,r4,1 + bne 100b + +#ifdef CONFIG_HMT + b .hmt_init +#else +#ifdef CONFIG_SMP + mr r3,r24 + b .pSeries_secondary_smp_init +#else + BUG_OPCODE +#endif +#endif +#endif + +/* This value is used to mark exception frames on the stack. */ + .section ".toc","aw" +exception_marker: + .tc ID_72656773_68657265[TC],0x7265677368657265 + .text + +/* + * The following macros define the code that appears as + * the prologue to each of the exception handlers. They + * are split into two parts to allow a single kernel binary + * to be used for pSeries and iSeries. + * LOL. One day... - paulus + */ + +/* + * We make as much of the exception code common between native + * exception handlers (including pSeries LPAR) and iSeries LPAR + * implementations as possible. + */ + +/* + * This is the start of the interrupt handlers for pSeries + * This code runs with relocation off. + */ +#define EX_R9 0 +#define EX_R10 8 +#define EX_R11 16 +#define EX_R12 24 +#define EX_R13 32 +#define EX_SRR0 40 +#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */ +#define EX_DAR 48 +#define EX_LR 48 /* SLB miss saves LR, but not DAR */ +#define EX_DSISR 56 +#define EX_CCR 60 + +#define EXCEPTION_PROLOG_PSERIES(area, label) \ + mfspr r13,SPRG3; /* get paca address into r13 */ \ + std r9,area+EX_R9(r13); /* save r9 - r12 */ \ + std r10,area+EX_R10(r13); \ + std r11,area+EX_R11(r13); \ + std r12,area+EX_R12(r13); \ + mfspr r9,SPRG1; \ + std r9,area+EX_R13(r13); \ + mfcr r9; \ + clrrdi r12,r13,32; /* get high part of &label */ \ + mfmsr r10; \ + mfspr r11,SRR0; /* save SRR0 */ \ + ori r12,r12,(label)@l; /* virt addr of handler */ \ + ori r10,r10,MSR_IR|MSR_DR|MSR_RI; \ + mtspr SRR0,r12; \ + mfspr r12,SRR1; /* and SRR1 */ \ + mtspr SRR1,r10; \ + rfid; \ + b . /* prevent speculative execution */ + +/* + * This is the start of the interrupt handlers for iSeries + * This code runs with relocation on. + */ +#define EXCEPTION_PROLOG_ISERIES_1(area) \ + mfspr r13,SPRG3; /* get paca address into r13 */ \ + std r9,area+EX_R9(r13); /* save r9 - r12 */ \ + std r10,area+EX_R10(r13); \ + std r11,area+EX_R11(r13); \ + std r12,area+EX_R12(r13); \ + mfspr r9,SPRG1; \ + std r9,area+EX_R13(r13); \ + mfcr r9 + +#define EXCEPTION_PROLOG_ISERIES_2 \ + mfmsr r10; \ + ld r11,PACALPPACA+LPPACASRR0(r13); \ + ld r12,PACALPPACA+LPPACASRR1(r13); \ + ori r10,r10,MSR_RI; \ + mtmsrd r10,1 + +/* + * The common exception prolog is used for all except a few exceptions + * such as a segment miss on a kernel address. We have to be prepared + * to take another exception from the point where we first touch the + * kernel stack onwards. + * + * On entry r13 points to the paca, r9-r13 are saved in the paca, + * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and + * SRR1, and relocation is on. + */ +#define EXCEPTION_PROLOG_COMMON(n, area) \ + andi. r10,r12,MSR_PR; /* See if coming from user */ \ + mr r10,r1; /* Save r1 */ \ + subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ + beq- 1f; \ + ld r1,PACAKSAVE(r13); /* kernel stack to use */ \ +1: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \ + bge- cr1,bad_stack; /* abort if it is */ \ + std r9,_CCR(r1); /* save CR in stackframe */ \ + std r11,_NIP(r1); /* save SRR0 in stackframe */ \ + std r12,_MSR(r1); /* save SRR1 in stackframe */ \ + std r10,0(r1); /* make stack chain pointer */ \ + std r0,GPR0(r1); /* save r0 in stackframe */ \ + std r10,GPR1(r1); /* save r1 in stackframe */ \ + std r2,GPR2(r1); /* save r2 in stackframe */ \ + SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ + SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ + ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \ + ld r10,area+EX_R10(r13); \ + std r9,GPR9(r1); \ + std r10,GPR10(r1); \ + ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \ + ld r10,area+EX_R12(r13); \ + ld r11,area+EX_R13(r13); \ + std r9,GPR11(r1); \ + std r10,GPR12(r1); \ + std r11,GPR13(r1); \ + ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ + mflr r9; /* save LR in stackframe */ \ + std r9,_LINK(r1); \ + mfctr r10; /* save CTR in stackframe */ \ + std r10,_CTR(r1); \ + mfspr r11,XER; /* save XER in stackframe */ \ + std r11,_XER(r1); \ + li r9,(n)+1; \ + std r9,_TRAP(r1); /* set trap number */ \ + li r10,0; \ + ld r11,exception_marker@toc(r2); \ + std r10,RESULT(r1); /* clear regs->result */ \ + std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ + +/* + * Exception vectors. + */ +#define STD_EXCEPTION_PSERIES(n, label) \ + . = n; \ + .globl label##_pSeries; \ +label##_pSeries: \ + HMT_MEDIUM; \ + mtspr SPRG1,r13; /* save r13 */ \ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) + +#define STD_EXCEPTION_ISERIES(n, label, area) \ + .globl label##_iSeries; \ +label##_iSeries: \ + HMT_MEDIUM; \ + mtspr SPRG1,r13; /* save r13 */ \ + EXCEPTION_PROLOG_ISERIES_1(area); \ + EXCEPTION_PROLOG_ISERIES_2; \ + b label##_common + +#define MASKABLE_EXCEPTION_ISERIES(n, label) \ + .globl label##_iSeries; \ +label##_iSeries: \ + HMT_MEDIUM; \ + mtspr SPRG1,r13; /* save r13 */ \ + EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \ + lbz r10,PACAPROCENABLED(r13); \ + cmpwi 0,r10,0; \ + beq- label##_iSeries_masked; \ + EXCEPTION_PROLOG_ISERIES_2; \ + b label##_common; \ + +#ifdef DO_SOFT_DISABLE +#define DISABLE_INTS \ + lbz r10,PACAPROCENABLED(r13); \ + li r11,0; \ + std r10,SOFTE(r1); \ + mfmsr r10; \ + stb r11,PACAPROCENABLED(r13); \ + ori r10,r10,MSR_EE; \ + mtmsrd r10,1 + +#define ENABLE_INTS \ + lbz r10,PACAPROCENABLED(r13); \ + mfmsr r11; \ + std r10,SOFTE(r1); \ + ori r11,r11,MSR_EE; \ + mtmsrd r11,1 + +#else /* hard enable/disable interrupts */ +#define DISABLE_INTS + +#define ENABLE_INTS \ + ld r12,_MSR(r1); \ + mfmsr r11; \ + rlwimi r11,r12,0,MSR_EE; \ + mtmsrd r11,1 + +#endif + +#define STD_EXCEPTION_COMMON(trap, label, hdlr) \ + .align 7; \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + DISABLE_INTS; \ + bl .save_nvgprs; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except + +#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr) \ + .align 7; \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + DISABLE_INTS; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except_lite + +/* + * Start of pSeries system interrupt routines + */ + . = 0x100 + .globl __start_interrupts +__start_interrupts: + + STD_EXCEPTION_PSERIES(0x100, system_reset) + + . = 0x200 +_machine_check_pSeries: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + + . = 0x300 + .globl data_access_pSeries +data_access_pSeries: + HMT_MEDIUM + mtspr SPRG1,r13 +BEGIN_FTR_SECTION + mtspr SPRG2,r12 + mfspr r13,DAR + mfspr r12,DSISR + srdi r13,r13,60 + rlwimi r13,r12,16,0x20 + mfcr r12 + cmpwi r13,0x2c + beq .do_stab_bolted_pSeries + mtcrf 0x80,r12 + mfspr r12,SPRG2 +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common) + + . = 0x380 + .globl data_access_slb_pSeries +data_access_slb_pSeries: + HMT_MEDIUM + mtspr SPRG1,r13 + mfspr r13,SPRG3 /* get paca address into r13 */ + std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r9,SPRG1 + std r9,PACA_EXSLB+EX_R13(r13) + mfcr r9 + mfspr r12,SRR1 /* and SRR1 */ + mfspr r3,DAR + b .do_slb_miss /* Rel. branch works in real mode */ + + STD_EXCEPTION_PSERIES(0x400, instruction_access) + + . = 0x480 + .globl instruction_access_slb_pSeries +instruction_access_slb_pSeries: + HMT_MEDIUM + mtspr SPRG1,r13 + mfspr r13,SPRG3 /* get paca address into r13 */ + std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r9,SPRG1 + std r9,PACA_EXSLB+EX_R13(r13) + mfcr r9 + mfspr r12,SRR1 /* and SRR1 */ + mfspr r3,SRR0 /* SRR0 is faulting address */ + b .do_slb_miss /* Rel. branch works in real mode */ + + STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) + STD_EXCEPTION_PSERIES(0x600, alignment) + STD_EXCEPTION_PSERIES(0x700, program_check) + STD_EXCEPTION_PSERIES(0x800, fp_unavailable) + STD_EXCEPTION_PSERIES(0x900, decrementer) + STD_EXCEPTION_PSERIES(0xa00, trap_0a) + STD_EXCEPTION_PSERIES(0xb00, trap_0b) + + . = 0xc00 + .globl system_call_pSeries +system_call_pSeries: + HMT_MEDIUM + mr r9,r13 + mfmsr r10 + mfspr r13,SPRG3 + mfspr r11,SRR0 + clrrdi r12,r13,32 + oris r12,r12,system_call_common@h + ori r12,r12,system_call_common@l + mtspr SRR0,r12 + ori r10,r10,MSR_IR|MSR_DR|MSR_RI + mfspr r12,SRR1 + mtspr SRR1,r10 + rfid + b . /* prevent speculative execution */ + + STD_EXCEPTION_PSERIES(0xd00, single_step) + STD_EXCEPTION_PSERIES(0xe00, trap_0e) + + /* We need to deal with the Altivec unavailable exception + * here which is at 0xf20, thus in the middle of the + * prolog code of the PerformanceMonitor one. A little + * trickery is thus necessary + */ + . = 0xf00 + b performance_monitor_pSeries + + STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable) + + STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) + STD_EXCEPTION_PSERIES(0x1700, altivec_assist) + + /* moved from 0xf00 */ + STD_EXCEPTION_PSERIES(0x3000, performance_monitor) + + . = 0x3100 +_GLOBAL(do_stab_bolted_pSeries) + mtcrf 0x80,r12 + mfspr r12,SPRG2 + EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) + + + /* Space for the naca. Architected to be located at real address + * NACA_PHYS_ADDR. Various tools rely on this location being fixed. + * The first dword of the naca is required by iSeries LPAR to + * point to itVpdAreas. On pSeries native, this value is not used. + */ + . = NACA_PHYS_ADDR + .globl __end_interrupts +__end_interrupts: +#ifdef CONFIG_PPC_ISERIES + .globl naca +naca: + .llong itVpdAreas + + /* + * The iSeries LPAR map is at this fixed address + * so that the HvReleaseData structure can address + * it with a 32-bit offset. + * + * The VSID values below are dependent on the + * VSID generation algorithm. See include/asm/mmu_context.h. + */ + + . = 0x4800 + + .llong 2 /* # ESIDs to be mapped by hypervisor */ + .llong 1 /* # memory ranges to be mapped by hypervisor */ + .llong STAB0_PAGE /* Page # of segment table within load area */ + .llong 0 /* Reserved */ + .llong 0 /* Reserved */ + .llong 0 /* Reserved */ + .llong 0 /* Reserved */ + .llong 0 /* Reserved */ + .llong (KERNELBASE>>SID_SHIFT) + .llong 0x408f92c94 /* KERNELBASE VSID */ + /* We have to list the bolted VMALLOC segment here, too, so that it + * will be restored on shared processor switch */ + .llong (VMALLOCBASE>>SID_SHIFT) + .llong 0xf09b89af5 /* VMALLOCBASE VSID */ + .llong 8192 /* # pages to map (32 MB) */ + .llong 0 /* Offset from start of loadarea to start of map */ + .llong 0x408f92c940000 /* VPN of first page to map */ + + . = 0x6100 + +/*** ISeries-LPAR interrupt handlers ***/ + + STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC) + + .globl data_access_iSeries +data_access_iSeries: + mtspr SPRG1,r13 +BEGIN_FTR_SECTION + mtspr SPRG2,r12 + mfspr r13,DAR + mfspr r12,DSISR + srdi r13,r13,60 + rlwimi r13,r12,16,0x20 + mfcr r12 + cmpwi r13,0x2c + beq .do_stab_bolted_iSeries + mtcrf 0x80,r12 + mfspr r12,SPRG2 +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN) + EXCEPTION_PROLOG_ISERIES_2 + b data_access_common + +.do_stab_bolted_iSeries: + mtcrf 0x80,r12 + mfspr r12,SPRG2 + EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + EXCEPTION_PROLOG_ISERIES_2 + b .do_stab_bolted + + .globl data_access_slb_iSeries +data_access_slb_iSeries: + mtspr SPRG1,r13 /* save r13 */ + EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + std r3,PACA_EXSLB+EX_R3(r13) + ld r12,PACALPPACA+LPPACASRR1(r13) + mfspr r3,DAR + b .do_slb_miss + + STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) + + .globl instruction_access_slb_iSeries +instruction_access_slb_iSeries: + mtspr SPRG1,r13 /* save r13 */ + EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + std r3,PACA_EXSLB+EX_R3(r13) + ld r12,PACALPPACA+LPPACASRR1(r13) + ld r3,PACALPPACA+LPPACASRR0(r13) + b .do_slb_miss + + MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) + STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) + STD_EXCEPTION_ISERIES(0x700, program_check, PACA_EXGEN) + STD_EXCEPTION_ISERIES(0x800, fp_unavailable, PACA_EXGEN) + MASKABLE_EXCEPTION_ISERIES(0x900, decrementer) + STD_EXCEPTION_ISERIES(0xa00, trap_0a, PACA_EXGEN) + STD_EXCEPTION_ISERIES(0xb00, trap_0b, PACA_EXGEN) + + .globl system_call_iSeries +system_call_iSeries: + mr r9,r13 + mfspr r13,SPRG3 + EXCEPTION_PROLOG_ISERIES_2 + b system_call_common + + STD_EXCEPTION_ISERIES( 0xd00, single_step, PACA_EXGEN) + STD_EXCEPTION_ISERIES( 0xe00, trap_0e, PACA_EXGEN) + STD_EXCEPTION_ISERIES( 0xf00, performance_monitor, PACA_EXGEN) + + .globl system_reset_iSeries +system_reset_iSeries: + mfspr r13,SPRG3 /* Get paca address */ + mfmsr r24 + ori r24,r24,MSR_RI + mtmsrd r24 /* RI on */ + lhz r24,PACAPACAINDEX(r13) /* Get processor # */ + cmpwi 0,r24,0 /* Are we processor 0? */ + beq .__start_initialization_iSeries /* Start up the first processor */ + mfspr r4,CTRLF + li r5,RUNLATCH /* Turn off the run light */ + andc r4,r4,r5 + mtspr CTRLT,r4 + +1: + HMT_LOW +#ifdef CONFIG_SMP + lbz r23,PACAPROCSTART(r13) /* Test if this processor + * should start */ + sync + LOADADDR(r3,current_set) + sldi r28,r24,3 /* get current_set[cpu#] */ + ldx r3,r3,r28 + addi r1,r3,THREAD_SIZE + subi r1,r1,STACK_FRAME_OVERHEAD + + cmpwi 0,r23,0 + beq iSeries_secondary_smp_loop /* Loop until told to go */ +#ifdef SECONDARY_PROCESSORS + bne .__secondary_start /* Loop until told to go */ +#endif +iSeries_secondary_smp_loop: + /* Let the Hypervisor know we are alive */ + /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ + lis r3,0x8002 + rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ +#else /* CONFIG_SMP */ + /* Yield the processor. This is required for non-SMP kernels + which are running on multi-threaded machines. */ + lis r3,0x8000 + rldicr r3,r3,32,15 /* r3 = (r3 << 32) & 0xffff000000000000 */ + addi r3,r3,18 /* r3 = 0x8000000000000012 which is "yield" */ + li r4,0 /* "yield timed" */ + li r5,-1 /* "yield forever" */ +#endif /* CONFIG_SMP */ + li r0,-1 /* r0=-1 indicates a Hypervisor call */ + sc /* Invoke the hypervisor via a system call */ + mfspr r13,SPRG3 /* Put r13 back ???? */ + b 1b /* If SMP not configured, secondaries + * loop forever */ + + .globl decrementer_iSeries_masked +decrementer_iSeries_masked: + li r11,1 + stb r11,PACALPPACA+LPPACADECRINT(r13) + lwz r12,PACADEFAULTDECR(r13) + mtspr SPRN_DEC,r12 + /* fall through */ + + .globl hardware_interrupt_iSeries_masked +hardware_interrupt_iSeries_masked: + mtcrf 0x80,r9 /* Restore regs */ + ld r11,PACALPPACA+LPPACASRR0(r13) + ld r12,PACALPPACA+LPPACASRR1(r13) + mtspr SRR0,r11 + mtspr SRR1,r12 + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + rfid + b . /* prevent speculative execution */ +#endif + +/* + * Data area reserved for FWNMI option. + */ + .= 0x7000 + .globl fwnmi_data_area +fwnmi_data_area: + +/* + * Vectors for the FWNMI option. Share common code. + */ + . = 0x8000 + .globl system_reset_fwnmi +system_reset_fwnmi: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) + .globl machine_check_fwnmi +machine_check_fwnmi: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + + /* + * Space for the initial segment table + * For LPAR, the hypervisor must fill in at least one entry + * before we get control (with relocate on) + */ + . = STAB0_PHYS_ADDR + .globl __start_stab +__start_stab: + + . = (STAB0_PHYS_ADDR + PAGE_SIZE) + .globl __end_stab +__end_stab: + + +/*** Common interrupt handlers ***/ + + STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) + + /* + * Machine check is different because we use a different + * save area: PACA_EXMC instead of PACA_EXGEN. + */ + .align 7 + .globl machine_check_common +machine_check_common: + EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) + DISABLE_INTS + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .machine_check_exception + b .ret_from_except + + STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt) + STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) + STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) + STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) + STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) + STD_EXCEPTION_COMMON(0xf00, performance_monitor, .performance_monitor_exception) + STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) +#ifdef CONFIG_ALTIVEC + STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) +#else + STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception) +#endif + +/* + * Here we have detected that the kernel stack pointer is bad. + * R9 contains the saved CR, r13 points to the paca, + * r10 contains the (bad) kernel stack pointer, + * r11 and r12 contain the saved SRR0 and SRR1. + * We switch to using the paca guard page as an emergency stack, + * save the registers there, and call kernel_bad_stack(), which panics. + */ +bad_stack: + ld r1,PACAEMERGSP(r13) + subi r1,r1,64+INT_FRAME_SIZE + std r9,_CCR(r1) + std r10,GPR1(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + mfspr r11,DAR + mfspr r12,DSISR + std r11,_DAR(r1) + std r12,_DSISR(r1) + mflr r10 + mfctr r11 + mfxer r12 + std r10,_LINK(r1) + std r11,_CTR(r1) + std r12,_XER(r1) + SAVE_GPR(0,r1) + SAVE_GPR(2,r1) + SAVE_4GPRS(3,r1) + SAVE_2GPRS(7,r1) + SAVE_10GPRS(12,r1) + SAVE_10GPRS(22,r1) + addi r11,r1,INT_FRAME_SIZE + std r11,0(r1) + li r12,0 + std r12,0(r11) + ld r2,PACATOC(r13) +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .kernel_bad_stack + b 1b + +/* + * Return from an exception with minimal checks. + * The caller is assumed to have done EXCEPTION_PROLOG_COMMON. + * If interrupts have been enabled, or anything has been + * done that might have changed the scheduling status of + * any task or sent any task a signal, you should use + * ret_from_except or ret_from_except_lite instead of this. + */ +fast_exception_return: + ld r12,_MSR(r1) + ld r11,_NIP(r1) + andi. r3,r12,MSR_RI /* check if RI is set */ + beq- unrecov_fer + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + mtcr r3 + mtlr r4 + mtctr r5 + mtxer r6 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + + mfmsr r10 + clrrdi r10,r10,2 /* clear RI (LE is 0 already) */ + mtmsrd r10,1 + + mtspr SRR1,r12 + mtspr SRR0,r11 + REST_4GPRS(10, r1) + ld r1,GPR1(r1) + rfid + b . /* prevent speculative execution */ + +unrecov_fer: + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + +/* + * Here r13 points to the paca, r9 contains the saved CR, + * SRR0 and SRR1 are saved in r11 and r12, + * r9 - r13 are saved in paca->exgen. + */ + .align 7 + .globl data_access_common +data_access_common: + mfspr r10,DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + li r5,0x300 + b .do_hash_page /* Try to handle as hpte fault */ + + .align 7 + .globl instruction_access_common +instruction_access_common: + EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) + ld r3,_NIP(r1) + andis. r4,r12,0x5820 + li r5,0x400 + b .do_hash_page /* Try to handle as hpte fault */ + + .align 7 + .globl hardware_interrupt_common + .globl hardware_interrupt_entry +hardware_interrupt_common: + EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN) +hardware_interrupt_entry: + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_IRQ + b .ret_from_except_lite + + .align 7 + .globl alignment_common +alignment_common: + mfspr r10,DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN) + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + std r3,_DAR(r1) + std r4,_DSISR(r1) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .alignment_exception + b .ret_from_except + + .align 7 + .globl program_check_common +program_check_common: + EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .program_check_exception + b .ret_from_except + + .align 7 + .globl fp_unavailable_common +fp_unavailable_common: + EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) + bne .load_up_fpu /* if from user, just load it up */ + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .kernel_fp_unavailable_exception + BUG_OPCODE + + .align 7 + .globl altivec_unavailable_common +altivec_unavailable_common: + EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) +#ifdef CONFIG_ALTIVEC + bne .load_up_altivec /* if from user, just load it up */ +#endif + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .altivec_unavailable_exception + b .ret_from_except + +/* + * Hash table stuff + */ + .align 7 +_GLOBAL(do_hash_page) + std r3,_DAR(r1) + std r4,_DSISR(r1) + + andis. r0,r4,0xa450 /* weird error? */ + bne- .handle_page_fault /* if not, try to insert a HPTE */ +BEGIN_FTR_SECTION + andis. r0,r4,0x0020 /* Is it a segment table fault? */ + bne- .do_ste_alloc /* If so handle it */ +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + + /* + * We need to set the _PAGE_USER bit if MSR_PR is set or if we are + * accessing a userspace segment (even from the kernel). We assume + * kernel addresses always have the high bit set. + */ + rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */ + rotldi r0,r3,15 /* Move high bit into MSR_PR posn */ + orc r0,r12,r0 /* MSR_PR | ~high_bit */ + rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */ + ori r4,r4,1 /* add _PAGE_PRESENT */ + rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */ + + /* + * On iSeries, we soft-disable interrupts here, then + * hard-enable interrupts so that the hash_page code can spin on + * the hash_table_lock without problems on a shared processor. + */ + DISABLE_INTS + + /* + * r3 contains the faulting address + * r4 contains the required access permissions + * r5 contains the trap number + * + * at return r3 = 0 for success + */ + bl .hash_page /* build HPTE if possible */ + cmpdi r3,0 /* see if hash_page succeeded */ + +#ifdef DO_SOFT_DISABLE + /* + * If we had interrupts soft-enabled at the point where the + * DSI/ISI occurred, and an interrupt came in during hash_page, + * handle it now. + * We jump to ret_from_except_lite rather than fast_exception_return + * because ret_from_except_lite will check for and handle pending + * interrupts if necessary. + */ + beq .ret_from_except_lite + /* For a hash failure, we don't bother re-enabling interrupts */ + ble- 12f + + /* + * hash_page couldn't handle it, set soft interrupt enable back + * to what it was before the trap. Note that .local_irq_restore + * handles any interrupts pending at this point. + */ + ld r3,SOFTE(r1) + bl .local_irq_restore + b 11f +#else + beq fast_exception_return /* Return from exception on success */ + ble- 12f /* Failure return from hash_page */ + + /* fall through */ +#endif + +/* Here we have a page fault that hash_page can't handle. */ +_GLOBAL(handle_page_fault) + ENABLE_INTS +11: ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_page_fault + cmpdi r3,0 + beq+ .ret_from_except_lite + bl .save_nvgprs + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + lwz r4,_DAR(r1) + bl .bad_page_fault + b .ret_from_except + +/* We have a page fault that hash_page could handle but HV refused + * the PTE insertion + */ +12: bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + lwz r4,_DAR(r1) + bl .low_hash_fault + b .ret_from_except + + /* here we have a segment miss */ +_GLOBAL(do_ste_alloc) + bl .ste_allocate /* try to insert stab entry */ + cmpdi r3,0 + beq+ fast_exception_return + b .handle_page_fault + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r11 and r12 contain the saved SRR0 and SRR1. + * r9 - r13 are saved in paca->exslb. + * We assume we aren't going to take any exceptions during this procedure. + * We assume (DAR >> 60) == 0xc. + */ + .align 7 +_GLOBAL(do_stab_bolted) + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ + + /* Hash to the primary group */ + ld r10,PACASTABVIRT(r13) + mfspr r11,DAR + srdi r11,r11,28 + rldimi r10,r11,7,52 /* r10 = first ste of the group */ + + /* Calculate VSID */ + /* This is a kernel address, so protovsid = ESID */ + ASM_VSID_SCRAMBLE(r11, r9) + rldic r9,r11,12,16 /* r9 = vsid << 12 */ + + /* Search the primary group for a free entry */ +1: ld r11,0(r10) /* Test valid bit of the current ste */ + andi. r11,r11,0x80 + beq 2f + addi r10,r10,16 + andi. r11,r10,0x70 + bne 1b + + /* Stick for only searching the primary group for now. */ + /* At least for now, we use a very simple random castout scheme */ + /* Use the TB as a random number ; OR in 1 to avoid entry 0 */ + mftb r11 + rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */ + ori r11,r11,0x10 + + /* r10 currently points to an ste one past the group of interest */ + /* make it point to the randomly selected entry */ + subi r10,r10,128 + or r10,r10,r11 /* r10 is the entry to invalidate */ + + isync /* mark the entry invalid */ + ld r11,0(r10) + rldicl r11,r11,56,1 /* clear the valid bit */ + rotldi r11,r11,8 + std r11,0(r10) + sync + + clrrdi r11,r11,28 /* Get the esid part of the ste */ + slbie r11 + +2: std r9,8(r10) /* Store the vsid part of the ste */ + eieio + + mfspr r11,DAR /* Get the new esid */ + clrrdi r11,r11,28 /* Permits a full 32b of ESID */ + ori r11,r11,0x90 /* Turn on valid and kp */ + std r11,0(r10) /* Put new entry back into the stab */ + + sync + + /* All done -- return from exception. */ + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ + ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */ + + andi. r10,r12,MSR_RI + beq- unrecov_slb + + mtcrf 0x80,r9 /* restore CR */ + + mfmsr r10 + clrrdi r10,r10,2 + mtmsrd r10,1 + + mtspr SRR0,r11 + mtspr SRR1,r12 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r11 and r12 contain the saved SRR0 and SRR1. + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(do_slb_miss) + mflr r10 + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate /* handle it */ + + /* All done -- return from exception. */ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ +#ifdef CONFIG_PPC_ISERIES + ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ +#endif /* CONFIG_PPC_ISERIES */ + + mtlr r10 + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_slb + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + +#ifdef CONFIG_PPC_ISERIES + mtspr SRR0,r11 + mtspr SRR1,r12 +#endif /* CONFIG_PPC_ISERIES */ + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + + +/* + * On pSeries, secondary processors spin in the following code. + * At entry, r3 = this processor's number (physical cpu id) + */ +_GLOBAL(pSeries_secondary_smp_init) + mr r24,r3 + + /* turn on 64-bit mode */ + bl .enable_64b_mode + isync + + /* Copy some CPU settings from CPU 0 */ + bl .__restore_cpu_setup + + /* Set up a paca value for this processor. Since we have the + * physical cpu id in r3, we need to search the pacas to find + * which logical id maps to our physical one. + */ + LOADADDR(r13, paca) /* Get base vaddr of paca array */ + li r5,0 /* logical cpu id */ +1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ + cmpw r6,r24 /* Compare to our id */ + beq 2f + addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */ + addi r5,r5,1 + cmpwi r5,NR_CPUS + blt 1b + +99: HMT_LOW /* Couldn't find our CPU id */ + b 99b + +2: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ + /* From now on, r24 is expected to be logica cpuid */ + mr r24,r5 +3: HMT_LOW + lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ + /* start. */ + sync + + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + + cmpwi 0,r23,0 +#ifdef CONFIG_SMP +#ifdef SECONDARY_PROCESSORS + bne .__secondary_start +#endif +#endif + b 3b /* Loop until told to go */ + +#ifdef CONFIG_PPC_ISERIES +_STATIC(__start_initialization_iSeries) + /* Clear out the BSS */ + LOADADDR(r11,__bss_stop) + LOADADDR(r8,__bss_start) + sub r11,r11,r8 /* bss size */ + addi r11,r11,7 /* round up to an even double word */ + rldicl. r11,r11,61,3 /* shift right by 3 */ + beq 4f + addi r8,r8,-8 + li r0,0 + mtctr r11 /* zero this many doublewords */ +3: stdu r0,8(r8) + bdnz 3b +4: + LOADADDR(r1,init_thread_union) + addi r1,r1,THREAD_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + LOADADDR(r3,cpu_specs) + LOADADDR(r4,cur_cpu_spec) + li r5,0 + bl .identify_cpu + + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + + bl .iSeries_early_setup + + /* relocation is on at this point */ + + b .start_here_common +#endif /* CONFIG_PPC_ISERIES */ + +#ifdef CONFIG_PPC_MULTIPLATFORM + +_STATIC(__mmu_off) + mfmsr r3 + andi. r0,r3,MSR_IR|MSR_DR + beqlr + andc r3,r3,r0 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + sync + rfid + b . /* prevent speculative execution */ + + +/* + * Here is our main kernel entry point. We support currently 2 kind of entries + * depending on the value of r5. + * + * r5 != NULL -> OF entry, we go to prom_init, "legacy" parameter content + * in r3...r7 + * + * r5 == NULL -> kexec style entry. r3 is a physical pointer to the + * DT block, r4 is a physical pointer to the kernel itself + * + */ +_GLOBAL(__start_initialization_multiplatform) + /* + * Are we booted from a PROM Of-type client-interface ? + */ + cmpldi cr0,r5,0 + bne .__boot_from_prom /* yes -> prom */ + + /* Save parameters */ + mr r31,r3 + mr r30,r4 + + /* Make sure we are running in 64 bits mode */ + bl .enable_64b_mode + + /* Setup some critical 970 SPRs before switching MMU off */ + bl .__970_cpu_preinit + + /* cpu # */ + li r24,0 + + /* Switch off MMU if not already */ + LOADADDR(r4, .__after_prom_start - KERNELBASE) + add r4,r4,r30 + bl .__mmu_off + b .__after_prom_start + +_STATIC(__boot_from_prom) + /* Save parameters */ + mr r31,r3 + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + + /* Make sure we are running in 64 bits mode */ + bl .enable_64b_mode + + /* put a relocation offset into r3 */ + bl .reloc_offset + + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + + /* Relocate the TOC from a virt addr to a real addr */ + sub r2,r2,r3 + + /* Restore parameters */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + + /* Do all of the interaction with OF client interface */ + bl .prom_init + /* We never return */ + trap + +/* + * At this point, r3 contains the physical address we are running at, + * returned by prom_init() + */ +_STATIC(__after_prom_start) + +/* + * We need to run with __start at physical address 0. + * This will leave some code in the first 256B of + * real memory, which are reserved for software use. + * The remainder of the first page is loaded with the fixed + * interrupt vectors. The next two pages are filled with + * unknown exception placeholders. + * + * Note: This process overwrites the OF exception vectors. + * r26 == relocation offset + * r27 == KERNELBASE + */ + bl .reloc_offset + mr r26,r3 + SET_REG_TO_CONST(r27,KERNELBASE) + + li r3,0 /* target addr */ + + // XXX FIXME: Use phys returned by OF (r30) + sub r4,r27,r26 /* source addr */ + /* current address of _start */ + /* i.e. where we are running */ + /* the source addr */ + + LOADADDR(r5,copy_to_here) /* # bytes of memory to copy */ + sub r5,r5,r27 + + li r6,0x100 /* Start offset, the first 0x100 */ + /* bytes were copied earlier. */ + + bl .copy_and_flush /* copy the first n bytes */ + /* this includes the code being */ + /* executed here. */ + + LOADADDR(r0, 4f) /* Jump to the copy of this code */ + mtctr r0 /* that we just made/relocated */ + bctr + +4: LOADADDR(r5,klimit) + sub r5,r5,r26 + ld r5,0(r5) /* get the value of klimit */ + sub r5,r5,r27 + bl .copy_and_flush /* copy the rest */ + b .start_here_multiplatform + +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +/* + * Copy routine used to copy the kernel to start at physical address 0 + * and flush and invalidate the caches as needed. + * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset + * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. + * + * Note: this routine *only* clobbers r0, r6 and lr + */ +_GLOBAL(copy_and_flush) + addi r5,r5,-8 + addi r6,r6,-8 +4: li r0,16 /* Use the least common */ + /* denominator cache line */ + /* size. This results in */ + /* extra cache line flushes */ + /* but operation is correct. */ + /* Can't get cache line size */ + /* from NACA as it is being */ + /* moved too. */ + + mtctr r0 /* put # words/line in ctr */ +3: addi r6,r6,8 /* copy a cache line */ + ldx r0,r6,r4 + stdx r0,r6,r3 + bdnz 3b + dcbst r6,r3 /* write it to memory */ + sync + icbi r6,r3 /* flush the icache line */ + cmpld 0,r6,r5 + blt 4b + sync + addi r5,r5,8 + addi r6,r6,8 + blr + +.align 8 +copy_to_here: + +/* + * load_up_fpu(unused, unused, tsk) + * Disable FP for the task which had the FPU previously, + * and save its floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + * On SMP we know the fpu is free, since we give it up every + * switch (ie, no lazy save of the FP registers). + * On entry: r13 == 'current' && last_task_used_math != 'current' + */ +_STATIC(load_up_fpu) + mfmsr r5 /* grab the current MSR */ + ori r5,r5,MSR_FP + mtmsrd r5 /* enable use of fpu now */ + isync +/* + * For SMP, we don't do lazy FPU switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_fpu in switch_to. + * + */ +#ifndef CONFIG_SMP + ld r3,last_task_used_math@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Save FP state to last_task_used_math's THREAD struct */ + addi r4,r4,THREAD + SAVE_32FPRS(0, r4) + mffs fr0 + stfd fr0,THREAD_FPSCR(r4) + /* Disable FP for last_task_used_math */ + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r6,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r6 + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of FP after return */ + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + ld r4,THREAD_FPEXC_MODE(r5) + ori r12,r12,MSR_FP + or r12,r12,r4 + std r12,_MSR(r1) + lfd fr0,THREAD_FPSCR(r5) + mtfsf 0xff,fr0 + REST_32FPRS(0, r5) +#ifndef CONFIG_SMP + /* Update last_task_used_math to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + std r4,0(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return + +/* + * disable_kernel_fp() + * Disable the FPU. + */ +_GLOBAL(disable_kernel_fp) + mfmsr r3 + rldicl r0,r3,(63-MSR_FP_LG),1 + rldicl r3,r0,(MSR_FP_LG+1),0 + mtmsrd r3 /* disable use of fpu now */ + isync + blr + +/* + * giveup_fpu(tsk) + * Disable FP for the task given as the argument, + * and save the floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + */ +_GLOBAL(giveup_fpu) + mfmsr r5 + ori r5,r5,MSR_FP + mtmsrd r5 /* enable use of fpu now */ + isync + cmpdi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpdi 0,r5,0 + SAVE_32FPRS(0, r3) + mffs fr0 + stfd fr0,THREAD_FPSCR(r3) + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r3,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r3 /* disable FP for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + ld r4,last_task_used_math@got(r2) + std r5,0(r4) +#endif /* CONFIG_SMP */ + blr + + +#ifdef CONFIG_ALTIVEC + +/* + * load_up_altivec(unused, unused, tsk) + * Disable VMX for the task which had it previously, + * and save its vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. + * On SMP we know the VMX is free, since we give it up every + * switch (ie, no lazy save of the vector registers). + * On entry: r13 == 'current' && last_task_used_altivec != 'current' + */ +_STATIC(load_up_altivec) + mfmsr r5 /* grab the current MSR */ + oris r5,r5,MSR_VEC@h + mtmsrd r5 /* enable use of VMX now */ + isync + +/* + * For SMP, we don't do lazy VMX switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_altvec in switch_to. + * VRSAVE isn't dealt with here, that is done in the normal context + * switch code. Note that we could rely on vrsave value to eventually + * avoid saving all of the VREGs here... + */ +#ifndef CONFIG_SMP + ld r3,last_task_used_altivec@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Save VMX state to last_task_used_altivec's THREAD struct */ + addi r4,r4,THREAD + SAVE_32VRS(0,r5,r4) + mfvscr vr0 + li r10,THREAD_VSCR + stvx vr0,r10,r4 + /* Disable VMX for last_task_used_altivec */ + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r6,MSR_VEC@h + andc r4,r4,r6 + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* Hack: if we get an altivec unavailable trap with VRSAVE + * set to all zeros, we assume this is a broken application + * that fails to set it properly, and thus we switch it to + * all 1's + */ + mfspr r4,SPRN_VRSAVE + cmpdi 0,r4,0 + bne+ 1f + li r4,-1 + mtspr SPRN_VRSAVE,r4 +1: + /* enable use of VMX after return */ + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + oris r12,r12,MSR_VEC@h + std r12,_MSR(r1) + li r4,1 + li r10,THREAD_VSCR + stw r4,THREAD_USED_VR(r5) + lvx vr0,r10,r5 + mtvscr vr0 + REST_32VRS(0,r4,r5) +#ifndef CONFIG_SMP + /* Update last_task_used_math to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + std r4,0(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return + +/* + * disable_kernel_altivec() + * Disable the VMX. + */ +_GLOBAL(disable_kernel_altivec) + mfmsr r3 + rldicl r0,r3,(63-MSR_VEC_LG),1 + rldicl r3,r0,(MSR_VEC_LG+1),0 + mtmsrd r3 /* disable use of VMX now */ + isync + blr + +/* + * giveup_altivec(tsk) + * Disable VMX for the task given as the argument, + * and save the vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. + */ +_GLOBAL(giveup_altivec) + mfmsr r5 + oris r5,r5,MSR_VEC@h + mtmsrd r5 /* enable use of VMX now */ + isync + cmpdi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpdi 0,r5,0 + SAVE_32VRS(0,r4,r3) + mfvscr vr0 + li r4,THREAD_VSCR + stvx vr0,r4,r3 + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_VEC@h + andc r4,r4,r3 /* disable FP for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + ld r4,last_task_used_altivec@got(r2) + std r5,0(r4) +#endif /* CONFIG_SMP */ + blr + +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_SMP +#ifdef CONFIG_PPC_PMAC +/* + * On PowerMac, secondary processors starts from the reset vector, which + * is temporarily turned into a call to one of the functions below. + */ + .section ".text"; + .align 2 ; + + .globl pmac_secondary_start_1 +pmac_secondary_start_1: + li r24, 1 + b .pmac_secondary_start + + .globl pmac_secondary_start_2 +pmac_secondary_start_2: + li r24, 2 + b .pmac_secondary_start + + .globl pmac_secondary_start_3 +pmac_secondary_start_3: + li r24, 3 + b .pmac_secondary_start + +_GLOBAL(pmac_secondary_start) + /* turn on 64-bit mode */ + bl .enable_64b_mode + isync + + /* Copy some CPU settings from CPU 0 */ + bl .__restore_cpu_setup + + /* pSeries do that early though I don't think we really need it */ + mfmsr r3 + ori r3,r3,MSR_RI + mtmsrd r3 /* RI on */ + + /* Set up a paca value for this processor. */ + LOADADDR(r4, paca) /* Get base vaddr of paca array */ + mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r13,r4 /* for this processor. */ + mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ + + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + + b .__secondary_start + +#endif /* CONFIG_PPC_PMAC */ + +/* + * This function is called after the master CPU has released the + * secondary processors. The execution environment is relocation off. + * The paca for this processor has the following fields initialized at + * this point: + * 1. Processor number + * 2. Segment table pointer (virtual address) + * On entry the following are set: + * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries + * r24 = cpu# (in Linux terms) + * r13 = paca virtual address + * SPRG3 = paca virtual address + */ +_GLOBAL(__secondary_start) + + HMT_MEDIUM /* Set thread priority to MEDIUM */ + + ld r2,PACATOC(r13) + li r6,0 + stb r6,PACAPROCENABLED(r13) + +#ifndef CONFIG_PPC_ISERIES + /* Initialize the page table pointer register. */ + LOADADDR(r6,_SDR1) + ld r6,0(r6) /* get the value of _SDR1 */ + mtspr SDR1,r6 /* set the htab location */ +#endif + /* Initialize the first segment table (or SLB) entry */ + ld r3,PACASTABVIRT(r13) /* get addr of segment table */ + bl .stab_initialize + + /* Initialize the kernel stack. Just a repeat for iSeries. */ + LOADADDR(r3,current_set) + sldi r28,r24,3 /* get current_set[cpu#] */ + ldx r1,r3,r28 + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + std r1,PACAKSAVE(r13) + + ld r3,PACASTABREAL(r13) /* get raddr of segment table */ + ori r4,r3,1 /* turn on valid bit */ + +#ifdef CONFIG_PPC_ISERIES + li r0,-1 /* hypervisor call */ + li r3,1 + sldi r3,r3,63 /* 0x8000000000000000 */ + ori r3,r3,4 /* 0x8000000000000004 */ + sc /* HvCall_setASR */ +#else + /* set the ASR */ + ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ + lwz r3,PLATFORM(r3) /* r3 = platform flags */ + cmpldi r3,PLATFORM_PSERIES_LPAR + bne 98f + mfspr r3,PVR + srwi r3,r3,16 + cmpwi r3,0x37 /* SStar */ + beq 97f + cmpwi r3,0x36 /* IStar */ + beq 97f + cmpwi r3,0x34 /* Pulsar */ + bne 98f +97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ + HVSC /* Invoking hcall */ + b 99f +98: /* !(rpa hypervisor) || !(star) */ + mtasr r4 /* set the stab location */ +99: +#endif + li r7,0 + mtlr r7 + + /* enable MMU and jump to start_secondary */ + LOADADDR(r3,.start_secondary_prolog) + SET_REG_TO_CONST(r4, MSR_KERNEL) +#ifdef DO_SOFT_DISABLE + ori r4,r4,MSR_EE +#endif + mtspr SRR0,r3 + mtspr SRR1,r4 + rfid + b . /* prevent speculative execution */ + +/* + * Running with relocation on at this point. All we want to do is + * zero the stack back-chain pointer before going into C code. + */ +_GLOBAL(start_secondary_prolog) + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl .start_secondary +#endif + +/* + * This subroutine clobbers r11 and r12 + */ +_GLOBAL(enable_64b_mode) + mfmsr r11 /* grab the current MSR */ + li r12,1 + rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) + or r11,r11,r12 + li r12,1 + rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) + or r11,r11,r12 + mtmsrd r11 + isync + blr + +#ifdef CONFIG_PPC_MULTIPLATFORM +/* + * This is where the main kernel code starts. + */ +_STATIC(start_here_multiplatform) + /* get a new offset, now that the kernel has moved. */ + bl .reloc_offset + mr r26,r3 + + /* Clear out the BSS. It may have been done in prom_init, + * already but that's irrelevant since prom_init will soon + * be detached from the kernel completely. Besides, we need + * to clear it now for kexec-style entry. + */ + LOADADDR(r11,__bss_stop) + LOADADDR(r8,__bss_start) + sub r11,r11,r8 /* bss size */ + addi r11,r11,7 /* round up to an even double word */ + rldicl. r11,r11,61,3 /* shift right by 3 */ + beq 4f + addi r8,r8,-8 + li r0,0 + mtctr r11 /* zero this many doublewords */ +3: stdu r0,8(r8) + bdnz 3b +4: + + mfmsr r6 + ori r6,r6,MSR_RI + mtmsrd r6 /* RI on */ + +#ifdef CONFIG_HMT + /* Start up the second thread on cpu 0 */ + mfspr r3,PVR + srwi r3,r3,16 + cmpwi r3,0x34 /* Pulsar */ + beq 90f + cmpwi r3,0x36 /* Icestar */ + beq 90f + cmpwi r3,0x37 /* SStar */ + beq 90f + b 91f /* HMT not supported */ +90: li r3,0 + bl .hmt_start_secondary +91: +#endif + + /* The following gets the stack and TOC set up with the regs */ + /* pointing to the real addr of the kernel stack. This is */ + /* all done to support the C function call below which sets */ + /* up the htab. This is done because we have relocated the */ + /* kernel but are still running in real mode. */ + + LOADADDR(r3,init_thread_union) + sub r3,r3,r26 + + /* set up a stack pointer (physical address) */ + addi r1,r3,THREAD_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + /* set up the TOC (physical address) */ + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + sub r2,r2,r26 + + LOADADDR(r3,cpu_specs) + sub r3,r3,r26 + LOADADDR(r4,cur_cpu_spec) + sub r4,r4,r26 + mr r5,r26 + bl .identify_cpu + + /* Save some low level config HIDs of CPU0 to be copied to + * other CPUs later on, or used for suspend/resume + */ + bl .__save_cpu_setup + sync + + /* Setup a valid physical PACA pointer in SPRG3 for early_setup + * note that boot_cpuid can always be 0 nowadays since there is + * nowhere it can be initialized differently before we reach this + * code + */ + LOADADDR(r27, boot_cpuid) + sub r27,r27,r26 + lwz r27,0(r27) + + LOADADDR(r24, paca) /* Get base vaddr of paca array */ + mulli r13,r27,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r13,r24 /* for this processor. */ + sub r13,r13,r26 /* convert to physical addr */ + mtspr SPRG3,r13 /* PPPBBB: Temp... -Peter */ + + /* Do very early kernel initializations, including initial hash table, + * stab and slb setup before we turn on relocation. */ + + /* Restore parameters passed from prom_init/kexec */ + mr r3,r31 + bl .early_setup + + /* set the ASR */ + ld r3,PACASTABREAL(r13) + ori r4,r3,1 /* turn on valid bit */ + ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ + lwz r3,PLATFORM(r3) /* r3 = platform flags */ + cmpldi r3,PLATFORM_PSERIES_LPAR + bne 98f + mfspr r3,PVR + srwi r3,r3,16 + cmpwi r3,0x37 /* SStar */ + beq 97f + cmpwi r3,0x36 /* IStar */ + beq 97f + cmpwi r3,0x34 /* Pulsar */ + bne 98f +97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ + HVSC /* Invoking hcall */ + b 99f +98: /* !(rpa hypervisor) || !(star) */ + mtasr r4 /* set the stab location */ +99: + /* Set SDR1 (hash table pointer) */ + ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ + lwz r3,PLATFORM(r3) /* r3 = platform flags */ + /* Test if bit 0 is set (LPAR bit) */ + andi. r3,r3,0x1 + bne 98f + LOADADDR(r6,_SDR1) /* Only if NOT LPAR */ + sub r6,r6,r26 + ld r6,0(r6) /* get the value of _SDR1 */ + mtspr SDR1,r6 /* set the htab location */ +98: + LOADADDR(r3,.start_here_common) + SET_REG_TO_CONST(r4, MSR_KERNEL) + mtspr SRR0,r3 + mtspr SRR1,r4 + rfid + b . /* prevent speculative execution */ +#endif /* CONFIG_PPC_MULTIPLATFORM */ + + /* This is where all platforms converge execution */ +_STATIC(start_here_common) + /* relocation is on at this point */ + + /* The following code sets up the SP and TOC now that we are */ + /* running with translation enabled. */ + + LOADADDR(r3,init_thread_union) + + /* set up the stack */ + addi r1,r3,THREAD_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + /* Apply the CPUs-specific fixups (nop out sections not relevant + * to this CPU + */ + li r3,0 + bl .do_cpu_ftr_fixups + + LOADADDR(r26, boot_cpuid) + lwz r26,0(r26) + + LOADADDR(r24, paca) /* Get base vaddr of paca array */ + mulli r13,r26,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r13,r24 /* for this processor. */ + mtspr SPRG3,r13 + + /* ptr to current */ + LOADADDR(r4,init_task) + std r4,PACACURRENT(r13) + + /* Load the TOC */ + ld r2,PACATOC(r13) + std r1,PACAKSAVE(r13) + + bl .setup_system + + /* Load up the kernel context */ +5: +#ifdef DO_SOFT_DISABLE + li r5,0 + stb r5,PACAPROCENABLED(r13) /* Soft Disabled */ + mfmsr r5 + ori r5,r5,MSR_EE /* Hard Enabled */ + mtmsrd r5 +#endif + + bl .start_kernel + +_GLOBAL(__setup_cpu_power3) + blr + +_GLOBAL(hmt_init) +#ifdef CONFIG_HMT + LOADADDR(r5, hmt_thread_data) + mfspr r7,PVR + srwi r7,r7,16 + cmpwi r7,0x34 /* Pulsar */ + beq 90f + cmpwi r7,0x36 /* Icestar */ + beq 91f + cmpwi r7,0x37 /* SStar */ + beq 91f + b 101f +90: mfspr r6,PIR + andi. r6,r6,0x1f + b 92f +91: mfspr r6,PIR + andi. r6,r6,0x3ff +92: sldi r4,r24,3 + stwx r6,r5,r4 + bl .hmt_start_secondary + b 101f + +__hmt_secondary_hold: + LOADADDR(r5, hmt_thread_data) + clrldi r5,r5,4 + li r7,0 + mfspr r6,PIR + mfspr r8,PVR + srwi r8,r8,16 + cmpwi r8,0x34 + bne 93f + andi. r6,r6,0x1f + b 103f +93: andi. r6,r6,0x3f + +103: lwzx r8,r5,r7 + cmpw r8,r6 + beq 104f + addi r7,r7,8 + b 103b + +104: addi r7,r7,4 + lwzx r9,r5,r7 + mr r24,r9 +101: +#endif + mr r3,r24 + b .pSeries_secondary_smp_init + +#ifdef CONFIG_HMT +_GLOBAL(hmt_start_secondary) + LOADADDR(r4,__hmt_secondary_hold) + clrldi r4,r4,4 + mtspr NIADORM, r4 + mfspr r4, MSRDORM + li r5, -65 + and r4, r4, r5 + mtspr MSRDORM, r4 + lis r4,0xffef + ori r4,r4,0x7403 + mtspr TSC, r4 + li r4,0x1f4 + mtspr TST, r4 + mfspr r4, HID0 + ori r4, r4, 0x1 + mtspr HID0, r4 + mfspr r4, CTRLF + oris r4, r4, 0x40 + mtspr CTRLT, r4 + blr +#endif + +#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES) +_GLOBAL(smp_release_cpus) + /* All secondary cpus are spinning on a common + * spinloop, release them all now so they can start + * to spin on their individual paca spinloops. + * For non SMP kernels, the secondary cpus never + * get out of the common spinloop. + */ + li r3,1 + LOADADDR(r5,__secondary_hold_spinloop) + std r3,0(r5) + sync + blr +#endif /* CONFIG_SMP && !CONFIG_PPC_ISERIES */ + + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the data segment, + * which is page-aligned. + */ + .data + .align 12 + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 + + .globl swapper_pg_dir +swapper_pg_dir: + .space 4096 + + .globl ioremap_dir +ioremap_dir: + .space 4096 + +#ifdef CONFIG_SMP +/* 1 page segment table per cpu (max 48, cpu0 allocated at STAB0_PHYS_ADDR) */ + .globl stab_array +stab_array: + .space 4096 * 48 +#endif + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * Used to pass parameters into the kernel like root=/dev/sda1, etc. + */ + .globl cmd_line +cmd_line: + .space COMMAND_LINE_SIZE diff --git a/arch/ppc64/kernel/hvCall.S b/arch/ppc64/kernel/hvCall.S new file mode 100644 index 00000000000..4c699eab1b9 --- /dev/null +++ b/arch/ppc64/kernel/hvCall.S @@ -0,0 +1,98 @@ +/* + * arch/ppc64/kernel/hvCall.S + * + * + * This file contains the code to perform calls to the + * iSeries LPAR hypervisor + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + + .text + +/* + * Hypervisor call + * + * Invoke the iSeries hypervisor via the System Call instruction + * Parameters are passed to this routine in registers r3 - r10 + * + * r3 contains the HV function to be called + * r4-r10 contain the operands to the hypervisor function + * + */ + +_GLOBAL(HvCall) +_GLOBAL(HvCall0) +_GLOBAL(HvCall1) +_GLOBAL(HvCall2) +_GLOBAL(HvCall3) +_GLOBAL(HvCall4) +_GLOBAL(HvCall5) +_GLOBAL(HvCall6) +_GLOBAL(HvCall7) + + + mfcr r0 + std r0,-8(r1) + stdu r1,-(STACK_FRAME_OVERHEAD+16)(r1) + + /* r0 = 0xffffffffffffffff indicates a hypervisor call */ + + li r0,-1 + + /* Invoke the hypervisor */ + + sc + + ld r1,0(r1) + ld r0,-8(r1) + mtcrf 0xff,r0 + + /* return to caller, return value in r3 */ + + blr + +_GLOBAL(HvCall0Ret16) +_GLOBAL(HvCall1Ret16) +_GLOBAL(HvCall2Ret16) +_GLOBAL(HvCall3Ret16) +_GLOBAL(HvCall4Ret16) +_GLOBAL(HvCall5Ret16) +_GLOBAL(HvCall6Ret16) +_GLOBAL(HvCall7Ret16) + + mfcr r0 + std r0,-8(r1) + std r31,-16(r1) + stdu r1,-(STACK_FRAME_OVERHEAD+32)(r1) + + mr r31,r4 + li r0,-1 + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + sc + + std r3,0(r31) + std r4,8(r31) + + mr r3,r5 + + ld r1,0(r1) + ld r0,-8(r1) + mtcrf 0xff,r0 + ld r31,-16(r1) + + blr + + diff --git a/arch/ppc64/kernel/hvconsole.c b/arch/ppc64/kernel/hvconsole.c new file mode 100644 index 00000000000..c72fb8ffe97 --- /dev/null +++ b/arch/ppc64/kernel/hvconsole.c @@ -0,0 +1,121 @@ +/* + * hvconsole.c + * Copyright (C) 2004 Hollis Blanchard, IBM Corporation + * Copyright (C) 2004 IBM Corporation + * + * Additional Author(s): + * Ryan S. Arnold + * + * LPAR console support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include + +/** + * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper + * @vtermno: The vtermno or unit_address of the adapter from which to fetch the + * data. + * @buf: The character buffer into which to put the character data fetched from + * firmware. + * @count: not used? + */ +int hvc_get_chars(uint32_t vtermno, char *buf, int count) +{ + unsigned long got; + + if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got, + (unsigned long *)buf, (unsigned long *)buf+1) == H_Success) { + /* + * Work around a HV bug where it gives us a null + * after every \r. -- paulus + */ + if (got > 0) { + int i; + for (i = 1; i < got; ++i) { + if (buf[i] == 0 && buf[i-1] == '\r') { + --got; + if (i < got) + memmove(&buf[i], &buf[i+1], + got - i); + } + } + } + return got; + } + return 0; +} + +EXPORT_SYMBOL(hvc_get_chars); + +/** + * hvc_put_chars: send characters to firmware for denoted vterm adapter + * @vtermno: The vtermno or unit_address of the adapter from which the data + * originated. + * @buf: The character buffer that contains the character data to send to + * firmware. + * @count: Send this number of characters. + */ +int hvc_put_chars(uint32_t vtermno, const char *buf, int count) +{ + unsigned long *lbuf = (unsigned long *) buf; + long ret; + + ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, lbuf[0], + lbuf[1]); + if (ret == H_Success) + return count; + if (ret == H_Busy) + return 0; + return -EIO; +} + +EXPORT_SYMBOL(hvc_put_chars); + +/* + * We hope/assume that the first vty found corresponds to the first console + * device. + */ +int hvc_find_vtys(void) +{ + struct device_node *vty; + int num_found = 0; + + for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL; + vty = of_find_node_by_name(vty, "vty")) { + uint32_t *vtermno; + + /* We have statically defined space for only a certain number of + * console adapters. */ + if (num_found >= MAX_NR_HVC_CONSOLES) + break; + + vtermno = (uint32_t *)get_property(vty, "reg", NULL); + if (!vtermno) + continue; + + if (device_is_compatible(vty, "hvterm1")) { + hvc_instantiate(*vtermno, num_found); + ++num_found; + } + } + + return num_found; +} diff --git a/arch/ppc64/kernel/hvcserver.c b/arch/ppc64/kernel/hvcserver.c new file mode 100644 index 00000000000..bde8f42da85 --- /dev/null +++ b/arch/ppc64/kernel/hvcserver.c @@ -0,0 +1,249 @@ +/* + * hvcserver.c + * Copyright (C) 2004 Ryan S Arnold, IBM Corporation + * + * PPC64 virtual I/O console server support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include + +#define HVCS_ARCH_VERSION "1.0.0" + +MODULE_AUTHOR("Ryan S. Arnold "); +MODULE_DESCRIPTION("IBM hvcs ppc64 API"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(HVCS_ARCH_VERSION); + +/* + * Convert arch specific return codes into relevant errnos. The hvcs + * functions aren't performance sensitive, so this conversion isn't an + * issue. + */ +int hvcs_convert(long to_convert) +{ + switch (to_convert) { + case H_Success: + return 0; + case H_Parameter: + return -EINVAL; + case H_Hardware: + return -EIO; + case H_Busy: + case H_LongBusyOrder1msec: + case H_LongBusyOrder10msec: + case H_LongBusyOrder100msec: + case H_LongBusyOrder1sec: + case H_LongBusyOrder10sec: + case H_LongBusyOrder100sec: + return -EBUSY; + case H_Function: /* fall through */ + default: + return -EPERM; + } +} + +/** + * hvcs_free_partner_info - free pi allocated by hvcs_get_partner_info + * @head: list_head pointer for an allocated list of partner info structs to + * free. + * + * This function is used to free the partner info list that was returned by + * calling hvcs_get_partner_info(). + */ +int hvcs_free_partner_info(struct list_head *head) +{ + struct hvcs_partner_info *pi; + struct list_head *element; + + if (!head) + return -EINVAL; + + while (!list_empty(head)) { + element = head->next; + pi = list_entry(element, struct hvcs_partner_info, node); + list_del(element); + kfree(pi); + } + + return 0; +} +EXPORT_SYMBOL(hvcs_free_partner_info); + +/* Helper function for hvcs_get_partner_info */ +int hvcs_next_partner(uint32_t unit_address, + unsigned long last_p_partition_ID, + unsigned long last_p_unit_address, unsigned long *pi_buff) + +{ + long retval; + retval = plpar_hcall_norets(H_VTERM_PARTNER_INFO, unit_address, + last_p_partition_ID, + last_p_unit_address, virt_to_phys(pi_buff)); + return hvcs_convert(retval); +} + +/** + * hvcs_get_partner_info - Get all of the partner info for a vty-server adapter + * @unit_address: The unit_address of the vty-server adapter for which this + * function is fetching partner info. + * @head: An initialized list_head pointer to an empty list to use to return the + * list of partner info fetched from the hypervisor to the caller. + * @pi_buff: A page sized buffer pre-allocated prior to calling this function + * that is to be used to be used by firmware as an iterator to keep track + * of the partner info retrieval. + * + * This function returns non-zero on success, or if there is no partner info. + * + * The pi_buff is pre-allocated prior to calling this function because this + * function may be called with a spin_lock held and kmalloc of a page is not + * recommended as GFP_ATOMIC. + * + * The first long of this buffer is used to store a partner unit address. The + * second long is used to store a partner partition ID and starting at + * pi_buff[2] is the 79 character Converged Location Code (diff size than the + * unsigned longs, hence the casting mumbo jumbo you see later). + * + * Invocation of this function should always be followed by an invocation of + * hvcs_free_partner_info() using a pointer to the SAME list head instance + * that was passed as a parameter to this function. + */ +int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head, + unsigned long *pi_buff) +{ + /* + * Dealt with as longs because of the hcall interface even though the + * values are uint32_t. + */ + unsigned long last_p_partition_ID; + unsigned long last_p_unit_address; + struct hvcs_partner_info *next_partner_info = NULL; + int more = 1; + int retval; + + memset(pi_buff, 0x00, PAGE_SIZE); + /* invalid parameters */ + if (!head || !pi_buff) + return -EINVAL; + + last_p_partition_ID = last_p_unit_address = ~0UL; + INIT_LIST_HEAD(head); + + do { + retval = hvcs_next_partner(unit_address, last_p_partition_ID, + last_p_unit_address, pi_buff); + if (retval) { + /* + * Don't indicate that we've failed if we have + * any list elements. + */ + if (!list_empty(head)) + return 0; + return retval; + } + + last_p_partition_ID = pi_buff[0]; + last_p_unit_address = pi_buff[1]; + + /* This indicates that there are no further partners */ + if (last_p_partition_ID == ~0UL + && last_p_unit_address == ~0UL) + break; + + /* This is a very small struct and will be freed soon in + * hvcs_free_partner_info(). */ + next_partner_info = kmalloc(sizeof(struct hvcs_partner_info), + GFP_ATOMIC); + + if (!next_partner_info) { + printk(KERN_WARNING "HVCONSOLE: kmalloc() failed to" + " allocate partner info struct.\n"); + hvcs_free_partner_info(head); + return -ENOMEM; + } + + next_partner_info->unit_address + = (unsigned int)last_p_unit_address; + next_partner_info->partition_ID + = (unsigned int)last_p_partition_ID; + + /* copy the Null-term char too */ + strncpy(&next_partner_info->location_code[0], + (char *)&pi_buff[2], + strlen((char *)&pi_buff[2]) + 1); + + list_add_tail(&(next_partner_info->node), head); + next_partner_info = NULL; + + } while (more); + + return 0; +} +EXPORT_SYMBOL(hvcs_get_partner_info); + +/** + * hvcs_register_connection - establish a connection between this vty-server and + * a vty. + * @unit_address: The unit address of the vty-server adapter that is to be + * establish a connection. + * @p_partition_ID: The partition ID of the vty adapter that is to be connected. + * @p_unit_address: The unit address of the vty adapter to which the vty-server + * is to be connected. + * + * If this function is called once and -EINVAL is returned it may + * indicate that the partner info needs to be refreshed for the + * target unit address at which point the caller must invoke + * hvcs_get_partner_info() and then call this function again. If, + * for a second time, -EINVAL is returned then it indicates that + * there is probably already a partner connection registered to a + * different vty-server adapter. It is also possible that a second + * -EINVAL may indicate that one of the parms is not valid, for + * instance if the link was removed between the vty-server adapter + * and the vty adapter that you are trying to open. Don't shoot the + * messenger. Firmware implemented it this way. + */ +int hvcs_register_connection( uint32_t unit_address, + uint32_t p_partition_ID, uint32_t p_unit_address) +{ + long retval; + retval = plpar_hcall_norets(H_REGISTER_VTERM, unit_address, + p_partition_ID, p_unit_address); + return hvcs_convert(retval); +} +EXPORT_SYMBOL(hvcs_register_connection); + +/** + * hvcs_free_connection - free the connection between a vty-server and vty + * @unit_address: The unit address of the vty-server that is to have its + * connection severed. + * + * This function is used to free the partner connection between a vty-server + * adapter and a vty adapter. + * + * If -EBUSY is returned continue to call this function until 0 is returned. + */ +int hvcs_free_connection(uint32_t unit_address) +{ + long retval; + retval = plpar_hcall_norets(H_FREE_VTERM, unit_address); + return hvcs_convert(retval); +} +EXPORT_SYMBOL(hvcs_free_connection); diff --git a/arch/ppc64/kernel/i8259.c b/arch/ppc64/kernel/i8259.c new file mode 100644 index 00000000000..74dcfd68fc7 --- /dev/null +++ b/arch/ppc64/kernel/i8259.c @@ -0,0 +1,177 @@ +/* + * c 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "i8259.h" + +unsigned char cached_8259[2] = { 0xff, 0xff }; +#define cached_A1 (cached_8259[0]) +#define cached_21 (cached_8259[1]) + +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(i8259_lock); + +static int i8259_pic_irq_offset; +static int i8259_present; + +int i8259_irq(int cpu) +{ + int irq; + + spin_lock/*_irqsave*/(&i8259_lock/*, flags*/); + /* + * Perform an interrupt acknowledge cycle on controller 1 + */ + outb(0x0C, 0x20); + irq = inb(0x20) & 7; + if (irq == 2) + { + /* + * Interrupt is cascaded so perform interrupt + * acknowledge on controller 2 + */ + outb(0x0C, 0xA0); + irq = (inb(0xA0) & 7) + 8; + } + else if (irq==7) + { + /* + * This may be a spurious interrupt + * + * Read the interrupt status register. If the most + * significant bit is not set then there is no valid + * interrupt + */ + outb(0x0b, 0x20); + if(~inb(0x20)&0x80) { + spin_unlock/*_irqrestore*/(&i8259_lock/*, flags*/); + return -1; + } + } + spin_unlock/*_irqrestore*/(&i8259_lock/*, flags*/); + return irq; +} + +static void i8259_mask_and_ack_irq(unsigned int irq_nr) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259_lock, flags); + if ( irq_nr >= i8259_pic_irq_offset ) + irq_nr -= i8259_pic_irq_offset; + + if (irq_nr > 7) { + cached_A1 |= 1 << (irq_nr-8); + inb(0xA1); /* DUMMY */ + outb(cached_A1,0xA1); + outb(0x20,0xA0); /* Non-specific EOI */ + outb(0x20,0x20); /* Non-specific EOI to cascade */ + } else { + cached_21 |= 1 << irq_nr; + inb(0x21); /* DUMMY */ + outb(cached_21,0x21); + outb(0x20,0x20); /* Non-specific EOI */ + } + spin_unlock_irqrestore(&i8259_lock, flags); +} + +static void i8259_set_irq_mask(int irq_nr) +{ + outb(cached_A1,0xA1); + outb(cached_21,0x21); +} + +static void i8259_mask_irq(unsigned int irq_nr) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259_lock, flags); + if ( irq_nr >= i8259_pic_irq_offset ) + irq_nr -= i8259_pic_irq_offset; + if ( irq_nr < 8 ) + cached_21 |= 1 << irq_nr; + else + cached_A1 |= 1 << (irq_nr-8); + i8259_set_irq_mask(irq_nr); + spin_unlock_irqrestore(&i8259_lock, flags); +} + +static void i8259_unmask_irq(unsigned int irq_nr) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259_lock, flags); + if ( irq_nr >= i8259_pic_irq_offset ) + irq_nr -= i8259_pic_irq_offset; + if ( irq_nr < 8 ) + cached_21 &= ~(1 << irq_nr); + else + cached_A1 &= ~(1 << (irq_nr-8)); + i8259_set_irq_mask(irq_nr); + spin_unlock_irqrestore(&i8259_lock, flags); +} + +static void i8259_end_irq(unsigned int irq) +{ + if (!(get_irq_desc(irq)->status & (IRQ_DISABLED|IRQ_INPROGRESS)) && + get_irq_desc(irq)->action) + i8259_unmask_irq(irq); +} + +struct hw_interrupt_type i8259_pic = { + .typename = " i8259 ", + .enable = i8259_unmask_irq, + .disable = i8259_mask_irq, + .ack = i8259_mask_and_ack_irq, + .end = i8259_end_irq, +}; + +void __init i8259_init(int offset) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259_lock, flags); + i8259_pic_irq_offset = offset; + i8259_present = 1; + /* init master interrupt controller */ + outb(0x11, 0x20); /* Start init sequence */ + outb(0x00, 0x21); /* Vector base */ + outb(0x04, 0x21); /* edge tiggered, Cascade (slave) on IRQ2 */ + outb(0x01, 0x21); /* Select 8086 mode */ + outb(0xFF, 0x21); /* Mask all */ + /* init slave interrupt controller */ + outb(0x11, 0xA0); /* Start init sequence */ + outb(0x08, 0xA1); /* Vector base */ + outb(0x02, 0xA1); /* edge triggered, Cascade (slave) on IRQ2 */ + outb(0x01, 0xA1); /* Select 8086 mode */ + outb(0xFF, 0xA1); /* Mask all */ + outb(cached_A1, 0xA1); + outb(cached_21, 0x21); + spin_unlock_irqrestore(&i8259_lock, flags); + +} + +static int i8259_request_cascade(void) +{ + if (!i8259_present) + return -ENODEV; + + request_irq( i8259_pic_irq_offset + 2, no_action, SA_INTERRUPT, + "82c59 secondary cascade", NULL ); + + return 0; +} + +arch_initcall(i8259_request_cascade); diff --git a/arch/ppc64/kernel/i8259.h b/arch/ppc64/kernel/i8259.h new file mode 100644 index 00000000000..f74764ba0bf --- /dev/null +++ b/arch/ppc64/kernel/i8259.h @@ -0,0 +1,17 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _PPC_KERNEL_i8259_H +#define _PPC_KERNEL_i8259_H + +extern struct hw_interrupt_type i8259_pic; + +extern void i8259_init(int offset); +extern int i8259_irq(int); + +#endif /* _PPC_KERNEL_i8259_H */ diff --git a/arch/ppc64/kernel/iSeries_VpdInfo.c b/arch/ppc64/kernel/iSeries_VpdInfo.c new file mode 100644 index 00000000000..a6f0ff2d023 --- /dev/null +++ b/arch/ppc64/kernel/iSeries_VpdInfo.c @@ -0,0 +1,277 @@ +/************************************************************************/ +/* File iSeries_vpdInfo.c created by Allan Trautman on Fri Feb 2 2001. */ +/************************************************************************/ +/* This code gets the card location of the hardware */ +/* Copyright (C) 20yy */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the: */ +/* Free Software Foundation, Inc., */ +/* 59 Temple Place, Suite 330, */ +/* Boston, MA 02111-1307 USA */ +/************************************************************************/ +/* Change Activity: */ +/* Created, Feb 2, 2001 */ +/* Ported to ppc64, August 20, 2001 */ +/* End Change Activity */ +/************************************************************************/ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "pci.h" + +/* + * Size of Bus VPD data + */ +#define BUS_VPDSIZE 1024 +/* + * Bus Vpd Tags + */ +#define VpdEndOfDataTag 0x78 +#define VpdEndOfAreaTag 0x79 +#define VpdIdStringTag 0x82 +#define VpdVendorAreaTag 0x84 +/* + * Mfg Area Tags + */ +#define VpdFruFlag 0x4647 // "FG" +#define VpdFruFrameId 0x4649 // "FI" +#define VpdSlotMapFormat 0x4D46 // "MF" +#define VpdAsmPartNumber 0x504E // "PN" +#define VpdFruSerial 0x534E // "SN" +#define VpdSlotMap 0x534D // "SM" + +/* + * Structures of the areas + */ +struct MfgVpdAreaStruct { + u16 Tag; + u8 TagLength; + u8 AreaData1; + u8 AreaData2; +}; +typedef struct MfgVpdAreaStruct MfgArea; +#define MFG_ENTRY_SIZE 3 + +struct SlotMapStruct { + u8 AgentId; + u8 SecondaryAgentId; + u8 PhbId; + char CardLocation[3]; + char Parms[8]; + char Reserved[2]; +}; +typedef struct SlotMapStruct SlotMap; +#define SLOT_ENTRY_SIZE 16 + +/* + * Formats the device information. + * - Pass in pci_dev* pointer to the device. + * - Pass in buffer to place the data. Danger here is the buffer must + * be as big as the client says it is. Should be at least 128 bytes. + * Return will the length of the string data put in the buffer. + * Format: + * PCI: Bus 0, Device 26, Vendor 0x12AE Frame 1, Card C10 Ethernet + * controller + */ +int iSeries_Device_Information(struct pci_dev *PciDev, char *buffer, + int BufferSize) +{ + struct iSeries_Device_Node *DevNode = + (struct iSeries_Device_Node *)PciDev->sysdata; + int len; + + if (DevNode == NULL) + return sprintf(buffer, + "PCI: iSeries_Device_Information DevNode is NULL"); + + if (BufferSize < 128) + return 0; + + len = sprintf(buffer, "PCI: Bus%3d, Device%3d, Vendor %04X ", + ISERIES_BUS(DevNode), PCI_SLOT(PciDev->devfn), + PciDev->vendor); + len += sprintf(buffer + len, "Frame%3d, Card %4s ", + DevNode->FrameId, DevNode->CardLocation); +#ifdef CONFIG_PCI + if (pci_class_name(PciDev->class >> 8) == 0) + len += sprintf(buffer + len, "0x%04X ", + (int)(PciDev->class >> 8)); + else + len += sprintf(buffer + len, "%s", + pci_class_name(PciDev->class >> 8)); +#endif + return len; +} + +/* + * Parse the Slot Area + */ +void iSeries_Parse_SlotArea(SlotMap *MapPtr, int MapLen, + struct iSeries_Device_Node *DevNode) +{ + int SlotMapLen = MapLen; + SlotMap *SlotMapPtr = MapPtr; + + /* + * Parse Slot label until we find the one requrested + */ + while (SlotMapLen > 0) { + if (SlotMapPtr->AgentId == DevNode->AgentId ) { + /* + * If Phb wasn't found, grab the entry first one found. + */ + if (DevNode->PhbId == 0xff) + DevNode->PhbId = SlotMapPtr->PhbId; + /* Found it, extract the data. */ + if (SlotMapPtr->PhbId == DevNode->PhbId ) { + memcpy(&DevNode->CardLocation, + &SlotMapPtr->CardLocation, 3); + DevNode->CardLocation[3] = 0; + break; + } + } + /* Point to the next Slot */ + SlotMapPtr = (SlotMap *)((char *)SlotMapPtr + SLOT_ENTRY_SIZE); + SlotMapLen -= SLOT_ENTRY_SIZE; + } +} + +/* + * Parse the Mfg Area + */ +static void iSeries_Parse_MfgArea(u8 *AreaData, int AreaLen, + struct iSeries_Device_Node *DevNode) +{ + MfgArea *MfgAreaPtr = (MfgArea *)AreaData; + int MfgAreaLen = AreaLen; + u16 SlotMapFmt = 0; + + /* Parse Mfg Data */ + while (MfgAreaLen > 0) { + int MfgTagLen = MfgAreaPtr->TagLength; + /* Frame ID (FI 4649020310 ) */ + if (MfgAreaPtr->Tag == VpdFruFrameId) /* FI */ + DevNode->FrameId = MfgAreaPtr->AreaData1; + /* Slot Map Format (MF 4D46020004 ) */ + else if (MfgAreaPtr->Tag == VpdSlotMapFormat) /* MF */ + SlotMapFmt = (MfgAreaPtr->AreaData1 * 256) + + MfgAreaPtr->AreaData2; + /* Slot Map (SM 534D90 */ + else if (MfgAreaPtr->Tag == VpdSlotMap) { /* SM */ + SlotMap *SlotMapPtr; + + if (SlotMapFmt == 0x1004) + SlotMapPtr = (SlotMap *)((char *)MfgAreaPtr + + MFG_ENTRY_SIZE + 1); + else + SlotMapPtr = (SlotMap *)((char *)MfgAreaPtr + + MFG_ENTRY_SIZE); + iSeries_Parse_SlotArea(SlotMapPtr, MfgTagLen, DevNode); + } + /* + * Point to the next Mfg Area + * Use defined size, sizeof give wrong answer + */ + MfgAreaPtr = (MfgArea *)((char *)MfgAreaPtr + MfgTagLen + + MFG_ENTRY_SIZE); + MfgAreaLen -= (MfgTagLen + MFG_ENTRY_SIZE); + } +} + +/* + * Look for "BUS".. Data is not Null terminated. + * PHBID of 0xFF indicates PHB was not found in VPD Data. + */ +static int iSeries_Parse_PhbId(u8 *AreaPtr, int AreaLength) +{ + u8 *PhbPtr = AreaPtr; + int DataLen = AreaLength; + char PhbId = 0xFF; + + while (DataLen > 0) { + if ((*PhbPtr == 'B') && (*(PhbPtr + 1) == 'U') + && (*(PhbPtr + 2) == 'S')) { + PhbPtr += 3; + while (*PhbPtr == ' ') + ++PhbPtr; + PhbId = (*PhbPtr & 0x0F); + break; + } + ++PhbPtr; + --DataLen; + } + return PhbId; +} + +/* + * Parse out the VPD Areas + */ +static void iSeries_Parse_Vpd(u8 *VpdData, int VpdDataLen, + struct iSeries_Device_Node *DevNode) +{ + u8 *TagPtr = VpdData; + int DataLen = VpdDataLen - 3; + + while ((*TagPtr != VpdEndOfAreaTag) && (DataLen > 0)) { + int AreaLen = *(TagPtr + 1) + (*(TagPtr + 2) * 256); + u8 *AreaData = TagPtr + 3; + + if (*TagPtr == VpdIdStringTag) + DevNode->PhbId = iSeries_Parse_PhbId(AreaData, AreaLen); + else if (*TagPtr == VpdVendorAreaTag) + iSeries_Parse_MfgArea(AreaData, AreaLen, DevNode); + /* Point to next Area. */ + TagPtr = AreaData + AreaLen; + DataLen -= AreaLen; + } +} + +void iSeries_Get_Location_Code(struct iSeries_Device_Node *DevNode) +{ + int BusVpdLen = 0; + u8 *BusVpdPtr = (u8 *)kmalloc(BUS_VPDSIZE, GFP_KERNEL); + + if (BusVpdPtr == NULL) { + printk("PCI: Bus VPD Buffer allocation failure.\n"); + return; + } + BusVpdLen = HvCallPci_getBusVpd(ISERIES_BUS(DevNode), + ISERIES_HV_ADDR(BusVpdPtr), + BUS_VPDSIZE); + if (BusVpdLen == 0) { + kfree(BusVpdPtr); + printk("PCI: Bus VPD Buffer zero length.\n"); + return; + } + /* printk("PCI: BusVpdPtr: %p, %d\n",BusVpdPtr, BusVpdLen); */ + /* Make sure this is what I think it is */ + if (*BusVpdPtr != VpdIdStringTag) { /* 0x82 */ + printk("PCI: Bus VPD Buffer missing starting tag.\n"); + kfree(BusVpdPtr); + return; + } + iSeries_Parse_Vpd(BusVpdPtr,BusVpdLen, DevNode); + sprintf(DevNode->Location, "Frame%3d, Card %-4s", DevNode->FrameId, + DevNode->CardLocation); + kfree(BusVpdPtr); +} diff --git a/arch/ppc64/kernel/iSeries_htab.c b/arch/ppc64/kernel/iSeries_htab.c new file mode 100644 index 00000000000..aa9e8fdd1a4 --- /dev/null +++ b/arch/ppc64/kernel/iSeries_htab.c @@ -0,0 +1,242 @@ +/* + * iSeries hashtable management. + * Derived from pSeries_htab.c + * + * SMP scalability work: + * Copyright (C) 2001 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include + +static spinlock_t iSeries_hlocks[64] __cacheline_aligned_in_smp = { [0 ... 63] = SPIN_LOCK_UNLOCKED}; + +/* + * Very primitive algorithm for picking up a lock + */ +static inline void iSeries_hlock(unsigned long slot) +{ + if (slot & 0x8) + slot = ~slot; + spin_lock(&iSeries_hlocks[(slot >> 4) & 0x3f]); +} + +static inline void iSeries_hunlock(unsigned long slot) +{ + if (slot & 0x8) + slot = ~slot; + spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]); +} + +static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, + unsigned long prpn, int secondary, + unsigned long hpteflags, int bolted, int large) +{ + long slot; + HPTE lhpte; + + /* + * The hypervisor tries both primary and secondary. + * If we are being called to insert in the secondary, + * it means we have already tried both primary and secondary, + * so we return failure immediately. + */ + if (secondary) + return -1; + + iSeries_hlock(hpte_group); + + slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT); + BUG_ON(lhpte.dw0.dw0.v); + + if (slot == -1) { /* No available entry found in either group */ + iSeries_hunlock(hpte_group); + return -1; + } + + if (slot < 0) { /* MSB set means secondary group */ + secondary = 1; + slot &= 0x7fffffffffffffff; + } + + lhpte.dw1.dword1 = 0; + lhpte.dw1.dw1.rpn = physRpn_to_absRpn(prpn); + lhpte.dw1.flags.flags = hpteflags; + + lhpte.dw0.dword0 = 0; + lhpte.dw0.dw0.avpn = va >> 23; + lhpte.dw0.dw0.h = secondary; + lhpte.dw0.dw0.bolted = bolted; + lhpte.dw0.dw0.v = 1; + + /* Now fill in the actual HPTE */ + HvCallHpt_addValidate(slot, secondary, &lhpte); + + iSeries_hunlock(hpte_group); + + return (secondary << 3) | (slot & 7); +} + +static unsigned long iSeries_hpte_getword0(unsigned long slot) +{ + unsigned long dword0; + HPTE hpte; + + HvCallHpt_get(&hpte, slot); + dword0 = hpte.dw0.dword0; + + return dword0; +} + +static long iSeries_hpte_remove(unsigned long hpte_group) +{ + unsigned long slot_offset; + int i; + HPTE lhpte; + + /* Pick a random slot to start at */ + slot_offset = mftb() & 0x7; + + iSeries_hlock(hpte_group); + + for (i = 0; i < HPTES_PER_GROUP; i++) { + lhpte.dw0.dword0 = + iSeries_hpte_getword0(hpte_group + slot_offset); + + if (!lhpte.dw0.dw0.bolted) { + HvCallHpt_invalidateSetSwBitsGet(hpte_group + + slot_offset, 0, 0); + iSeries_hunlock(hpte_group); + return i; + } + + slot_offset++; + slot_offset &= 0x7; + } + + iSeries_hunlock(hpte_group); + + return -1; +} + +/* + * The HyperVisor expects the "flags" argument in this form: + * bits 0..59 : reserved + * bit 60 : N + * bits 61..63 : PP2,PP1,PP0 + */ +static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int large, int local) +{ + HPTE hpte; + unsigned long avpn = va >> 23; + + iSeries_hlock(slot); + + HvCallHpt_get(&hpte, slot); + if ((hpte.dw0.dw0.avpn == avpn) && (hpte.dw0.dw0.v)) { + /* + * Hypervisor expects bits as NPPP, which is + * different from how they are mapped in our PP. + */ + HvCallHpt_setPp(slot, (newpp & 0x3) | ((newpp & 0x4) << 1)); + iSeries_hunlock(slot); + return 0; + } + iSeries_hunlock(slot); + + return -1; +} + +/* + * Functions used to find the PTE for a particular virtual address. + * Only used during boot when bolting pages. + * + * Input : vpn : virtual page number + * Output: PTE index within the page table of the entry + * -1 on failure + */ +static long iSeries_hpte_find(unsigned long vpn) +{ + HPTE hpte; + long slot; + + /* + * The HvCallHpt_findValid interface is as follows: + * 0xffffffffffffffff : No entry found. + * 0x00000000xxxxxxxx : Entry found in primary group, slot x + * 0x80000000xxxxxxxx : Entry found in secondary group, slot x + */ + slot = HvCallHpt_findValid(&hpte, vpn); + if (hpte.dw0.dw0.v) { + if (slot < 0) { + slot &= 0x7fffffffffffffff; + slot = -slot; + } + } else + slot = -1; + return slot; +} + +/* + * Update the page protection bits. Intended to be used to create + * guard pages for kernel data structures on pages which are bolted + * in the HPT. Assumes pages being operated on will not be stolen. + * Does not work on large pages. + * + * No need to lock here because we should be the only user. + */ +static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) +{ + unsigned long vsid,va,vpn; + long slot; + + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> PAGE_SHIFT; + slot = iSeries_hpte_find(vpn); + if (slot == -1) + panic("updateboltedpp: Could not find page to bolt\n"); + HvCallHpt_setPp(slot, newpp); +} + +static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va, + int large, int local) +{ + HPTE lhpte; + unsigned long avpn = va >> 23; + unsigned long flags; + + local_irq_save(flags); + + iSeries_hlock(slot); + + lhpte.dw0.dword0 = iSeries_hpte_getword0(slot); + + if ((lhpte.dw0.dw0.avpn == avpn) && lhpte.dw0.dw0.v) + HvCallHpt_invalidateSetSwBitsGet(slot, 0, 0); + + iSeries_hunlock(slot); + + local_irq_restore(flags); +} + +void hpte_init_iSeries(void) +{ + ppc_md.hpte_invalidate = iSeries_hpte_invalidate; + ppc_md.hpte_updatepp = iSeries_hpte_updatepp; + ppc_md.hpte_updateboltedpp = iSeries_hpte_updateboltedpp; + ppc_md.hpte_insert = iSeries_hpte_insert; + ppc_md.hpte_remove = iSeries_hpte_remove; + + htab_finish_init(); +} diff --git a/arch/ppc64/kernel/iSeries_iommu.c b/arch/ppc64/kernel/iSeries_iommu.c new file mode 100644 index 00000000000..4e1a47c8a80 --- /dev/null +++ b/arch/ppc64/kernel/iSeries_iommu.c @@ -0,0 +1,175 @@ +/* + * arch/ppc64/kernel/iSeries_iommu.c + * + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup: + * + * Copyright (C) 2004 Olof Johansson , IBM Corporation + * + * Dynamic DMA mapping support, iSeries-specific parts. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include + +#include +#include +#include +#include + +extern struct list_head iSeries_Global_Device_List; + + +static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction) +{ + u64 rc; + union tce_entry tce; + + while (npages--) { + tce.te_word = 0; + tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT; + + if (tbl->it_type == TCE_VB) { + /* Virtual Bus */ + tce.te_bits.tb_valid = 1; + tce.te_bits.tb_allio = 1; + if (direction != DMA_TO_DEVICE) + tce.te_bits.tb_rdwr = 1; + } else { + /* PCI Bus */ + tce.te_bits.tb_rdwr = 1; /* Read allowed */ + if (direction != DMA_TO_DEVICE) + tce.te_bits.tb_pciwr = 1; + } + + rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, + tce.te_word); + if (rc) + panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", + rc); + index++; + uaddr += PAGE_SIZE; + } +} + +static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) +{ + u64 rc; + + while (npages--) { + rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0); + if (rc) + panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", + rc); + index++; + } +} + + +/* + * This function compares the known tables to find an iommu_table + * that has already been built for hardware TCEs. + */ +static struct iommu_table *iommu_table_find(struct iommu_table * tbl) +{ + struct iSeries_Device_Node *dp; + + list_for_each_entry(dp, &iSeries_Global_Device_List, Device_List) { + if ((dp->iommu_table != NULL) && + (dp->iommu_table->it_type == TCE_PCI) && + (dp->iommu_table->it_offset == tbl->it_offset) && + (dp->iommu_table->it_index == tbl->it_index) && + (dp->iommu_table->it_size == tbl->it_size)) + return dp->iommu_table; + } + return NULL; +} + +/* + * Call Hv with the architected data structure to get TCE table info. + * info. Put the returned data into the Linux representation of the + * TCE table data. + * The Hardware Tce table comes in three flavors. + * 1. TCE table shared between Buses. + * 2. TCE table per Bus. + * 3. TCE Table per IOA. + */ +static void iommu_table_getparms(struct iSeries_Device_Node* dn, + struct iommu_table* tbl) +{ + struct iommu_table_cb *parms; + + parms = kmalloc(sizeof(*parms), GFP_KERNEL); + if (parms == NULL) + panic("PCI_DMA: TCE Table Allocation failed."); + + memset(parms, 0, sizeof(*parms)); + + parms->itc_busno = ISERIES_BUS(dn); + parms->itc_slotno = dn->LogicalSlot; + parms->itc_virtbus = 0; + + HvCallXm_getTceTableParms(ISERIES_HV_ADDR(parms)); + + if (parms->itc_size == 0) + panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms); + + /* itc_size is in pages worth of table, it_size is in # of entries */ + tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry); + tbl->it_busno = parms->itc_busno; + tbl->it_offset = parms->itc_offset; + tbl->it_index = parms->itc_index; + tbl->it_blocksize = 1; + tbl->it_type = TCE_PCI; + + kfree(parms); +} + + +void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn) +{ + struct iommu_table *tbl; + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_getparms(dn, tbl); + + /* Look for existing tce table */ + dn->iommu_table = iommu_table_find(tbl); + if (dn->iommu_table == NULL) + dn->iommu_table = iommu_init_table(tbl); + else + kfree(tbl); +} + +static void iommu_dev_setup_iSeries(struct pci_dev *dev) { } +static void iommu_bus_setup_iSeries(struct pci_bus *bus) { } + +void iommu_init_early_iSeries(void) +{ + ppc_md.tce_build = tce_build_iSeries; + ppc_md.tce_free = tce_free_iSeries; + + ppc_md.iommu_dev_setup = iommu_dev_setup_iSeries; + ppc_md.iommu_bus_setup = iommu_bus_setup_iSeries; + + pci_iommu_init(); +} diff --git a/arch/ppc64/kernel/iSeries_irq.c b/arch/ppc64/kernel/iSeries_irq.c new file mode 100644 index 00000000000..f831d259dbb --- /dev/null +++ b/arch/ppc64/kernel/iSeries_irq.c @@ -0,0 +1,209 @@ +/************************************************************************/ +/* This module supports the iSeries PCI bus interrupt handling */ +/* Copyright (C) 20yy */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the: */ +/* Free Software Foundation, Inc., */ +/* 59 Temple Place, Suite 330, */ +/* Boston, MA 02111-1307 USA */ +/************************************************************************/ +/* Change Activity: */ +/* Created, December 13, 2000 by Wayne Holm */ +/* End Change Activity */ +/************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +static unsigned int iSeries_startup_IRQ(unsigned int irq); +static void iSeries_shutdown_IRQ(unsigned int irq); +static void iSeries_enable_IRQ(unsigned int irq); +static void iSeries_disable_IRQ(unsigned int irq); +static void iSeries_end_IRQ(unsigned int irq); + +static hw_irq_controller iSeries_IRQ_handler = { + .typename = "iSeries irq controller", + .startup = iSeries_startup_IRQ, + .shutdown = iSeries_shutdown_IRQ, + .enable = iSeries_enable_IRQ, + .disable = iSeries_disable_IRQ, + .end = iSeries_end_IRQ +}; + +/* This maps virtual irq numbers to real irqs */ +unsigned int virt_irq_to_real_map[NR_IRQS]; + +/* The next available virtual irq number */ +/* Note: the pcnet32 driver assumes irq numbers < 2 aren't valid. :( */ +static int next_virtual_irq = 2; + +/* This is called by init_IRQ. set in ppc_md.init_IRQ by iSeries_setup.c */ +void __init iSeries_init_IRQ(void) +{ + /* Register PCI event handler and open an event path */ + XmPciLpEvent_init(); +} + +/* + * This is called out of iSeries_scan_slot to allocate an IRQ for an EADS slot + * It calculates the irq value for the slot. + * Note that subBusNumber is always 0 (at the moment at least). + */ +int __init iSeries_allocate_IRQ(HvBusNumber busNumber, + HvSubBusNumber subBusNumber, HvAgentId deviceId) +{ + unsigned int realirq, virtirq; + u8 idsel = (deviceId >> 4); + u8 function = deviceId & 7; + + virtirq = next_virtual_irq++; + realirq = ((busNumber - 1) << 6) + ((idsel - 1) << 3) + function; + virt_irq_to_real_map[virtirq] = realirq; + + irq_desc[virtirq].handler = &iSeries_IRQ_handler; + return virtirq; +} + +#define REAL_IRQ_TO_BUS(irq) ((((irq) >> 6) & 0xff) + 1) +#define REAL_IRQ_TO_IDSEL(irq) ((((irq) >> 3) & 7) + 1) +#define REAL_IRQ_TO_FUNC(irq) ((irq) & 7) + +/* This is called by iSeries_activate_IRQs */ +static unsigned int iSeries_startup_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Link the IRQ number to the bridge */ + HvCallXm_connectBusUnit(bus, subBus, deviceId, irq); + + /* Unmask bridge interrupts in the FISR */ + mask = 0x01010000 << function; + HvCallPci_unmaskFisr(bus, subBus, deviceId, mask); + iSeries_enable_IRQ(irq); + return 0; +} + +/* + * This is called out of iSeries_fixup to activate interrupt + * generation for usable slots + */ +void __init iSeries_activate_IRQs() +{ + int irq; + unsigned long flags; + + for_each_irq (irq) { + irq_desc_t *desc = get_irq_desc(irq); + + if (desc && desc->handler && desc->handler->startup) { + spin_lock_irqsave(&desc->lock, flags); + desc->handler->startup(irq); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* this is not called anywhere currently */ +static void iSeries_shutdown_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + /* irq should be locked by the caller */ + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Invalidate the IRQ number in the bridge */ + HvCallXm_connectBusUnit(bus, subBus, deviceId, 0); + + /* Mask bridge interrupts in the FISR */ + mask = 0x01010000 << function; + HvCallPci_maskFisr(bus, subBus, deviceId, mask); +} + +/* + * This will be called by device drivers (via disable_IRQ) + * to disable INTA in the bridge interrupt status register. + */ +static void iSeries_disable_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + /* The IRQ has already been locked by the caller */ + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Mask secondary INTA */ + mask = 0x80000000; + HvCallPci_maskInterrupts(bus, subBus, deviceId, mask); + PPCDBG(PPCDBG_BUSWALK, "iSeries_disable_IRQ 0x%02X.%02X.%02X 0x%04X\n", + bus, subBus, deviceId, irq); +} + +/* + * This will be called by device drivers (via enable_IRQ) + * to enable INTA in the bridge interrupt status register. + */ +static void iSeries_enable_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + /* The IRQ has already been locked by the caller */ + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Unmask secondary INTA */ + mask = 0x80000000; + HvCallPci_unmaskInterrupts(bus, subBus, deviceId, mask); + PPCDBG(PPCDBG_BUSWALK, "iSeries_enable_IRQ 0x%02X.%02X.%02X 0x%04X\n", + bus, subBus, deviceId, irq); +} + +/* + * Need to define this so ppc_irq_dispatch_handler will NOT call + * enable_IRQ at the end of interrupt handling. However, this does + * nothing because there is not enough information provided to do + * the EOI HvCall. This is done by XmPciLpEvent.c + */ +static void iSeries_end_IRQ(unsigned int irq) +{ +} diff --git a/arch/ppc64/kernel/iSeries_pci.c b/arch/ppc64/kernel/iSeries_pci.c new file mode 100644 index 00000000000..bd4c2554f1a --- /dev/null +++ b/arch/ppc64/kernel/iSeries_pci.c @@ -0,0 +1,912 @@ +/* + * iSeries_pci.c + * + * Copyright (C) 2001 Allan Trautman, IBM Corporation + * + * iSeries specific routines for PCI. + * + * Based on code from pci.c and iSeries_pci.c 32bit + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +extern unsigned long io_page_mask; + +/* + * Forward declares of prototypes. + */ +static struct iSeries_Device_Node *find_Device_Node(int bus, int devfn); +static void scan_PHB_slots(struct pci_controller *Phb); +static void scan_EADS_bridge(HvBusNumber Bus, HvSubBusNumber SubBus, int IdSel); +static int scan_bridge_slot(HvBusNumber Bus, struct HvCallPci_BridgeInfo *Info); + +LIST_HEAD(iSeries_Global_Device_List); + +static int DeviceCount; + +/* Counters and control flags. */ +static long Pci_Io_Read_Count; +static long Pci_Io_Write_Count; +#if 0 +static long Pci_Cfg_Read_Count; +static long Pci_Cfg_Write_Count; +#endif +static long Pci_Error_Count; + +static int Pci_Retry_Max = 3; /* Only retry 3 times */ +static int Pci_Error_Flag = 1; /* Set Retry Error on. */ + +static struct pci_ops iSeries_pci_ops; + +/* + * Table defines + * Each Entry size is 4 MB * 1024 Entries = 4GB I/O address space. + */ +#define IOMM_TABLE_MAX_ENTRIES 1024 +#define IOMM_TABLE_ENTRY_SIZE 0x0000000000400000UL +#define BASE_IO_MEMORY 0xE000000000000000UL + +static unsigned long max_io_memory = 0xE000000000000000UL; +static long current_iomm_table_entry; + +/* + * Lookup Tables. + */ +static struct iSeries_Device_Node **iomm_table; +static u8 *iobar_table; + +/* + * Static and Global variables + */ +static char *pci_io_text = "iSeries PCI I/O"; +static DEFINE_SPINLOCK(iomm_table_lock); + +/* + * iomm_table_initialize + * + * Allocates and initalizes the Address Translation Table and Bar + * Tables to get them ready for use. Must be called before any + * I/O space is handed out to the device BARs. + */ +static void iomm_table_initialize(void) +{ + spin_lock(&iomm_table_lock); + iomm_table = kmalloc(sizeof(*iomm_table) * IOMM_TABLE_MAX_ENTRIES, + GFP_KERNEL); + iobar_table = kmalloc(sizeof(*iobar_table) * IOMM_TABLE_MAX_ENTRIES, + GFP_KERNEL); + spin_unlock(&iomm_table_lock); + if ((iomm_table == NULL) || (iobar_table == NULL)) + panic("PCI: I/O tables allocation failed.\n"); +} + +/* + * iomm_table_allocate_entry + * + * Adds pci_dev entry in address translation table + * + * - Allocates the number of entries required in table base on BAR + * size. + * - Allocates starting at BASE_IO_MEMORY and increases. + * - The size is round up to be a multiple of entry size. + * - CurrentIndex is incremented to keep track of the last entry. + * - Builds the resource entry for allocated BARs. + */ +static void iomm_table_allocate_entry(struct pci_dev *dev, int bar_num) +{ + struct resource *bar_res = &dev->resource[bar_num]; + long bar_size = pci_resource_len(dev, bar_num); + + /* + * No space to allocate, quick exit, skip Allocation. + */ + if (bar_size == 0) + return; + /* + * Set Resource values. + */ + spin_lock(&iomm_table_lock); + bar_res->name = pci_io_text; + bar_res->start = + IOMM_TABLE_ENTRY_SIZE * current_iomm_table_entry; + bar_res->start += BASE_IO_MEMORY; + bar_res->end = bar_res->start + bar_size - 1; + /* + * Allocate the number of table entries needed for BAR. + */ + while (bar_size > 0 ) { + iomm_table[current_iomm_table_entry] = dev->sysdata; + iobar_table[current_iomm_table_entry] = bar_num; + bar_size -= IOMM_TABLE_ENTRY_SIZE; + ++current_iomm_table_entry; + } + max_io_memory = BASE_IO_MEMORY + + (IOMM_TABLE_ENTRY_SIZE * current_iomm_table_entry); + spin_unlock(&iomm_table_lock); +} + +/* + * allocate_device_bars + * + * - Allocates ALL pci_dev BAR's and updates the resources with the + * BAR value. BARS with zero length will have the resources + * The HvCallPci_getBarParms is used to get the size of the BAR + * space. It calls iomm_table_allocate_entry to allocate + * each entry. + * - Loops through The Bar resources(0 - 5) including the ROM + * is resource(6). + */ +static void allocate_device_bars(struct pci_dev *dev) +{ + struct resource *bar_res; + int bar_num; + + for (bar_num = 0; bar_num <= PCI_ROM_RESOURCE; ++bar_num) { + bar_res = &dev->resource[bar_num]; + iomm_table_allocate_entry(dev, bar_num); + } +} + +/* + * Log error information to system console. + * Filter out the device not there errors. + * PCI: EADs Connect Failed 0x18.58.10 Rc: 0x00xx + * PCI: Read Vendor Failed 0x18.58.10 Rc: 0x00xx + * PCI: Connect Bus Unit Failed 0x18.58.10 Rc: 0x00xx + */ +static void pci_Log_Error(char *Error_Text, int Bus, int SubBus, + int AgentId, int HvRc) +{ + if (HvRc == 0x0302) + return; + printk(KERN_ERR "PCI: %s Failed: 0x%02X.%02X.%02X Rc: 0x%04X", + Error_Text, Bus, SubBus, AgentId, HvRc); +} + +/* + * build_device_node(u16 Bus, int SubBus, u8 DevFn) + */ +static struct iSeries_Device_Node *build_device_node(HvBusNumber Bus, + HvSubBusNumber SubBus, int AgentId, int Function) +{ + struct iSeries_Device_Node *node; + + PPCDBG(PPCDBG_BUSWALK, + "-build_device_node 0x%02X.%02X.%02X Function: %02X\n", + Bus, SubBus, AgentId, Function); + + node = kmalloc(sizeof(struct iSeries_Device_Node), GFP_KERNEL); + if (node == NULL) + return NULL; + + memset(node, 0, sizeof(struct iSeries_Device_Node)); + list_add_tail(&node->Device_List, &iSeries_Global_Device_List); +#if 0 + node->DsaAddr = ((u64)Bus << 48) + ((u64)SubBus << 40) + ((u64)0x10 << 32); +#endif + node->DsaAddr.DsaAddr = 0; + node->DsaAddr.Dsa.busNumber = Bus; + node->DsaAddr.Dsa.subBusNumber = SubBus; + node->DsaAddr.Dsa.deviceId = 0x10; + node->AgentId = AgentId; + node->DevFn = PCI_DEVFN(ISERIES_ENCODE_DEVICE(AgentId), Function); + node->IoRetry = 0; + iSeries_Get_Location_Code(node); + return node; +} + +/* + * unsigned long __init find_and_init_phbs(void) + * + * Description: + * This function checks for all possible system PCI host bridges that connect + * PCI buses. The system hypervisor is queried as to the guest partition + * ownership status. A pci_controller is built for any bus which is partially + * owned or fully owned by this guest partition. + */ +unsigned long __init find_and_init_phbs(void) +{ + struct pci_controller *phb; + HvBusNumber bus; + + PPCDBG(PPCDBG_BUSWALK, "find_and_init_phbs Entry\n"); + + /* Check all possible buses. */ + for (bus = 0; bus < 256; bus++) { + int ret = HvCallXm_testBus(bus); + if (ret == 0) { + printk("bus %d appears to exist\n", bus); + + phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), GFP_KERNEL); + if (phb == NULL) + return -ENOMEM; + pci_setup_pci_controller(phb); + + phb->pci_mem_offset = phb->local_number = bus; + phb->first_busno = bus; + phb->last_busno = bus; + phb->ops = &iSeries_pci_ops; + + PPCDBG(PPCDBG_BUSWALK, "PCI:Create iSeries pci_controller(%p), Bus: %04X\n", + phb, bus); + + /* Find and connect the devices. */ + scan_PHB_slots(phb); + } + /* + * Check for Unexpected Return code, a clue that something + * has gone wrong. + */ + else if (ret != 0x0301) + printk(KERN_ERR "Unexpected Return on Probe(0x%04X): 0x%04X", + bus, ret); + } + return 0; +} + +/* + * iSeries_pcibios_init + * + * Chance to initialize and structures or variable before PCI Bus walk. + */ +void iSeries_pcibios_init(void) +{ + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Entry.\n"); + iomm_table_initialize(); + find_and_init_phbs(); + io_page_mask = -1; + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Exit.\n"); +} + +/* + * iSeries_pci_final_fixup(void) + */ +void __init iSeries_pci_final_fixup(void) +{ + struct pci_dev *pdev = NULL; + struct iSeries_Device_Node *node; + char Buffer[256]; + int DeviceCount = 0; + + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup Entry.\n"); + + /* Fix up at the device node and pci_dev relationship */ + mf_display_src(0xC9000100); + + printk("pcibios_final_fixup\n"); + for_each_pci_dev(pdev) { + node = find_Device_Node(pdev->bus->number, pdev->devfn); + printk("pci dev %p (%x.%x), node %p\n", pdev, + pdev->bus->number, pdev->devfn, node); + + if (node != NULL) { + ++DeviceCount; + pdev->sysdata = (void *)node; + node->PciDev = pdev; + PPCDBG(PPCDBG_BUSWALK, + "pdev 0x%p <==> DevNode 0x%p\n", + pdev, node); + allocate_device_bars(pdev); + iSeries_Device_Information(pdev, Buffer, + sizeof(Buffer)); + printk("%d. %s\n", DeviceCount, Buffer); + iommu_devnode_init_iSeries(node); + } else + printk("PCI: Device Tree not found for 0x%016lX\n", + (unsigned long)pdev); + pdev->irq = node->Irq; + } + iSeries_activate_IRQs(); + mf_display_src(0xC9000200); +} + +void pcibios_fixup_bus(struct pci_bus *PciBus) +{ + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup_bus(0x%04X) Entry.\n", + PciBus->number); +} + +void pcibios_fixup_resources(struct pci_dev *pdev) +{ + PPCDBG(PPCDBG_BUSWALK, "fixup_resources pdev %p\n", pdev); +} + +/* + * Loop through each node function to find usable EADs bridges. + */ +static void scan_PHB_slots(struct pci_controller *Phb) +{ + struct HvCallPci_DeviceInfo *DevInfo; + HvBusNumber bus = Phb->local_number; /* System Bus */ + const HvSubBusNumber SubBus = 0; /* EADs is always 0. */ + int HvRc = 0; + int IdSel; + const int MaxAgents = 8; + + DevInfo = (struct HvCallPci_DeviceInfo*) + kmalloc(sizeof(struct HvCallPci_DeviceInfo), GFP_KERNEL); + if (DevInfo == NULL) + return; + + /* + * Probe for EADs Bridges + */ + for (IdSel = 1; IdSel < MaxAgents; ++IdSel) { + HvRc = HvCallPci_getDeviceInfo(bus, SubBus, IdSel, + ISERIES_HV_ADDR(DevInfo), + sizeof(struct HvCallPci_DeviceInfo)); + if (HvRc == 0) { + if (DevInfo->deviceType == HvCallPci_NodeDevice) + scan_EADS_bridge(bus, SubBus, IdSel); + else + printk("PCI: Invalid System Configuration(0x%02X)" + " for bus 0x%02x id 0x%02x.\n", + DevInfo->deviceType, bus, IdSel); + } + else + pci_Log_Error("getDeviceInfo", bus, SubBus, IdSel, HvRc); + } + kfree(DevInfo); +} + +static void scan_EADS_bridge(HvBusNumber bus, HvSubBusNumber SubBus, + int IdSel) +{ + struct HvCallPci_BridgeInfo *BridgeInfo; + HvAgentId AgentId; + int Function; + int HvRc; + + BridgeInfo = (struct HvCallPci_BridgeInfo *) + kmalloc(sizeof(struct HvCallPci_BridgeInfo), GFP_KERNEL); + if (BridgeInfo == NULL) + return; + + /* Note: hvSubBus and irq is always be 0 at this level! */ + for (Function = 0; Function < 8; ++Function) { + AgentId = ISERIES_PCI_AGENTID(IdSel, Function); + HvRc = HvCallXm_connectBusUnit(bus, SubBus, AgentId, 0); + if (HvRc == 0) { + printk("found device at bus %d idsel %d func %d (AgentId %x)\n", + bus, IdSel, Function, AgentId); + /* Connect EADs: 0x18.00.12 = 0x00 */ + PPCDBG(PPCDBG_BUSWALK, + "PCI:Connect EADs: 0x%02X.%02X.%02X\n", + bus, SubBus, AgentId); + HvRc = HvCallPci_getBusUnitInfo(bus, SubBus, AgentId, + ISERIES_HV_ADDR(BridgeInfo), + sizeof(struct HvCallPci_BridgeInfo)); + if (HvRc == 0) { + printk("bridge info: type %x subbus %x maxAgents %x maxsubbus %x logslot %x\n", + BridgeInfo->busUnitInfo.deviceType, + BridgeInfo->subBusNumber, + BridgeInfo->maxAgents, + BridgeInfo->maxSubBusNumber, + BridgeInfo->logicalSlotNumber); + PPCDBG(PPCDBG_BUSWALK, + "PCI: BridgeInfo, Type:0x%02X, SubBus:0x%02X, MaxAgents:0x%02X, MaxSubBus: 0x%02X, LSlot: 0x%02X\n", + BridgeInfo->busUnitInfo.deviceType, + BridgeInfo->subBusNumber, + BridgeInfo->maxAgents, + BridgeInfo->maxSubBusNumber, + BridgeInfo->logicalSlotNumber); + + if (BridgeInfo->busUnitInfo.deviceType == + HvCallPci_BridgeDevice) { + /* Scan_Bridge_Slot...: 0x18.00.12 */ + scan_bridge_slot(bus, BridgeInfo); + } else + printk("PCI: Invalid Bridge Configuration(0x%02X)", + BridgeInfo->busUnitInfo.deviceType); + } + } else if (HvRc != 0x000B) + pci_Log_Error("EADs Connect", + bus, SubBus, AgentId, HvRc); + } + kfree(BridgeInfo); +} + +/* + * This assumes that the node slot is always on the primary bus! + */ +static int scan_bridge_slot(HvBusNumber Bus, + struct HvCallPci_BridgeInfo *BridgeInfo) +{ + struct iSeries_Device_Node *node; + HvSubBusNumber SubBus = BridgeInfo->subBusNumber; + u16 VendorId = 0; + int HvRc = 0; + u8 Irq = 0; + int IdSel = ISERIES_GET_DEVICE_FROM_SUBBUS(SubBus); + int Function = ISERIES_GET_FUNCTION_FROM_SUBBUS(SubBus); + HvAgentId EADsIdSel = ISERIES_PCI_AGENTID(IdSel, Function); + + /* iSeries_allocate_IRQ.: 0x18.00.12(0xA3) */ + Irq = iSeries_allocate_IRQ(Bus, 0, EADsIdSel); + PPCDBG(PPCDBG_BUSWALK, + "PCI:- allocate and assign IRQ 0x%02X.%02X.%02X = 0x%02X\n", + Bus, 0, EADsIdSel, Irq); + + /* + * Connect all functions of any device found. + */ + for (IdSel = 1; IdSel <= BridgeInfo->maxAgents; ++IdSel) { + for (Function = 0; Function < 8; ++Function) { + HvAgentId AgentId = ISERIES_PCI_AGENTID(IdSel, Function); + HvRc = HvCallXm_connectBusUnit(Bus, SubBus, + AgentId, Irq); + if (HvRc != 0) { + pci_Log_Error("Connect Bus Unit", + Bus, SubBus, AgentId, HvRc); + continue; + } + + HvRc = HvCallPci_configLoad16(Bus, SubBus, AgentId, + PCI_VENDOR_ID, &VendorId); + if (HvRc != 0) { + pci_Log_Error("Read Vendor", + Bus, SubBus, AgentId, HvRc); + continue; + } + printk("read vendor ID: %x\n", VendorId); + + /* FoundDevice: 0x18.28.10 = 0x12AE */ + PPCDBG(PPCDBG_BUSWALK, + "PCI:- FoundDevice: 0x%02X.%02X.%02X = 0x%04X, irq %d\n", + Bus, SubBus, AgentId, VendorId, Irq); + HvRc = HvCallPci_configStore8(Bus, SubBus, AgentId, + PCI_INTERRUPT_LINE, Irq); + if (HvRc != 0) + pci_Log_Error("PciCfgStore Irq Failed!", + Bus, SubBus, AgentId, HvRc); + + ++DeviceCount; + node = build_device_node(Bus, SubBus, EADsIdSel, Function); + node->Vendor = VendorId; + node->Irq = Irq; + node->LogicalSlot = BridgeInfo->logicalSlotNumber; + + } /* for (Function = 0; Function < 8; ++Function) */ + } /* for (IdSel = 1; IdSel <= MaxAgents; ++IdSel) */ + return HvRc; +} + +/* + * I/0 Memory copy MUST use mmio commands on iSeries + * To do; For performance, include the hv call directly + */ +void iSeries_memset_io(volatile void __iomem *dest, char c, size_t Count) +{ + u8 ByteValue = c; + long NumberOfBytes = Count; + + while (NumberOfBytes > 0) { + iSeries_Write_Byte(ByteValue, dest++); + -- NumberOfBytes; + } +} +EXPORT_SYMBOL(iSeries_memset_io); + +void iSeries_memcpy_toio(volatile void __iomem *dest, void *source, size_t count) +{ + char *src = source; + long NumberOfBytes = count; + + while (NumberOfBytes > 0) { + iSeries_Write_Byte(*src++, dest++); + -- NumberOfBytes; + } +} +EXPORT_SYMBOL(iSeries_memcpy_toio); + +void iSeries_memcpy_fromio(void *dest, const volatile void __iomem *src, size_t count) +{ + char *dst = dest; + long NumberOfBytes = count; + + while (NumberOfBytes > 0) { + *dst++ = iSeries_Read_Byte(src++); + -- NumberOfBytes; + } +} +EXPORT_SYMBOL(iSeries_memcpy_fromio); + +/* + * Look down the chain to find the matching Device Device + */ +static struct iSeries_Device_Node *find_Device_Node(int bus, int devfn) +{ + struct list_head *pos; + + list_for_each(pos, &iSeries_Global_Device_List) { + struct iSeries_Device_Node *node = + list_entry(pos, struct iSeries_Device_Node, Device_List); + + if ((bus == ISERIES_BUS(node)) && (devfn == node->DevFn)) + return node; + } + return NULL; +} + +#if 0 +/* + * Returns the device node for the passed pci_dev + * Sanity Check Node PciDev to passed pci_dev + * If none is found, returns a NULL which the client must handle. + */ +static struct iSeries_Device_Node *get_Device_Node(struct pci_dev *pdev) +{ + struct iSeries_Device_Node *node; + + node = pdev->sysdata; + if (node == NULL || node->PciDev != pdev) + node = find_Device_Node(pdev->bus->number, pdev->devfn); + return node; +} +#endif + +/* + * Config space read and write functions. + * For now at least, we look for the device node for the bus and devfn + * that we are asked to access. It may be possible to translate the devfn + * to a subbus and deviceid more directly. + */ +static u64 hv_cfg_read_func[4] = { + HvCallPciConfigLoad8, HvCallPciConfigLoad16, + HvCallPciConfigLoad32, HvCallPciConfigLoad32 +}; + +static u64 hv_cfg_write_func[4] = { + HvCallPciConfigStore8, HvCallPciConfigStore16, + HvCallPciConfigStore32, HvCallPciConfigStore32 +}; + +/* + * Read PCI config space + */ +static int iSeries_pci_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int size, u32 *val) +{ + struct iSeries_Device_Node *node = find_Device_Node(bus->number, devfn); + u64 fn; + struct HvCallPci_LoadReturn ret; + + if (node == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset > 255) { + *val = ~0; + return PCIBIOS_BAD_REGISTER_NUMBER; + } + + fn = hv_cfg_read_func[(size - 1) & 3]; + HvCall3Ret16(fn, &ret, node->DsaAddr.DsaAddr, offset, 0); + + if (ret.rc != 0) { + *val = ~0; + return PCIBIOS_DEVICE_NOT_FOUND; /* or something */ + } + + *val = ret.value; + return 0; +} + +/* + * Write PCI config space + */ + +static int iSeries_pci_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int size, u32 val) +{ + struct iSeries_Device_Node *node = find_Device_Node(bus->number, devfn); + u64 fn; + u64 ret; + + if (node == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset > 255) + return PCIBIOS_BAD_REGISTER_NUMBER; + + fn = hv_cfg_write_func[(size - 1) & 3]; + ret = HvCall4(fn, node->DsaAddr.DsaAddr, offset, val, 0); + + if (ret != 0) + return PCIBIOS_DEVICE_NOT_FOUND; + + return 0; +} + +static struct pci_ops iSeries_pci_ops = { + .read = iSeries_pci_read_config, + .write = iSeries_pci_write_config +}; + +/* + * Check Return Code + * -> On Failure, print and log information. + * Increment Retry Count, if exceeds max, panic partition. + * -> If in retry, print and log success + * + * PCI: Device 23.90 ReadL I/O Error( 0): 0x1234 + * PCI: Device 23.90 ReadL Retry( 1) + * PCI: Device 23.90 ReadL Retry Successful(1) + */ +static int CheckReturnCode(char *TextHdr, struct iSeries_Device_Node *DevNode, + u64 ret) +{ + if (ret != 0) { + ++Pci_Error_Count; + ++DevNode->IoRetry; + printk("PCI: %s: Device 0x%04X:%02X I/O Error(%2d): 0x%04X\n", + TextHdr, DevNode->DsaAddr.Dsa.busNumber, DevNode->DevFn, + DevNode->IoRetry, (int)ret); + /* + * Bump the retry and check for retry count exceeded. + * If, Exceeded, panic the system. + */ + if ((DevNode->IoRetry > Pci_Retry_Max) && + (Pci_Error_Flag > 0)) { + mf_display_src(0xB6000103); + panic_timeout = 0; + panic("PCI: Hardware I/O Error, SRC B6000103, " + "Automatic Reboot Disabled.\n"); + } + return -1; /* Retry Try */ + } + /* If retry was in progress, log success and rest retry count */ + if (DevNode->IoRetry > 0) + DevNode->IoRetry = 0; + return 0; +} + +/* + * Translate the I/O Address into a device node, bar, and bar offset. + * Note: Make sure the passed variable end up on the stack to avoid + * the exposure of being device global. + */ +static inline struct iSeries_Device_Node *xlate_iomm_address( + const volatile void __iomem *IoAddress, + u64 *dsaptr, u64 *BarOffsetPtr) +{ + unsigned long OrigIoAddr; + unsigned long BaseIoAddr; + unsigned long TableIndex; + struct iSeries_Device_Node *DevNode; + + OrigIoAddr = (unsigned long __force)IoAddress; + if ((OrigIoAddr < BASE_IO_MEMORY) || (OrigIoAddr >= max_io_memory)) + return NULL; + BaseIoAddr = OrigIoAddr - BASE_IO_MEMORY; + TableIndex = BaseIoAddr / IOMM_TABLE_ENTRY_SIZE; + DevNode = iomm_table[TableIndex]; + + if (DevNode != NULL) { + int barnum = iobar_table[TableIndex]; + *dsaptr = DevNode->DsaAddr.DsaAddr | (barnum << 24); + *BarOffsetPtr = BaseIoAddr % IOMM_TABLE_ENTRY_SIZE; + } else + panic("PCI: Invalid PCI IoAddress detected!\n"); + return DevNode; +} + +/* + * Read MM I/O Instructions for the iSeries + * On MM I/O error, all ones are returned and iSeries_pci_IoError is cal + * else, data is returned in big Endian format. + * + * iSeries_Read_Byte = Read Byte ( 8 bit) + * iSeries_Read_Word = Read Word (16 bit) + * iSeries_Read_Long = Read Long (32 bit) + */ +u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + struct HvCallPci_LoadReturn ret; + struct iSeries_Device_Node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Read_Byte: invalid access at IO address %p\n", IoAddress); + return 0xff; + } + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad8, &ret, dsa, BarOffset, 0); + } while (CheckReturnCode("RDB", DevNode, ret.rc) != 0); + + return (u8)ret.value; +} +EXPORT_SYMBOL(iSeries_Read_Byte); + +u16 iSeries_Read_Word(const volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + struct HvCallPci_LoadReturn ret; + struct iSeries_Device_Node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Read_Word: invalid access at IO address %p\n", IoAddress); + return 0xffff; + } + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad16, &ret, dsa, + BarOffset, 0); + } while (CheckReturnCode("RDW", DevNode, ret.rc) != 0); + + return swab16((u16)ret.value); +} +EXPORT_SYMBOL(iSeries_Read_Word); + +u32 iSeries_Read_Long(const volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + struct HvCallPci_LoadReturn ret; + struct iSeries_Device_Node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Read_Long: invalid access at IO address %p\n", IoAddress); + return 0xffffffff; + } + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad32, &ret, dsa, + BarOffset, 0); + } while (CheckReturnCode("RDL", DevNode, ret.rc) != 0); + + return swab32((u32)ret.value); +} +EXPORT_SYMBOL(iSeries_Read_Long); + +/* + * Write MM I/O Instructions for the iSeries + * + * iSeries_Write_Byte = Write Byte (8 bit) + * iSeries_Write_Word = Write Word(16 bit) + * iSeries_Write_Long = Write Long(32 bit) + */ +void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + u64 rc; + struct iSeries_Device_Node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Write_Byte: invalid access at IO address %p\n", IoAddress); + return; + } + do { + ++Pci_Io_Write_Count; + rc = HvCall4(HvCallPciBarStore8, dsa, BarOffset, data, 0); + } while (CheckReturnCode("WWB", DevNode, rc) != 0); +} +EXPORT_SYMBOL(iSeries_Write_Byte); + +void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + u64 rc; + struct iSeries_Device_Node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Write_Word: invalid access at IO address %p\n", IoAddress); + return; + } + do { + ++Pci_Io_Write_Count; + rc = HvCall4(HvCallPciBarStore16, dsa, BarOffset, swab16(data), 0); + } while (CheckReturnCode("WWW", DevNode, rc) != 0); +} +EXPORT_SYMBOL(iSeries_Write_Word); + +void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + u64 rc; + struct iSeries_Device_Node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Write_Long: invalid access at IO address %p\n", IoAddress); + return; + } + do { + ++Pci_Io_Write_Count; + rc = HvCall4(HvCallPciBarStore32, dsa, BarOffset, swab32(data), 0); + } while (CheckReturnCode("WWL", DevNode, rc) != 0); +} +EXPORT_SYMBOL(iSeries_Write_Long); diff --git a/arch/ppc64/kernel/iSeries_pci_reset.c b/arch/ppc64/kernel/iSeries_pci_reset.c new file mode 100644 index 00000000000..0f785e4584f --- /dev/null +++ b/arch/ppc64/kernel/iSeries_pci_reset.c @@ -0,0 +1,104 @@ +#define PCIFR(...) +/************************************************************************/ +/* File iSeries_pci_reset.c created by Allan Trautman on Mar 21 2001. */ +/************************************************************************/ +/* This code supports the pci interface on the IBM iSeries systems. */ +/* Copyright (C) 20yy */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the: */ +/* Free Software Foundation, Inc., */ +/* 59 Temple Place, Suite 330, */ +/* Boston, MA 02111-1307 USA */ +/************************************************************************/ +/* Change Activity: */ +/* Created, March 20, 2001 */ +/* April 30, 2001, Added return codes on functions. */ +/* September 10, 2001, Ported to ppc64. */ +/* End Change Activity */ +/************************************************************************/ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include "pci.h" + +/* + * Interface to toggle the reset line + * Time is in .1 seconds, need for seconds. + */ +int iSeries_Device_ToggleReset(struct pci_dev *PciDev, int AssertTime, + int DelayTime) +{ + unsigned int AssertDelay, WaitDelay; + struct iSeries_Device_Node *DeviceNode = + (struct iSeries_Device_Node *)PciDev->sysdata; + + if (DeviceNode == NULL) { + printk("PCI: Pci Reset Failed, Device Node not found for pci_dev %p\n", + PciDev); + return -1; + } + /* + * Set defaults, Assert is .5 second, Wait is 3 seconds. + */ + if (AssertTime == 0) + AssertDelay = 500; + else + AssertDelay = AssertTime * 100; + + if (DelayTime == 0) + WaitDelay = 3000; + else + WaitDelay = DelayTime * 100; + + /* + * Assert reset + */ + DeviceNode->ReturnCode = HvCallPci_setSlotReset(ISERIES_BUS(DeviceNode), + 0x00, DeviceNode->AgentId, 1); + if (DeviceNode->ReturnCode == 0) { + msleep(AssertDelay); /* Sleep for the time */ + DeviceNode->ReturnCode = + HvCallPci_setSlotReset(ISERIES_BUS(DeviceNode), + 0x00, DeviceNode->AgentId, 0); + + /* + * Wait for device to reset + */ + msleep(WaitDelay); + } + if (DeviceNode->ReturnCode == 0) + PCIFR("Slot 0x%04X.%02 Reset\n", ISERIES_BUS(DeviceNode), + DeviceNode->AgentId); + else { + printk("PCI: Slot 0x%04X.%02X Reset Failed, RCode: %04X\n", + ISERIES_BUS(DeviceNode), DeviceNode->AgentId, + DeviceNode->ReturnCode); + PCIFR("Slot 0x%04X.%02X Reset Failed, RCode: %04X\n", + ISERIES_BUS(DeviceNode), DeviceNode->AgentId, + DeviceNode->ReturnCode); + } + return DeviceNode->ReturnCode; +} +EXPORT_SYMBOL(iSeries_Device_ToggleReset); diff --git a/arch/ppc64/kernel/iSeries_proc.c b/arch/ppc64/kernel/iSeries_proc.c new file mode 100644 index 00000000000..0cc58ddf48d --- /dev/null +++ b/arch/ppc64/kernel/iSeries_proc.c @@ -0,0 +1,162 @@ +/* + * iSeries_proc.c + * Copyright (C) 2001 Kyle A. Lucke IBM Corporation + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include /* for HZ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int __init iseries_proc_create(void) +{ + struct proc_dir_entry *e = proc_mkdir("iSeries", 0); + if (!e) + return 1; + + return 0; +} +core_initcall(iseries_proc_create); + +static char *event_types[9] = { + "Hypervisor\t\t", + "Machine Facilities\t", + "Session Manager\t", + "SPD I/O\t\t", + "Virtual Bus\t\t", + "PCI I/O\t\t", + "RIO I/O\t\t", + "Virtual Lan\t\t", + "Virtual I/O\t\t" +}; + +static int proc_lpevents_show(struct seq_file *m, void *v) +{ + unsigned int i; + + seq_printf(m, "LpEventQueue 0\n"); + seq_printf(m, " events processed:\t%lu\n", + (unsigned long)xItLpQueue.xLpIntCount); + + for (i = 0; i < 9; ++i) + seq_printf(m, " %s %10lu\n", event_types[i], + (unsigned long)xItLpQueue.xLpIntCountByType[i]); + + seq_printf(m, "\n events processed by processor:\n"); + + for_each_online_cpu(i) + seq_printf(m, " CPU%02d %10u\n", i, paca[i].lpevent_count); + + return 0; +} + +static int proc_lpevents_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_lpevents_show, NULL); +} + +static struct file_operations proc_lpevents_operations = { + .open = proc_lpevents_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static unsigned long startTitan = 0; +static unsigned long startTb = 0; + +static int proc_titantod_show(struct seq_file *m, void *v) +{ + unsigned long tb0, titan_tod; + + tb0 = get_tb(); + titan_tod = HvCallXm_loadTod(); + + seq_printf(m, "Titan\n" ); + seq_printf(m, " time base = %016lx\n", tb0); + seq_printf(m, " titan tod = %016lx\n", titan_tod); + seq_printf(m, " xProcFreq = %016x\n", + xIoHriProcessorVpd[0].xProcFreq); + seq_printf(m, " xTimeBaseFreq = %016x\n", + xIoHriProcessorVpd[0].xTimeBaseFreq); + seq_printf(m, " tb_ticks_per_jiffy = %lu\n", tb_ticks_per_jiffy); + seq_printf(m, " tb_ticks_per_usec = %lu\n", tb_ticks_per_usec); + + if (!startTitan) { + startTitan = titan_tod; + startTb = tb0; + } else { + unsigned long titan_usec = (titan_tod - startTitan) >> 12; + unsigned long tb_ticks = (tb0 - startTb); + unsigned long titan_jiffies = titan_usec / (1000000/HZ); + unsigned long titan_jiff_usec = titan_jiffies * (1000000/HZ); + unsigned long titan_jiff_rem_usec = titan_usec - titan_jiff_usec; + unsigned long tb_jiffies = tb_ticks / tb_ticks_per_jiffy; + unsigned long tb_jiff_ticks = tb_jiffies * tb_ticks_per_jiffy; + unsigned long tb_jiff_rem_ticks = tb_ticks - tb_jiff_ticks; + unsigned long tb_jiff_rem_usec = tb_jiff_rem_ticks / tb_ticks_per_usec; + unsigned long new_tb_ticks_per_jiffy = (tb_ticks * (1000000/HZ))/titan_usec; + + seq_printf(m, " titan elapsed = %lu uSec\n", titan_usec); + seq_printf(m, " tb elapsed = %lu ticks\n", tb_ticks); + seq_printf(m, " titan jiffies = %lu.%04lu \n", titan_jiffies, + titan_jiff_rem_usec); + seq_printf(m, " tb jiffies = %lu.%04lu\n", tb_jiffies, + tb_jiff_rem_usec); + seq_printf(m, " new tb_ticks_per_jiffy = %lu\n", + new_tb_ticks_per_jiffy); + } + + return 0; +} + +static int proc_titantod_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_titantod_show, NULL); +} + +static struct file_operations proc_titantod_operations = { + .open = proc_titantod_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init iseries_proc_init(void) +{ + struct proc_dir_entry *e; + + e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL); + if (e) + e->proc_fops = &proc_lpevents_operations; + + e = create_proc_entry("iSeries/titanTod", S_IFREG|S_IRUGO, NULL); + if (e) + e->proc_fops = &proc_titantod_operations; + + return 0; +} +__initcall(iseries_proc_init); diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c new file mode 100644 index 00000000000..da20120f226 --- /dev/null +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -0,0 +1,877 @@ +/* + * Copyright (c) 2000 Mike Corrigan + * Copyright (c) 1999-2000 Grant Erickson + * + * Module name: iSeries_setup.c + * + * Description: + * Architecture- / platform-specific boot-time initialization code for + * the IBM iSeries LPAR. Adapted from original code by Grant Erickson and + * code by Gary Thomas, Cort Dougan , and Dan Malek + * . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "iSeries_setup.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void hvlog(char *fmt, ...); + +#ifdef DEBUG +#define DBG(fmt...) hvlog(fmt) +#else +#define DBG(fmt...) +#endif + +/* Function Prototypes */ +extern void ppcdbg_initialize(void); + +static void build_iSeries_Memory_Map(void); +static void setup_iSeries_cache_sizes(void); +static void iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr); +extern void iSeries_pci_final_fixup(void); + +/* Global Variables */ +static unsigned long procFreqHz; +static unsigned long procFreqMhz; +static unsigned long procFreqMhzHundreths; + +static unsigned long tbFreqHz; +static unsigned long tbFreqMhz; +static unsigned long tbFreqMhzHundreths; + +int piranha_simulator; + +extern int rd_size; /* Defined in drivers/block/rd.c */ +extern unsigned long klimit; +extern unsigned long embedded_sysmap_start; +extern unsigned long embedded_sysmap_end; + +extern unsigned long iSeries_recal_tb; +extern unsigned long iSeries_recal_titan; + +static int mf_initialized; + +struct MemoryBlock { + unsigned long absStart; + unsigned long absEnd; + unsigned long logicalStart; + unsigned long logicalEnd; +}; + +/* + * Process the main store vpd to determine where the holes in memory are + * and return the number of physical blocks and fill in the array of + * block data. + */ +static unsigned long iSeries_process_Condor_mainstore_vpd( + struct MemoryBlock *mb_array, unsigned long max_entries) +{ + unsigned long holeFirstChunk, holeSizeChunks; + unsigned long numMemoryBlocks = 1; + struct IoHriMainStoreSegment4 *msVpd = + (struct IoHriMainStoreSegment4 *)xMsVpd; + unsigned long holeStart = msVpd->nonInterleavedBlocksStartAdr; + unsigned long holeEnd = msVpd->nonInterleavedBlocksEndAdr; + unsigned long holeSize = holeEnd - holeStart; + + printk("Mainstore_VPD: Condor\n"); + /* + * Determine if absolute memory has any + * holes so that we can interpret the + * access map we get back from the hypervisor + * correctly. + */ + mb_array[0].logicalStart = 0; + mb_array[0].logicalEnd = 0x100000000; + mb_array[0].absStart = 0; + mb_array[0].absEnd = 0x100000000; + + if (holeSize) { + numMemoryBlocks = 2; + holeStart = holeStart & 0x000fffffffffffff; + holeStart = addr_to_chunk(holeStart); + holeFirstChunk = holeStart; + holeSize = addr_to_chunk(holeSize); + holeSizeChunks = holeSize; + printk( "Main store hole: start chunk = %0lx, size = %0lx chunks\n", + holeFirstChunk, holeSizeChunks ); + mb_array[0].logicalEnd = holeFirstChunk; + mb_array[0].absEnd = holeFirstChunk; + mb_array[1].logicalStart = holeFirstChunk; + mb_array[1].logicalEnd = 0x100000000 - holeSizeChunks; + mb_array[1].absStart = holeFirstChunk + holeSizeChunks; + mb_array[1].absEnd = 0x100000000; + } + return numMemoryBlocks; +} + +#define MaxSegmentAreas 32 +#define MaxSegmentAdrRangeBlocks 128 +#define MaxAreaRangeBlocks 4 + +static unsigned long iSeries_process_Regatta_mainstore_vpd( + struct MemoryBlock *mb_array, unsigned long max_entries) +{ + struct IoHriMainStoreSegment5 *msVpdP = + (struct IoHriMainStoreSegment5 *)xMsVpd; + unsigned long numSegmentBlocks = 0; + u32 existsBits = msVpdP->msAreaExists; + unsigned long area_num; + + printk("Mainstore_VPD: Regatta\n"); + + for (area_num = 0; area_num < MaxSegmentAreas; ++area_num ) { + unsigned long numAreaBlocks; + struct IoHriMainStoreArea4 *currentArea; + + if (existsBits & 0x80000000) { + unsigned long block_num; + + currentArea = &msVpdP->msAreaArray[area_num]; + numAreaBlocks = currentArea->numAdrRangeBlocks; + printk("ms_vpd: processing area %2ld blocks=%ld", + area_num, numAreaBlocks); + for (block_num = 0; block_num < numAreaBlocks; + ++block_num ) { + /* Process an address range block */ + struct MemoryBlock tempBlock; + unsigned long i; + + tempBlock.absStart = + (unsigned long)currentArea->xAdrRangeBlock[block_num].blockStart; + tempBlock.absEnd = + (unsigned long)currentArea->xAdrRangeBlock[block_num].blockEnd; + tempBlock.logicalStart = 0; + tempBlock.logicalEnd = 0; + printk("\n block %ld absStart=%016lx absEnd=%016lx", + block_num, tempBlock.absStart, + tempBlock.absEnd); + + for (i = 0; i < numSegmentBlocks; ++i) { + if (mb_array[i].absStart == + tempBlock.absStart) + break; + } + if (i == numSegmentBlocks) { + if (numSegmentBlocks == max_entries) + panic("iSeries_process_mainstore_vpd: too many memory blocks"); + mb_array[numSegmentBlocks] = tempBlock; + ++numSegmentBlocks; + } else + printk(" (duplicate)"); + } + printk("\n"); + } + existsBits <<= 1; + } + /* Now sort the blocks found into ascending sequence */ + if (numSegmentBlocks > 1) { + unsigned long m, n; + + for (m = 0; m < numSegmentBlocks - 1; ++m) { + for (n = numSegmentBlocks - 1; m < n; --n) { + if (mb_array[n].absStart < + mb_array[n-1].absStart) { + struct MemoryBlock tempBlock; + + tempBlock = mb_array[n]; + mb_array[n] = mb_array[n-1]; + mb_array[n-1] = tempBlock; + } + } + } + } + /* + * Assign "logical" addresses to each block. These + * addresses correspond to the hypervisor "bitmap" space. + * Convert all addresses into units of 256K chunks. + */ + { + unsigned long i, nextBitmapAddress; + + printk("ms_vpd: %ld sorted memory blocks\n", numSegmentBlocks); + nextBitmapAddress = 0; + for (i = 0; i < numSegmentBlocks; ++i) { + unsigned long length = mb_array[i].absEnd - + mb_array[i].absStart; + + mb_array[i].logicalStart = nextBitmapAddress; + mb_array[i].logicalEnd = nextBitmapAddress + length; + nextBitmapAddress += length; + printk(" Bitmap range: %016lx - %016lx\n" + " Absolute range: %016lx - %016lx\n", + mb_array[i].logicalStart, + mb_array[i].logicalEnd, + mb_array[i].absStart, mb_array[i].absEnd); + mb_array[i].absStart = addr_to_chunk(mb_array[i].absStart & + 0x000fffffffffffff); + mb_array[i].absEnd = addr_to_chunk(mb_array[i].absEnd & + 0x000fffffffffffff); + mb_array[i].logicalStart = + addr_to_chunk(mb_array[i].logicalStart); + mb_array[i].logicalEnd = addr_to_chunk(mb_array[i].logicalEnd); + } + } + + return numSegmentBlocks; +} + +static unsigned long iSeries_process_mainstore_vpd(struct MemoryBlock *mb_array, + unsigned long max_entries) +{ + unsigned long i; + unsigned long mem_blocks = 0; + + if (cpu_has_feature(CPU_FTR_SLB)) + mem_blocks = iSeries_process_Regatta_mainstore_vpd(mb_array, + max_entries); + else + mem_blocks = iSeries_process_Condor_mainstore_vpd(mb_array, + max_entries); + + printk("Mainstore_VPD: numMemoryBlocks = %ld \n", mem_blocks); + for (i = 0; i < mem_blocks; ++i) { + printk("Mainstore_VPD: block %3ld logical chunks %016lx - %016lx\n" + " abs chunks %016lx - %016lx\n", + i, mb_array[i].logicalStart, mb_array[i].logicalEnd, + mb_array[i].absStart, mb_array[i].absEnd); + } + return mem_blocks; +} + +static void __init iSeries_get_cmdline(void) +{ + char *p, *q; + + /* copy the command line parameter from the primary VSP */ + HvCallEvent_dmaToSp(cmd_line, 2 * 64* 1024, 256, + HvLpDma_Direction_RemoteToLocal); + + p = cmd_line; + q = cmd_line + 255; + while(p < q) { + if (!*p || *p == '\n') + break; + ++p; + } + *p = 0; +} + +static void __init iSeries_init_early(void) +{ + extern unsigned long memory_limit; + + DBG(" -> iSeries_init_early()\n"); + + ppcdbg_initialize(); + +#if defined(CONFIG_BLK_DEV_INITRD) + /* + * If the init RAM disk has been configured and there is + * a non-zero starting address for it, set it up + */ + if (naca.xRamDisk) { + initrd_start = (unsigned long)__va(naca.xRamDisk); + initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE; + initrd_below_start_ok = 1; // ramdisk in kernel space + ROOT_DEV = Root_RAM0; + if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize) + rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024; + } else +#endif /* CONFIG_BLK_DEV_INITRD */ + { + /* ROOT_DEV = MKDEV(VIODASD_MAJOR, 1); */ + } + + iSeries_recal_tb = get_tb(); + iSeries_recal_titan = HvCallXm_loadTod(); + + /* + * Cache sizes must be initialized before hpte_init_iSeries is called + * as the later need them for flush_icache_range() + */ + setup_iSeries_cache_sizes(); + + /* + * Initialize the hash table management pointers + */ + hpte_init_iSeries(); + + /* + * Initialize the DMA/TCE management + */ + iommu_init_early_iSeries(); + + /* + * Initialize the table which translate Linux physical addresses to + * AS/400 absolute addresses + */ + build_iSeries_Memory_Map(); + + iSeries_get_cmdline(); + + /* Save unparsed command line copy for /proc/cmdline */ + strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); + + /* Parse early parameters, in particular mem=x */ + parse_early_param(); + + if (memory_limit) { + if (memory_limit < systemcfg->physicalMemorySize) + systemcfg->physicalMemorySize = memory_limit; + else { + printk("Ignoring mem=%lu >= ram_top.\n", memory_limit); + memory_limit = 0; + } + } + + /* Bolt kernel mappings for all of memory (or just a bit if we've got a limit) */ + iSeries_bolt_kernel(0, systemcfg->physicalMemorySize); + + lmb_init(); + lmb_add(0, systemcfg->physicalMemorySize); + lmb_analyze(); + lmb_reserve(0, __pa(klimit)); + + /* Initialize machine-dependency vectors */ +#ifdef CONFIG_SMP + smp_init_iSeries(); +#endif + if (itLpNaca.xPirEnvironMode == 0) + piranha_simulator = 1; + + /* Associate Lp Event Queue 0 with processor 0 */ + HvCallEvent_setLpEventQueueInterruptProc(0, 0); + + mf_init(); + mf_initialized = 1; + mb(); + + /* If we were passed an initrd, set the ROOT_DEV properly if the values + * look sensible. If not, clear initrd reference. + */ +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && + initrd_end > initrd_start) + ROOT_DEV = Root_RAM0; + else + initrd_start = initrd_end = 0; +#endif /* CONFIG_BLK_DEV_INITRD */ + + DBG(" <- iSeries_init_early()\n"); +} + +/* + * The iSeries may have very large memories ( > 128 GB ) and a partition + * may get memory in "chunks" that may be anywhere in the 2**52 real + * address space. The chunks are 256K in size. To map this to the + * memory model Linux expects, the AS/400 specific code builds a + * translation table to translate what Linux thinks are "physical" + * addresses to the actual real addresses. This allows us to make + * it appear to Linux that we have contiguous memory starting at + * physical address zero while in fact this could be far from the truth. + * To avoid confusion, I'll let the words physical and/or real address + * apply to the Linux addresses while I'll use "absolute address" to + * refer to the actual hardware real address. + * + * build_iSeries_Memory_Map gets information from the Hypervisor and + * looks at the Main Store VPD to determine the absolute addresses + * of the memory that has been assigned to our partition and builds + * a table used to translate Linux's physical addresses to these + * absolute addresses. Absolute addresses are needed when + * communicating with the hypervisor (e.g. to build HPT entries) + */ + +static void __init build_iSeries_Memory_Map(void) +{ + u32 loadAreaFirstChunk, loadAreaLastChunk, loadAreaSize; + u32 nextPhysChunk; + u32 hptFirstChunk, hptLastChunk, hptSizeChunks, hptSizePages; + u32 num_ptegs; + u32 totalChunks,moreChunks; + u32 currChunk, thisChunk, absChunk; + u32 currDword; + u32 chunkBit; + u64 map; + struct MemoryBlock mb[32]; + unsigned long numMemoryBlocks, curBlock; + + /* Chunk size on iSeries is 256K bytes */ + totalChunks = (u32)HvLpConfig_getMsChunks(); + klimit = msChunks_alloc(klimit, totalChunks, 1UL << 18); + + /* + * Get absolute address of our load area + * and map it to physical address 0 + * This guarantees that the loadarea ends up at physical 0 + * otherwise, it might not be returned by PLIC as the first + * chunks + */ + + loadAreaFirstChunk = (u32)addr_to_chunk(itLpNaca.xLoadAreaAddr); + loadAreaSize = itLpNaca.xLoadAreaChunks; + + /* + * Only add the pages already mapped here. + * Otherwise we might add the hpt pages + * The rest of the pages of the load area + * aren't in the HPT yet and can still + * be assigned an arbitrary physical address + */ + if ((loadAreaSize * 64) > HvPagesToMap) + loadAreaSize = HvPagesToMap / 64; + + loadAreaLastChunk = loadAreaFirstChunk + loadAreaSize - 1; + + /* + * TODO Do we need to do something if the HPT is in the 64MB load area? + * This would be required if the itLpNaca.xLoadAreaChunks includes + * the HPT size + */ + + printk("Mapping load area - physical addr = 0000000000000000\n" + " absolute addr = %016lx\n", + chunk_to_addr(loadAreaFirstChunk)); + printk("Load area size %dK\n", loadAreaSize * 256); + + for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk) + msChunks.abs[nextPhysChunk] = + loadAreaFirstChunk + nextPhysChunk; + + /* + * Get absolute address of our HPT and remember it so + * we won't map it to any physical address + */ + hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); + hptSizePages = (u32)HvCallHpt_getHptPages(); + hptSizeChunks = hptSizePages >> (msChunks.chunk_shift - PAGE_SHIFT); + hptLastChunk = hptFirstChunk + hptSizeChunks - 1; + + printk("HPT absolute addr = %016lx, size = %dK\n", + chunk_to_addr(hptFirstChunk), hptSizeChunks * 256); + + /* Fill in the hashed page table hash mask */ + num_ptegs = hptSizePages * + (PAGE_SIZE / (sizeof(HPTE) * HPTES_PER_GROUP)); + htab_hash_mask = num_ptegs - 1; + + /* + * The actual hashed page table is in the hypervisor, + * we have no direct access + */ + htab_address = NULL; + + /* + * Determine if absolute memory has any + * holes so that we can interpret the + * access map we get back from the hypervisor + * correctly. + */ + numMemoryBlocks = iSeries_process_mainstore_vpd(mb, 32); + + /* + * Process the main store access map from the hypervisor + * to build up our physical -> absolute translation table + */ + curBlock = 0; + currChunk = 0; + currDword = 0; + moreChunks = totalChunks; + + while (moreChunks) { + map = HvCallSm_get64BitsOfAccessMap(itLpNaca.xLpIndex, + currDword); + thisChunk = currChunk; + while (map) { + chunkBit = map >> 63; + map <<= 1; + if (chunkBit) { + --moreChunks; + while (thisChunk >= mb[curBlock].logicalEnd) { + ++curBlock; + if (curBlock >= numMemoryBlocks) + panic("out of memory blocks"); + } + if (thisChunk < mb[curBlock].logicalStart) + panic("memory block error"); + + absChunk = mb[curBlock].absStart + + (thisChunk - mb[curBlock].logicalStart); + if (((absChunk < hptFirstChunk) || + (absChunk > hptLastChunk)) && + ((absChunk < loadAreaFirstChunk) || + (absChunk > loadAreaLastChunk))) { + msChunks.abs[nextPhysChunk] = absChunk; + ++nextPhysChunk; + } + } + ++thisChunk; + } + ++currDword; + currChunk += 64; + } + + /* + * main store size (in chunks) is + * totalChunks - hptSizeChunks + * which should be equal to + * nextPhysChunk + */ + systemcfg->physicalMemorySize = chunk_to_addr(nextPhysChunk); +} + +/* + * Set up the variables that describe the cache line sizes + * for this machine. + */ +static void __init setup_iSeries_cache_sizes(void) +{ + unsigned int i, n; + unsigned int procIx = get_paca()->lppaca.dyn_hv_phys_proc_index; + + systemcfg->icache_size = + ppc64_caches.isize = xIoHriProcessorVpd[procIx].xInstCacheSize * 1024; + systemcfg->icache_line_size = + ppc64_caches.iline_size = + xIoHriProcessorVpd[procIx].xInstCacheOperandSize; + systemcfg->dcache_size = + ppc64_caches.dsize = + xIoHriProcessorVpd[procIx].xDataL1CacheSizeKB * 1024; + systemcfg->dcache_line_size = + ppc64_caches.dline_size = + xIoHriProcessorVpd[procIx].xDataCacheOperandSize; + ppc64_caches.ilines_per_page = PAGE_SIZE / ppc64_caches.iline_size; + ppc64_caches.dlines_per_page = PAGE_SIZE / ppc64_caches.dline_size; + + i = ppc64_caches.iline_size; + n = 0; + while ((i = (i / 2))) + ++n; + ppc64_caches.log_iline_size = n; + + i = ppc64_caches.dline_size; + n = 0; + while ((i = (i / 2))) + ++n; + ppc64_caches.log_dline_size = n; + + printk("D-cache line size = %d\n", + (unsigned int)ppc64_caches.dline_size); + printk("I-cache line size = %d\n", + (unsigned int)ppc64_caches.iline_size); +} + +/* + * Create a pte. Used during initialization only. + */ +static void iSeries_make_pte(unsigned long va, unsigned long pa, + int mode) +{ + HPTE local_hpte, rhpte; + unsigned long hash, vpn; + long slot; + + vpn = va >> PAGE_SHIFT; + hash = hpt_hash(vpn, 0); + + local_hpte.dw1.dword1 = pa | mode; + local_hpte.dw0.dword0 = 0; + local_hpte.dw0.dw0.avpn = va >> 23; + local_hpte.dw0.dw0.bolted = 1; /* bolted */ + local_hpte.dw0.dw0.v = 1; + + slot = HvCallHpt_findValid(&rhpte, vpn); + if (slot < 0) { + /* Must find space in primary group */ + panic("hash_page: hpte already exists\n"); + } + HvCallHpt_addValidate(slot, 0, (HPTE *)&local_hpte ); +} + +/* + * Bolt the kernel addr space into the HPT + */ +static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr) +{ + unsigned long pa; + unsigned long mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX; + HPTE hpte; + + for (pa = saddr; pa < eaddr ;pa += PAGE_SIZE) { + unsigned long ea = (unsigned long)__va(pa); + unsigned long vsid = get_kernel_vsid(ea); + unsigned long va = (vsid << 28) | (pa & 0xfffffff); + unsigned long vpn = va >> PAGE_SHIFT; + unsigned long slot = HvCallHpt_findValid(&hpte, vpn); + + /* Make non-kernel text non-executable */ + if (!in_kernel_text(ea)) + mode_rw |= HW_NO_EXEC; + + if (hpte.dw0.dw0.v) { + /* HPTE exists, so just bolt it */ + HvCallHpt_setSwBits(slot, 0x10, 0); + /* And make sure the pp bits are correct */ + HvCallHpt_setPp(slot, PP_RWXX); + } else + /* No HPTE exists, so create a new bolted one */ + iSeries_make_pte(va, phys_to_abs(pa), mode_rw); + } +} + +extern unsigned long ppc_proc_freq; +extern unsigned long ppc_tb_freq; + +/* + * Document me. + */ +static void __init iSeries_setup_arch(void) +{ + void *eventStack; + unsigned procIx = get_paca()->lppaca.dyn_hv_phys_proc_index; + + /* Add an eye catcher and the systemcfg layout version number */ + strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); + systemcfg->version.major = SYSTEMCFG_MAJOR; + systemcfg->version.minor = SYSTEMCFG_MINOR; + + /* Setup the Lp Event Queue */ + + /* Allocate a page for the Event Stack + * The hypervisor wants the absolute real address, so + * we subtract out the KERNELBASE and add in the + * absolute real address of the kernel load area + */ + eventStack = alloc_bootmem_pages(LpEventStackSize); + memset(eventStack, 0, LpEventStackSize); + + /* Invoke the hypervisor to initialize the event stack */ + HvCallEvent_setLpEventStack(0, eventStack, LpEventStackSize); + + /* Initialize fields in our Lp Event Queue */ + xItLpQueue.xSlicEventStackPtr = (char *)eventStack; + xItLpQueue.xSlicCurEventPtr = (char *)eventStack; + xItLpQueue.xSlicLastValidEventPtr = (char *)eventStack + + (LpEventStackSize - LpEventMaxSize); + xItLpQueue.xIndex = 0; + + /* Compute processor frequency */ + procFreqHz = ((1UL << 34) * 1000000) / + xIoHriProcessorVpd[procIx].xProcFreq; + procFreqMhz = procFreqHz / 1000000; + procFreqMhzHundreths = (procFreqHz / 10000) - (procFreqMhz * 100); + ppc_proc_freq = procFreqHz; + + /* Compute time base frequency */ + tbFreqHz = ((1UL << 32) * 1000000) / + xIoHriProcessorVpd[procIx].xTimeBaseFreq; + tbFreqMhz = tbFreqHz / 1000000; + tbFreqMhzHundreths = (tbFreqHz / 10000) - (tbFreqMhz * 100); + ppc_tb_freq = tbFreqHz; + + printk("Max logical processors = %d\n", + itVpdAreas.xSlicMaxLogicalProcs); + printk("Max physical processors = %d\n", + itVpdAreas.xSlicMaxPhysicalProcs); + printk("Processor frequency = %lu.%02lu\n", procFreqMhz, + procFreqMhzHundreths); + printk("Time base frequency = %lu.%02lu\n", tbFreqMhz, + tbFreqMhzHundreths); + systemcfg->processor = xIoHriProcessorVpd[procIx].xPVR; + printk("Processor version = %x\n", systemcfg->processor); +} + +static void iSeries_get_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "machine\t\t: 64-bit iSeries Logical Partition\n"); +} + +/* + * Document me. + * and Implement me. + */ +static int iSeries_get_irq(struct pt_regs *regs) +{ + /* -2 means ignore this interrupt */ + return -2; +} + +/* + * Document me. + */ +static void iSeries_restart(char *cmd) +{ + mf_reboot(); +} + +/* + * Document me. + */ +static void iSeries_power_off(void) +{ + mf_power_off(); +} + +/* + * Document me. + */ +static void iSeries_halt(void) +{ + mf_power_off(); +} + +extern void setup_default_decr(void); + +/* + * void __init iSeries_calibrate_decr() + * + * Description: + * This routine retrieves the internal processor frequency from the VPD, + * and sets up the kernel timer decrementer based on that value. + * + */ +static void __init iSeries_calibrate_decr(void) +{ + unsigned long cyclesPerUsec; + struct div_result divres; + + /* Compute decrementer (and TB) frequency in cycles/sec */ + cyclesPerUsec = ppc_tb_freq / 1000000; + + /* + * Set the amount to refresh the decrementer by. This + * is the number of decrementer ticks it takes for + * 1/HZ seconds. + */ + tb_ticks_per_jiffy = ppc_tb_freq / HZ; + +#if 0 + /* TEST CODE FOR ADJTIME */ + tb_ticks_per_jiffy += tb_ticks_per_jiffy / 5000; + /* END OF TEST CODE */ +#endif + + /* + * tb_ticks_per_sec = freq; would give better accuracy + * but tb_ticks_per_sec = tb_ticks_per_jiffy*HZ; assures + * that jiffies (and xtime) will match the time returned + * by do_gettimeofday. + */ + tb_ticks_per_sec = tb_ticks_per_jiffy * HZ; + tb_ticks_per_usec = cyclesPerUsec; + tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); + div128_by_32(1024 * 1024, 0, tb_ticks_per_sec, &divres); + tb_to_xs = divres.result_low; + setup_default_decr(); +} + +static void __init iSeries_progress(char * st, unsigned short code) +{ + printk("Progress: [%04x] - %s\n", (unsigned)code, st); + if (!piranha_simulator && mf_initialized) { + if (code != 0xffff) + mf_display_progress(code); + else + mf_clear_src(); + } +} + +static void __init iSeries_fixup_klimit(void) +{ + /* + * Change klimit to take into account any ram disk + * that may be included + */ + if (naca.xRamDisk) + klimit = KERNELBASE + (u64)naca.xRamDisk + + (naca.xRamDiskSize * PAGE_SIZE); + else { + /* + * No ram disk was included - check and see if there + * was an embedded system map. Change klimit to take + * into account any embedded system map + */ + if (embedded_sysmap_end) + klimit = KERNELBASE + ((embedded_sysmap_end + 4095) & + 0xfffffffffffff000); + } +} + +static int __init iSeries_src_init(void) +{ + /* clear the progress line */ + ppc_md.progress(" ", 0xffff); + return 0; +} + +late_initcall(iSeries_src_init); + +void __init iSeries_early_setup(void) +{ + iSeries_fixup_klimit(); + + ppc_md.setup_arch = iSeries_setup_arch; + ppc_md.get_cpuinfo = iSeries_get_cpuinfo; + ppc_md.init_IRQ = iSeries_init_IRQ; + ppc_md.get_irq = iSeries_get_irq; + ppc_md.init_early = iSeries_init_early, + + ppc_md.pcibios_fixup = iSeries_pci_final_fixup; + + ppc_md.restart = iSeries_restart; + ppc_md.power_off = iSeries_power_off; + ppc_md.halt = iSeries_halt; + + ppc_md.get_boot_time = iSeries_get_boot_time; + ppc_md.set_rtc_time = iSeries_set_rtc_time; + ppc_md.get_rtc_time = iSeries_get_rtc_time; + ppc_md.calibrate_decr = iSeries_calibrate_decr; + ppc_md.progress = iSeries_progress; +} + diff --git a/arch/ppc64/kernel/iSeries_setup.h b/arch/ppc64/kernel/iSeries_setup.h new file mode 100644 index 00000000000..c6eb29a245a --- /dev/null +++ b/arch/ppc64/kernel/iSeries_setup.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2000 Mike Corrigan + * Copyright (c) 1999-2000 Grant Erickson + * + * Module name: as400_setup.h + * + * Description: + * Architecture- / platform-specific boot-time initialization code for + * the IBM AS/400 LPAR. Adapted from original code by Grant Erickson and + * code by Gary Thomas, Cort Dougan , and Dan Malek + * . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ISERIES_SETUP_H__ +#define __ISERIES_SETUP_H__ + +extern void iSeries_get_boot_time(struct rtc_time *tm); +extern int iSeries_set_rtc_time(struct rtc_time *tm); +extern void iSeries_get_rtc_time(struct rtc_time *tm); + +#endif /* __ISERIES_SETUP_H__ */ diff --git a/arch/ppc64/kernel/iSeries_smp.c b/arch/ppc64/kernel/iSeries_smp.c new file mode 100644 index 00000000000..ba1f084d546 --- /dev/null +++ b/arch/ppc64/kernel/iSeries_smp.c @@ -0,0 +1,151 @@ +/* + * SMP support for iSeries machines. + * + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * Plus various changes from other IBM teams... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned long iSeries_smp_message[NR_CPUS]; + +void iSeries_smp_message_recv( struct pt_regs * regs ) +{ + int cpu = smp_processor_id(); + int msg; + + if ( num_online_cpus() < 2 ) + return; + + for ( msg = 0; msg < 4; ++msg ) + if ( test_and_clear_bit( msg, &iSeries_smp_message[cpu] ) ) + smp_message_recv( msg, regs ); +} + +static inline void smp_iSeries_do_message(int cpu, int msg) +{ + set_bit(msg, &iSeries_smp_message[cpu]); + HvCall_sendIPI(&(paca[cpu])); +} + +static void smp_iSeries_message_pass(int target, int msg) +{ + int i; + + if (target < NR_CPUS) + smp_iSeries_do_message(target, msg); + else { + for_each_online_cpu(i) { + if (target == MSG_ALL_BUT_SELF + && i == smp_processor_id()) + continue; + smp_iSeries_do_message(i, msg); + } + } +} + +static int smp_iSeries_numProcs(void) +{ + unsigned np, i; + + np = 0; + for (i=0; i < NR_CPUS; ++i) { + if (paca[i].lppaca.dyn_proc_status < 2) { + cpu_set(i, cpu_possible_map); + cpu_set(i, cpu_present_map); + cpu_set(i, cpu_sibling_map[i]); + ++np; + } + } + return np; +} + +static int smp_iSeries_probe(void) +{ + unsigned i; + unsigned np = 0; + + for (i=0; i < NR_CPUS; ++i) { + if (paca[i].lppaca.dyn_proc_status < 2) { + /*paca[i].active = 1;*/ + ++np; + } + } + + return np; +} + +static void smp_iSeries_kick_cpu(int nr) +{ + BUG_ON(nr < 0 || nr >= NR_CPUS); + + /* Verify that our partition has a processor nr */ + if (paca[nr].lppaca.dyn_proc_status >= 2) + return; + + /* The processor is currently spinning, waiting + * for the cpu_start field to become non-zero + * After we set cpu_start, the processor will + * continue on to secondary_start in iSeries_head.S + */ + paca[nr].cpu_start = 1; +} + +static void __devinit smp_iSeries_setup_cpu(int nr) +{ +} + +static struct smp_ops_t iSeries_smp_ops = { + .message_pass = smp_iSeries_message_pass, + .probe = smp_iSeries_probe, + .kick_cpu = smp_iSeries_kick_cpu, + .setup_cpu = smp_iSeries_setup_cpu, +}; + +/* This is called very early. */ +void __init smp_init_iSeries(void) +{ + smp_ops = &iSeries_smp_ops; + systemcfg->processorCount = smp_iSeries_numProcs(); +} + diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c new file mode 100644 index 00000000000..6abc621d3ba --- /dev/null +++ b/arch/ppc64/kernel/idle.c @@ -0,0 +1,380 @@ +/* + * Idle daemon for PowerPC. Idle daemon will handle any action + * that needs to be taken when the system becomes idle. + * + * Originally Written by Cort Dougan (cort@cs.nmt.edu) + * + * iSeries supported added by Mike Corrigan + * + * Additional shared processor, SMT, and firmware support + * Copyright (c) 2003 Dave Engebretsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void power4_idle(void); + +static int (*idle_loop)(void); + +#ifdef CONFIG_PPC_ISERIES +static unsigned long maxYieldTime = 0; +static unsigned long minYieldTime = 0xffffffffffffffffUL; + +static void yield_shared_processor(void) +{ + unsigned long tb; + unsigned long yieldTime; + + HvCall_setEnabledInterrupts(HvCall_MaskIPI | + HvCall_MaskLpEvent | + HvCall_MaskLpProd | + HvCall_MaskTimeout); + + tb = get_tb(); + /* Compute future tb value when yield should expire */ + HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); + + yieldTime = get_tb() - tb; + if (yieldTime > maxYieldTime) + maxYieldTime = yieldTime; + + if (yieldTime < minYieldTime) + minYieldTime = yieldTime; + + /* + * The decrementer stops during the yield. Force a fake decrementer + * here and let the timer_interrupt code sort out the actual time. + */ + get_paca()->lppaca.int_dword.fields.decr_int = 1; + process_iSeries_events(); +} + +static int iSeries_idle(void) +{ + struct paca_struct *lpaca; + long oldval; + unsigned long CTRL; + + /* ensure iSeries run light will be out when idle */ + clear_thread_flag(TIF_RUN_LIGHT); + CTRL = mfspr(CTRLF); + CTRL &= ~RUNLATCH; + mtspr(CTRLT, CTRL); + + lpaca = get_paca(); + + while (1) { + if (lpaca->lppaca.shared_proc) { + if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr)) + process_iSeries_events(); + if (!need_resched()) + yield_shared_processor(); + } else { + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + while (!need_resched()) { + HMT_medium(); + if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr)) + process_iSeries_events(); + HMT_low(); + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + } + + schedule(); + } + + return 0; +} + +#else + +static int default_idle(void) +{ + long oldval; + unsigned int cpu = smp_processor_id(); + + while (1) { + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + while (!need_resched() && !cpu_is_offline(cpu)) { + barrier(); + /* + * Go into low thread priority and possibly + * low power mode. + */ + HMT_low(); + HMT_very_low(); + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + schedule(); + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } + + return 0; +} + +#ifdef CONFIG_PPC_PSERIES + +DECLARE_PER_CPU(unsigned long, smt_snooze_delay); + +int dedicated_idle(void) +{ + long oldval; + struct paca_struct *lpaca = get_paca(), *ppaca; + unsigned long start_snooze; + unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); + unsigned int cpu = smp_processor_id(); + + ppaca = &paca[cpu ^ 1]; + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + start_snooze = __get_tb() + + *smt_snooze_delay * tb_ticks_per_usec; + while (!need_resched() && !cpu_is_offline(cpu)) { + /* + * Go into low thread priority and possibly + * low power mode. + */ + HMT_low(); + HMT_very_low(); + + if (*smt_snooze_delay == 0 || + __get_tb() < start_snooze) + continue; + + HMT_medium(); + + if (!(ppaca->lppaca.idle)) { + local_irq_disable(); + + /* + * We are about to sleep the thread + * and so wont be polling any + * more. + */ + clear_thread_flag(TIF_POLLING_NRFLAG); + + /* + * SMT dynamic mode. Cede will result + * in this thread going dormant, if the + * partner thread is still doing work. + * Thread wakes up if partner goes idle, + * an interrupt is presented, or a prod + * occurs. Returning from the cede + * enables external interrupts. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + } else { + /* + * Give the HV an opportunity at the + * processor, since we are not doing + * any work. + */ + poll_pending(); + } + } + + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + HMT_medium(); + lpaca->lppaca.idle = 0; + schedule(); + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } + return 0; +} + +static int shared_idle(void) +{ + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + while (!need_resched() && !cpu_is_offline(cpu)) { + local_irq_disable(); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + * + * Check need_resched() again with interrupts disabled + * to avoid a race. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + } + + HMT_medium(); + lpaca->lppaca.idle = 0; + schedule(); + if (cpu_is_offline(smp_processor_id()) && + system_state == SYSTEM_RUNNING) + cpu_die(); + } + + return 0; +} + +#endif /* CONFIG_PPC_PSERIES */ + +static int native_idle(void) +{ + while(1) { + /* check CPU type here */ + if (!need_resched()) + power4_idle(); + if (need_resched()) + schedule(); + + if (cpu_is_offline(_smp_processor_id()) && + system_state == SYSTEM_RUNNING) + cpu_die(); + } + return 0; +} + +#endif /* CONFIG_PPC_ISERIES */ + +void cpu_idle(void) +{ + idle_loop(); +} + +int powersave_nap; + +#ifdef CONFIG_SYSCTL +/* + * Register the sysctl to set/clear powersave_nap. + */ +static ctl_table powersave_nap_ctl_table[]={ + { + .ctl_name = KERN_PPC_POWERSAVE_NAP, + .procname = "powersave-nap", + .data = &powersave_nap, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { 0, }, +}; +static ctl_table powersave_nap_sysctl_root[] = { + { 1, "kernel", NULL, 0, 0755, powersave_nap_ctl_table, }, + { 0,}, +}; + +static int __init +register_powersave_nap_sysctl(void) +{ + register_sysctl_table(powersave_nap_sysctl_root, 0); + + return 0; +} +__initcall(register_powersave_nap_sysctl); +#endif + +int idle_setup(void) +{ + /* + * Move that junk to each platform specific file, eventually define + * a pSeries_idle for shared processor stuff + */ +#ifdef CONFIG_PPC_ISERIES + idle_loop = iSeries_idle; + return 1; +#else + idle_loop = default_idle; +#endif +#ifdef CONFIG_PPC_PSERIES + if (systemcfg->platform & PLATFORM_PSERIES) { + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (get_paca()->lppaca.shared_proc) { + printk(KERN_INFO "Using shared processor idle loop\n"); + idle_loop = shared_idle; + } else { + printk(KERN_INFO "Using dedicated idle loop\n"); + idle_loop = dedicated_idle; + } + } else { + printk(KERN_INFO "Using default idle loop\n"); + idle_loop = default_idle; + } + } +#endif /* CONFIG_PPC_PSERIES */ +#ifndef CONFIG_PPC_ISERIES + if (systemcfg->platform == PLATFORM_POWERMAC || + systemcfg->platform == PLATFORM_MAPLE) { + printk(KERN_INFO "Using native/NAP idle loop\n"); + idle_loop = native_idle; + } +#endif /* CONFIG_PPC_ISERIES */ + + return 1; +} diff --git a/arch/ppc64/kernel/idle_power4.S b/arch/ppc64/kernel/idle_power4.S new file mode 100644 index 00000000000..97e4a265504 --- /dev/null +++ b/arch/ppc64/kernel/idle_power4.S @@ -0,0 +1,79 @@ +/* + * This file contains the power_save function for 6xx & 7xxx CPUs + * rewritten in assembler + * + * Warning ! This code assumes that if your machine has a 750fx + * it will have PLL 1 set to low speed mode (used during NAP/DOZE). + * if this is not the case some additional changes will have to + * be done to check a runtime var (a bit like powersave-nap) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + + .text + +/* + * Here is the power_save_6xx function. This could eventually be + * split into several functions & changing the function pointer + * depending on the various features. + */ +_GLOBAL(power4_idle) +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) + /* We must dynamically check for the NAP feature as it + * can be cleared by CPU init after the fixups are done + */ + LOADBASE(r3,cur_cpu_spec) + ld r4,cur_cpu_spec@l(r3) + ld r4,CPU_SPEC_FEATURES(r4) + andi. r0,r4,CPU_FTR_CAN_NAP + beqlr + /* Now check if user or arch enabled NAP mode */ + LOADBASE(r3,powersave_nap) + lwz r4,powersave_nap@l(r3) + cmpwi 0,r4,0 + beqlr + + /* Clear MSR:EE */ + mfmsr r7 + li r4,0 + ori r4,r4,MSR_EE + andc r0,r7,r4 + mtmsrd r0 + + /* Check current_thread_info()->flags */ + clrrdi r4,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r4) + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + mtmsrd r7 /* out of line this ? */ + blr +1: + /* Go to NAP now */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + oris r7,r7,MSR_POW@h + sync + isync + mtmsrd r7 + isync + sync + blr + diff --git a/arch/ppc64/kernel/init_task.c b/arch/ppc64/kernel/init_task.c new file mode 100644 index 00000000000..941043ae040 --- /dev/null +++ b/arch/ppc64/kernel/init_task.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct mm_struct init_mm = INIT_MM(init_mm); + +EXPORT_SYMBOL(init_mm); + +/* + * Initial thread structure. + * + * We need to make sure that this is 16384-byte aligned due to the + * way process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = + { INIT_THREAD_INFO(init_task) }; + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); + +EXPORT_SYMBOL(init_task); diff --git a/arch/ppc64/kernel/ioctl32.c b/arch/ppc64/kernel/ioctl32.c new file mode 100644 index 00000000000..a8005db23ec --- /dev/null +++ b/arch/ppc64/kernel/ioctl32.c @@ -0,0 +1,51 @@ +/* + * ioctl32.c: Conversion between 32bit and 64bit native ioctls. + * + * Based on sparc64 ioctl32.c by: + * + * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) + * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) + * + * ppc64 changes: + * + * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com) + * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com) + * + * These routines maintain argument size conversion between 32bit and 64bit + * ioctls. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define INCLUDES +#include "compat_ioctl.c" +#include + +#define CODE +#include "compat_ioctl.c" + +#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL }, +#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl) + +#define IOCTL_TABLE_START \ + struct ioctl_trans ioctl_start[] = { +#define IOCTL_TABLE_END \ + }; + +IOCTL_TABLE_START +#include +#define DECLARES +#include "compat_ioctl.c" +COMPATIBLE_IOCTL(TIOCSTART) +COMPATIBLE_IOCTL(TIOCSTOP) +COMPATIBLE_IOCTL(TIOCSLTC) +/* Little p (/dev/rtc, /dev/envctrl, etc.) */ +COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */ +COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */ + +IOCTL_TABLE_END + +int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/ppc64/kernel/iomap.c b/arch/ppc64/kernel/iomap.c new file mode 100644 index 00000000000..153cc8b0f13 --- /dev/null +++ b/arch/ppc64/kernel/iomap.c @@ -0,0 +1,126 @@ +/* + * arch/ppc64/kernel/iomap.c + * + * ppc64 "iomap" interface implementation. + * + * (C) Copyright 2004 Linus Torvalds + */ +#include +#include +#include +#include + +/* + * Here comes the ppc64 implementation of the IOMAP + * interfaces. + */ +unsigned int fastcall ioread8(void __iomem *addr) +{ + return readb(addr); +} +unsigned int fastcall ioread16(void __iomem *addr) +{ + return readw(addr); +} +unsigned int fastcall ioread32(void __iomem *addr) +{ + return readl(addr); +} +EXPORT_SYMBOL(ioread8); +EXPORT_SYMBOL(ioread16); +EXPORT_SYMBOL(ioread32); + +void fastcall iowrite8(u8 val, void __iomem *addr) +{ + writeb(val, addr); +} +void fastcall iowrite16(u16 val, void __iomem *addr) +{ + writew(val, addr); +} +void fastcall iowrite32(u32 val, void __iomem *addr) +{ + writel(val, addr); +} +EXPORT_SYMBOL(iowrite8); +EXPORT_SYMBOL(iowrite16); +EXPORT_SYMBOL(iowrite32); + +/* + * These are the "repeat read/write" functions. Note the + * non-CPU byte order. We do things in "IO byteorder" + * here. + * + * FIXME! We could make these do EEH handling if we really + * wanted. Not clear if we do. + */ +void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +{ + _insb((u8 __force *) addr, dst, count); +} +void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +{ + _insw_ns((u16 __force *) addr, dst, count); +} +void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +{ + _insl_ns((u32 __force *) addr, dst, count); +} +EXPORT_SYMBOL(ioread8_rep); +EXPORT_SYMBOL(ioread16_rep); +EXPORT_SYMBOL(ioread32_rep); + +void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) +{ + _outsb((u8 __force *) addr, src, count); +} +void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) +{ + _outsw_ns((u16 __force *) addr, src, count); +} +void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) +{ + _outsl_ns((u32 __force *) addr, src, count); +} +EXPORT_SYMBOL(iowrite8_rep); +EXPORT_SYMBOL(iowrite16_rep); +EXPORT_SYMBOL(iowrite32_rep); + +void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + if (!_IO_IS_VALID(port)) + return NULL; + return (void __iomem *) (port+pci_io_base); +} + +void ioport_unmap(void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) +{ + unsigned long start = pci_resource_start(dev, bar); + unsigned long len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (!len) + return NULL; + if (max && len > max) + len = max; + if (flags & IORESOURCE_IO) + return ioport_map(start, len); + if (flags & IORESOURCE_MEM) + return ioremap(start, len); + /* What? */ + return NULL; +} + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(pci_iomap); +EXPORT_SYMBOL(pci_iounmap); diff --git a/arch/ppc64/kernel/iommu.c b/arch/ppc64/kernel/iommu.c new file mode 100644 index 00000000000..344164681d2 --- /dev/null +++ b/arch/ppc64/kernel/iommu.c @@ -0,0 +1,567 @@ +/* + * arch/ppc64/kernel/iommu.c + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup, new allocation schemes, virtual merging: + * Copyright (C) 2004 Olof Johansson, IBM Corporation + * and Ben. Herrenschmidt, IBM Corporation + * + * Dynamic DMA mapping support, bus-independent parts. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DBG(...) + +#ifdef CONFIG_IOMMU_VMERGE +static int novmerge = 0; +#else +static int novmerge = 1; +#endif + +static int __init setup_iommu(char *str) +{ + if (!strcmp(str, "novmerge")) + novmerge = 1; + else if (!strcmp(str, "vmerge")) + novmerge = 0; + return 1; +} + +__setup("iommu=", setup_iommu); + +static unsigned long iommu_range_alloc(struct iommu_table *tbl, + unsigned long npages, + unsigned long *handle, + unsigned int align_order) +{ + unsigned long n, end, i, start; + unsigned long limit; + int largealloc = npages > 15; + int pass = 0; + unsigned long align_mask; + + align_mask = 0xffffffffffffffffl >> (64 - align_order); + + /* This allocator was derived from x86_64's bit string search */ + + /* Sanity check */ + if (unlikely(npages) == 0) { + if (printk_ratelimit()) + WARN_ON(1); + return DMA_ERROR_CODE; + } + + if (handle && *handle) + start = *handle; + else + start = largealloc ? tbl->it_largehint : tbl->it_hint; + + /* Use only half of the table for small allocs (15 pages or less) */ + limit = largealloc ? tbl->it_size : tbl->it_halfpoint; + + if (largealloc && start < tbl->it_halfpoint) + start = tbl->it_halfpoint; + + /* The case below can happen if we have a small segment appended + * to a large, or when the previous alloc was at the very end of + * the available space. If so, go back to the initial start. + */ + if (start >= limit) + start = largealloc ? tbl->it_largehint : tbl->it_hint; + + again: + + n = find_next_zero_bit(tbl->it_map, limit, start); + + /* Align allocation */ + n = (n + align_mask) & ~align_mask; + + end = n + npages; + + if (unlikely(end >= limit)) { + if (likely(pass < 2)) { + /* First failure, just rescan the half of the table. + * Second failure, rescan the other half of the table. + */ + start = (largealloc ^ pass) ? tbl->it_halfpoint : 0; + limit = pass ? tbl->it_size : limit; + pass++; + goto again; + } else { + /* Third failure, give up */ + return DMA_ERROR_CODE; + } + } + + for (i = n; i < end; i++) + if (test_bit(i, tbl->it_map)) { + start = i+1; + goto again; + } + + for (i = n; i < end; i++) + __set_bit(i, tbl->it_map); + + /* Bump the hint to a new block for small allocs. */ + if (largealloc) { + /* Don't bump to new block to avoid fragmentation */ + tbl->it_largehint = end; + } else { + /* Overflow will be taken care of at the next allocation */ + tbl->it_hint = (end + tbl->it_blocksize - 1) & + ~(tbl->it_blocksize - 1); + } + + /* Update handle for SG allocations */ + if (handle) + *handle = end; + + return n; +} + +static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page, + unsigned int npages, enum dma_data_direction direction, + unsigned int align_order) +{ + unsigned long entry, flags; + dma_addr_t ret = DMA_ERROR_CODE; + + spin_lock_irqsave(&(tbl->it_lock), flags); + + entry = iommu_range_alloc(tbl, npages, NULL, align_order); + + if (unlikely(entry == DMA_ERROR_CODE)) { + spin_unlock_irqrestore(&(tbl->it_lock), flags); + return DMA_ERROR_CODE; + } + + entry += tbl->it_offset; /* Offset into real TCE table */ + ret = entry << PAGE_SHIFT; /* Set the return dma address */ + + /* Put the TCEs in the HW table */ + ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK, + direction); + + + /* Flush/invalidate TLB caches if necessary */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); + + /* Make sure updates are seen by hardware */ + mb(); + + return ret; +} + +static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, + unsigned int npages) +{ + unsigned long entry, free_entry; + unsigned long i; + + entry = dma_addr >> PAGE_SHIFT; + free_entry = entry - tbl->it_offset; + + if (((free_entry + npages) > tbl->it_size) || + (entry < tbl->it_offset)) { + if (printk_ratelimit()) { + printk(KERN_INFO "iommu_free: invalid entry\n"); + printk(KERN_INFO "\tentry = 0x%lx\n", entry); + printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr); + printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl); + printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno); + printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size); + printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset); + printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index); + WARN_ON(1); + } + return; + } + + ppc_md.tce_free(tbl, entry, npages); + + for (i = 0; i < npages; i++) + __clear_bit(free_entry+i, tbl->it_map); +} + +static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, + unsigned int npages) +{ + unsigned long flags; + + spin_lock_irqsave(&(tbl->it_lock), flags); + + __iommu_free(tbl, dma_addr, npages); + + /* Make sure TLB cache is flushed if the HW needs it. We do + * not do an mb() here on purpose, it is not needed on any of + * the current platforms. + */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); +} + +int iommu_map_sg(struct device *dev, struct iommu_table *tbl, + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction) +{ + dma_addr_t dma_next = 0, dma_addr; + unsigned long flags; + struct scatterlist *s, *outs, *segstart; + int outcount; + unsigned long handle; + + BUG_ON(direction == DMA_NONE); + + if ((nelems == 0) || !tbl) + return 0; + + outs = s = segstart = &sglist[0]; + outcount = 1; + handle = 0; + + /* Init first segment length for backout at failure */ + outs->dma_length = 0; + + DBG("mapping %d elements:\n", nelems); + + spin_lock_irqsave(&(tbl->it_lock), flags); + + for (s = outs; nelems; nelems--, s++) { + unsigned long vaddr, npages, entry, slen; + + slen = s->length; + /* Sanity check */ + if (slen == 0) { + dma_next = 0; + continue; + } + /* Allocate iommu entries for that segment */ + vaddr = (unsigned long)page_address(s->page) + s->offset; + npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK); + npages >>= PAGE_SHIFT; + entry = iommu_range_alloc(tbl, npages, &handle, 0); + + DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); + + /* Handle failure */ + if (unlikely(entry == DMA_ERROR_CODE)) { + if (printk_ratelimit()) + printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx" + " npages %lx\n", tbl, vaddr, npages); + goto failure; + } + + /* Convert entry to a dma_addr_t */ + entry += tbl->it_offset; + dma_addr = entry << PAGE_SHIFT; + dma_addr |= s->offset; + + DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n", + npages, entry, dma_addr); + + /* Insert into HW table */ + ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction); + + /* If we are in an open segment, try merging */ + if (segstart != s) { + DBG(" - trying merge...\n"); + /* We cannot merge if: + * - allocated dma_addr isn't contiguous to previous allocation + */ + if (novmerge || (dma_addr != dma_next)) { + /* Can't merge: create a new segment */ + segstart = s; + outcount++; outs++; + DBG(" can't merge, new segment.\n"); + } else { + outs->dma_length += s->length; + DBG(" merged, new len: %lx\n", outs->dma_length); + } + } + + if (segstart == s) { + /* This is a new segment, fill entries */ + DBG(" - filling new segment.\n"); + outs->dma_address = dma_addr; + outs->dma_length = slen; + } + + /* Calculate next page pointer for contiguous check */ + dma_next = dma_addr + slen; + + DBG(" - dma next is: %lx\n", dma_next); + } + + /* Flush/invalidate TLB caches if necessary */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); + + /* Make sure updates are seen by hardware */ + mb(); + + DBG("mapped %d elements:\n", outcount); + + /* For the sake of iommu_free_sg, we clear out the length in the + * next entry of the sglist if we didn't fill the list completely + */ + if (outcount < nelems) { + outs++; + outs->dma_address = DMA_ERROR_CODE; + outs->dma_length = 0; + } + return outcount; + + failure: + for (s = &sglist[0]; s <= outs; s++) { + if (s->dma_length != 0) { + unsigned long vaddr, npages; + + vaddr = s->dma_address & PAGE_MASK; + npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr) + >> PAGE_SHIFT; + __iommu_free(tbl, vaddr, npages); + } + } + spin_unlock_irqrestore(&(tbl->it_lock), flags); + return 0; +} + + +void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + unsigned long flags; + + BUG_ON(direction == DMA_NONE); + + if (!tbl) + return; + + spin_lock_irqsave(&(tbl->it_lock), flags); + + while (nelems--) { + unsigned int npages; + dma_addr_t dma_handle = sglist->dma_address; + + if (sglist->dma_length == 0) + break; + npages = (PAGE_ALIGN(dma_handle + sglist->dma_length) + - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT; + __iommu_free(tbl, dma_handle, npages); + sglist++; + } + + /* Flush/invalidate TLBs if necessary. As for iommu_free(), we + * do not do an mb() here, the affected platforms do not need it + * when freeing. + */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); +} + +/* + * Build a iommu_table structure. This contains a bit map which + * is used to manage allocation of the tce space. + */ +struct iommu_table *iommu_init_table(struct iommu_table *tbl) +{ + unsigned long sz; + static int welcomed = 0; + + /* Set aside 1/4 of the table for large allocations. */ + tbl->it_halfpoint = tbl->it_size * 3 / 4; + + /* number of bytes needed for the bitmap */ + sz = (tbl->it_size + 7) >> 3; + + tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz)); + if (!tbl->it_map) + panic("iommu_init_table: Can't allocate %ld bytes\n", sz); + + memset(tbl->it_map, 0, sz); + + tbl->it_hint = 0; + tbl->it_largehint = tbl->it_halfpoint; + spin_lock_init(&tbl->it_lock); + + if (!welcomed) { + printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n", + novmerge ? "disabled" : "enabled"); + welcomed = 1; + } + + return tbl; +} + +void iommu_free_table(struct device_node *dn) +{ + struct iommu_table *tbl = dn->iommu_table; + unsigned long bitmap_sz, i; + unsigned int order; + + if (!tbl || !tbl->it_map) { + printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__, + dn->full_name); + return; + } + + /* verify that table contains no entries */ + /* it_size is in entries, and we're examining 64 at a time */ + for (i = 0; i < (tbl->it_size/64); i++) { + if (tbl->it_map[i] != 0) { + printk(KERN_WARNING "%s: Unexpected TCEs for %s\n", + __FUNCTION__, dn->full_name); + break; + } + } + + /* calculate bitmap size in bytes */ + bitmap_sz = (tbl->it_size + 7) / 8; + + /* free bitmap */ + order = get_order(bitmap_sz); + free_pages((unsigned long) tbl->it_map, order); + + /* free table */ + kfree(tbl); +} + +/* Creates TCEs for a user provided buffer. The user buffer must be + * contiguous real kernel storage (not vmalloc). The address of the buffer + * passed here is the kernel (virtual) address of the buffer. The buffer + * need not be page aligned, the dma_addr_t returned will point to the same + * byte within the page as vaddr. + */ +dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, + size_t size, enum dma_data_direction direction) +{ + dma_addr_t dma_handle = DMA_ERROR_CODE; + unsigned long uaddr; + unsigned int npages; + + BUG_ON(direction == DMA_NONE); + + uaddr = (unsigned long)vaddr; + npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK); + npages >>= PAGE_SHIFT; + + if (tbl) { + dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0); + if (dma_handle == DMA_ERROR_CODE) { + if (printk_ratelimit()) { + printk(KERN_INFO "iommu_alloc failed, " + "tbl %p vaddr %p npages %d\n", + tbl, vaddr, npages); + } + } else + dma_handle |= (uaddr & ~PAGE_MASK); + } + + return dma_handle; +} + +void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + + if (tbl) + iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) - + (dma_handle & PAGE_MASK)) >> PAGE_SHIFT); +} + +/* Allocates a contiguous real buffer and creates mappings over it. + * Returns the virtual address of the buffer and sets dma_handle + * to the dma address (mapping) of the first page. + */ +void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, + dma_addr_t *dma_handle, unsigned int __nocast flag) +{ + void *ret = NULL; + dma_addr_t mapping; + unsigned int npages, order; + + size = PAGE_ALIGN(size); + npages = size >> PAGE_SHIFT; + order = get_order(size); + + /* + * Client asked for way too much space. This is checked later + * anyway. It is easier to debug here for the drivers than in + * the tce tables. + */ + if (order >= IOMAP_MAX_ORDER) { + printk("iommu_alloc_consistent size too large: 0x%lx\n", size); + return NULL; + } + + if (!tbl) + return NULL; + + /* Alloc enough pages (and possibly more) */ + ret = (void *)__get_free_pages(flag, order); + if (!ret) + return NULL; + memset(ret, 0, size); + + /* Set up tces to cover the allocated range */ + mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order); + if (mapping == DMA_ERROR_CODE) { + free_pages((unsigned long)ret, order); + ret = NULL; + } else + *dma_handle = mapping; + return ret; +} + +void iommu_free_coherent(struct iommu_table *tbl, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + unsigned int npages; + + if (tbl) { + size = PAGE_ALIGN(size); + npages = size >> PAGE_SHIFT; + iommu_free(tbl, dma_handle, npages); + free_pages((unsigned long)vaddr, get_order(size)); + } +} diff --git a/arch/ppc64/kernel/irq.c b/arch/ppc64/kernel/irq.c new file mode 100644 index 00000000000..4fd7f203c1e --- /dev/null +++ b/arch/ppc64/kernel/irq.c @@ -0,0 +1,519 @@ +/* + * arch/ppc/kernel/irq.c + * + * Derived from arch/i386/kernel/irq.c + * Copyright (C) 1992 Linus Torvalds + * Adapted from arch/i386 by Gary Thomas + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Updated and modified by Cort Dougan (cort@cs.nmt.edu) + * Copyright (C) 1996 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP +extern void iSeries_smp_message_recv( struct pt_regs * ); +#endif + +extern irq_desc_t irq_desc[NR_IRQS]; +EXPORT_SYMBOL(irq_desc); + +int distribute_irqs = 1; +int __irq_offset_value; +int ppc_spurious_interrupts; +unsigned long lpevent_count; +u64 ppc64_interrupt_controller; + +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *) v, j; + struct irqaction * action; + irq_desc_t *desc; + unsigned long flags; + + if (i == 0) { + seq_printf(p, " "); + for (j=0; jlock, flags); + action = desc->action; + if (!action || !action->handler) + goto skip; + seq_printf(p, "%3d: ", i); +#ifdef CONFIG_SMP + for (j = 0; j < NR_CPUS; j++) { + if (cpu_online(j)) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + } +#else + seq_printf(p, "%10u ", kstat_irqs(i)); +#endif /* CONFIG_SMP */ + if (desc->handler) + seq_printf(p, " %s ", desc->handler->typename ); + else + seq_printf(p, " None "); + seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge "); + seq_printf(p, " %s",action->name); + for (action=action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + seq_putc(p, '\n'); +skip: + spin_unlock_irqrestore(&desc->lock, flags); + } else if (i == NR_IRQS) + seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts); + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +void fixup_irqs(cpumask_t map) +{ + unsigned int irq; + static int warned; + + for_each_irq(irq) { + cpumask_t mask; + + if (irq_desc[irq].status & IRQ_PER_CPU) + continue; + + cpus_and(mask, irq_affinity[irq], map); + if (any_online_cpu(mask) == NR_CPUS) { + printk("Breaking affinity for irq %i\n", irq); + mask = map; + } + if (irq_desc[irq].handler->set_affinity) + irq_desc[irq].handler->set_affinity(irq, mask); + else if (irq_desc[irq].action && !(warned++)) + printk("Cannot set affinity for irq %i\n", irq); + } + + local_irq_enable(); + mdelay(1); + local_irq_disable(); +} +#endif + +extern int noirqdebug; + +/* + * Eventually, this should take an array of interrupts and an array size + * so it can dispatch multiple interrupts. + */ +void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq) +{ + int status; + struct irqaction *action; + int cpu = smp_processor_id(); + irq_desc_t *desc = get_irq_desc(irq); + irqreturn_t action_ret; +#ifdef CONFIG_IRQSTACKS + struct thread_info *curtp, *irqtp; +#endif + + kstat_cpu(cpu).irqs[irq]++; + + if (desc->status & IRQ_PER_CPU) { + /* no locking required for CPU-local interrupts: */ + ack_irq(irq); + action_ret = handle_IRQ_event(irq, regs, desc->action); + desc->handler->end(irq); + return; + } + + spin_lock(&desc->lock); + ack_irq(irq); + /* + REPLAY is when Linux resends an IRQ that was dropped earlier + WAITING is used by probe to mark irqs that are being tested + */ + status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); + status |= IRQ_PENDING; /* we _want_ to handle it */ + + /* + * If the IRQ is disabled for whatever reason, we cannot + * use the action we have. + */ + action = NULL; + if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { + action = desc->action; + if (!action || !action->handler) { + ppc_spurious_interrupts++; + printk(KERN_DEBUG "Unhandled interrupt %x, disabled\n", irq); + /* We can't call disable_irq here, it would deadlock */ + if (!desc->depth) + desc->depth = 1; + desc->status |= IRQ_DISABLED; + /* This is not a real spurrious interrupt, we + * have to eoi it, so we jump to out + */ + mask_irq(irq); + goto out; + } + status &= ~IRQ_PENDING; /* we commit to handling */ + status |= IRQ_INPROGRESS; /* we are handling it */ + } + desc->status = status; + + /* + * If there is no IRQ handler or it was disabled, exit early. + Since we set PENDING, if another processor is handling + a different instance of this same irq, the other processor + will take care of it. + */ + if (unlikely(!action)) + goto out; + + /* + * Edge triggered interrupts need to remember + * pending events. + * This applies to any hw interrupts that allow a second + * instance of the same irq to arrive while we are in do_IRQ + * or in the handler. But the code here only handles the _second_ + * instance of the irq, not the third or fourth. So it is mostly + * useful for irq hardware that does not mask cleanly in an + * SMP environment. + */ + for (;;) { + spin_unlock(&desc->lock); + +#ifdef CONFIG_IRQSTACKS + /* Switch to the irq stack to handle this */ + curtp = current_thread_info(); + irqtp = hardirq_ctx[smp_processor_id()]; + if (curtp != irqtp) { + irqtp->task = curtp->task; + irqtp->flags = 0; + action_ret = call_handle_IRQ_event(irq, regs, action, irqtp); + irqtp->task = NULL; + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); + } else +#endif + action_ret = handle_IRQ_event(irq, regs, action); + + spin_lock(&desc->lock); + if (!noirqdebug) + note_interrupt(irq, desc, action_ret); + if (likely(!(desc->status & IRQ_PENDING))) + break; + desc->status &= ~IRQ_PENDING; + } +out: + desc->status &= ~IRQ_INPROGRESS; + /* + * The ->end() handler has to deal with interrupts which got + * disabled while the handler was running. + */ + if (desc->handler) { + if (desc->handler->end) + desc->handler->end(irq); + else if (desc->handler->enable) + desc->handler->enable(irq); + } + spin_unlock(&desc->lock); +} + +#ifdef CONFIG_PPC_ISERIES +void do_IRQ(struct pt_regs *regs) +{ + struct paca_struct *lpaca; + struct ItLpQueue *lpq; + + irq_enter(); + +#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: is there less than 2KB free? */ + { + long sp; + + sp = __get_SP() & (THREAD_SIZE-1); + + if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { + printk("do_IRQ: stack overflow: %ld\n", + sp - sizeof(struct thread_info)); + dump_stack(); + } + } +#endif + + lpaca = get_paca(); +#ifdef CONFIG_SMP + if (lpaca->lppaca.int_dword.fields.ipi_cnt) { + lpaca->lppaca.int_dword.fields.ipi_cnt = 0; + iSeries_smp_message_recv(regs); + } +#endif /* CONFIG_SMP */ + lpq = lpaca->lpqueue_ptr; + if (lpq && ItLpQueue_isLpIntPending(lpq)) + lpevent_count += ItLpQueue_process(lpq, regs); + + irq_exit(); + + if (lpaca->lppaca.int_dword.fields.decr_int) { + lpaca->lppaca.int_dword.fields.decr_int = 0; + /* Signal a fake decrementer interrupt */ + timer_interrupt(regs); + } +} + +#else /* CONFIG_PPC_ISERIES */ + +void do_IRQ(struct pt_regs *regs) +{ + int irq; + + irq_enter(); + +#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: is there less than 2KB free? */ + { + long sp; + + sp = __get_SP() & (THREAD_SIZE-1); + + if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { + printk("do_IRQ: stack overflow: %ld\n", + sp - sizeof(struct thread_info)); + dump_stack(); + } + } +#endif + + irq = ppc_md.get_irq(regs); + + if (irq >= 0) + ppc_irq_dispatch_handler(regs, irq); + else + /* That's not SMP safe ... but who cares ? */ + ppc_spurious_interrupts++; + + irq_exit(); +} +#endif /* CONFIG_PPC_ISERIES */ + +void __init init_IRQ(void) +{ + static int once = 0; + + if (once) + return; + + once++; + + ppc_md.init_IRQ(); + irq_ctx_init(); +} + +#ifndef CONFIG_PPC_ISERIES +/* + * Virtual IRQ mapping code, used on systems with XICS interrupt controllers. + */ + +#define UNDEFINED_IRQ 0xffffffff +unsigned int virt_irq_to_real_map[NR_IRQS]; + +/* + * Don't use virtual irqs 0, 1, 2 for devices. + * The pcnet32 driver considers interrupt numbers < 2 to be invalid, + * and 2 is the XICS IPI interrupt. + * We limit virtual irqs to 17 less than NR_IRQS so that when we + * offset them by 16 (to reserve the first 16 for ISA interrupts) + * we don't end up with an interrupt number >= NR_IRQS. + */ +#define MIN_VIRT_IRQ 3 +#define MAX_VIRT_IRQ (NR_IRQS - NUM_ISA_INTERRUPTS - 1) +#define NR_VIRT_IRQS (MAX_VIRT_IRQ - MIN_VIRT_IRQ + 1) + +void +virt_irq_init(void) +{ + int i; + for (i = 0; i < NR_IRQS; i++) + virt_irq_to_real_map[i] = UNDEFINED_IRQ; +} + +/* Create a mapping for a real_irq if it doesn't already exist. + * Return the virtual irq as a convenience. + */ +int virt_irq_create_mapping(unsigned int real_irq) +{ + unsigned int virq, first_virq; + static int warned; + + if (ppc64_interrupt_controller == IC_OPEN_PIC) + return real_irq; /* no mapping for openpic (for now) */ + + /* don't map interrupts < MIN_VIRT_IRQ */ + if (real_irq < MIN_VIRT_IRQ) { + virt_irq_to_real_map[real_irq] = real_irq; + return real_irq; + } + + /* map to a number between MIN_VIRT_IRQ and MAX_VIRT_IRQ */ + virq = real_irq; + if (virq > MAX_VIRT_IRQ) + virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ; + + /* search for this number or a free slot */ + first_virq = virq; + while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) { + if (virt_irq_to_real_map[virq] == real_irq) + return virq; + if (++virq > MAX_VIRT_IRQ) + virq = MIN_VIRT_IRQ; + if (virq == first_virq) + goto nospace; /* oops, no free slots */ + } + + virt_irq_to_real_map[virq] = real_irq; + return virq; + + nospace: + if (!warned) { + printk(KERN_CRIT "Interrupt table is full\n"); + printk(KERN_CRIT "Increase NR_IRQS (currently %d) " + "in your kernel sources and rebuild.\n", NR_IRQS); + warned = 1; + } + return NO_IRQ; +} + +/* + * In most cases will get a hit on the very first slot checked in the + * virt_irq_to_real_map. Only when there are a large number of + * IRQs will this be expensive. + */ +unsigned int real_irq_to_virt_slowpath(unsigned int real_irq) +{ + unsigned int virq; + unsigned int first_virq; + + virq = real_irq; + + if (virq > MAX_VIRT_IRQ) + virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ; + + first_virq = virq; + + do { + if (virt_irq_to_real_map[virq] == real_irq) + return virq; + + virq++; + + if (virq >= MAX_VIRT_IRQ) + virq = 0; + + } while (first_virq != virq); + + return NO_IRQ; + +} + +#endif /* CONFIG_PPC_ISERIES */ + +#ifdef CONFIG_IRQSTACKS +struct thread_info *softirq_ctx[NR_CPUS]; +struct thread_info *hardirq_ctx[NR_CPUS]; + +void irq_ctx_init(void) +{ + struct thread_info *tp; + int i; + + for_each_cpu(i) { + memset((void *)softirq_ctx[i], 0, THREAD_SIZE); + tp = softirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = SOFTIRQ_OFFSET; + + memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); + tp = hardirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = HARDIRQ_OFFSET; + } +} + +void do_softirq(void) +{ + unsigned long flags; + struct thread_info *curtp, *irqtp; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + if (local_softirq_pending()) { + curtp = current_thread_info(); + irqtp = softirq_ctx[smp_processor_id()]; + irqtp->task = curtp->task; + call_do_softirq(irqtp); + irqtp->task = NULL; + } + + local_irq_restore(flags); +} +EXPORT_SYMBOL(do_softirq); + +#endif /* CONFIG_IRQSTACKS */ + +static int __init setup_noirqdistrib(char *str) +{ + distribute_irqs = 0; + return 1; +} + +__setup("noirqdistrib", setup_noirqdistrib); diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c new file mode 100644 index 00000000000..103daaf7357 --- /dev/null +++ b/arch/ppc64/kernel/kprobes.c @@ -0,0 +1,290 @@ +/* + * Kernel Probes (KProbes) + * arch/ppc64/kernel/kprobes.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * + * 2002-Oct Created by Vamsi Krishna S Kernel + * Probes initial implementation ( includes contributions from + * Rusty Russell). + * 2004-July Suparna Bhattacharya added jumper probes + * interface to access function arguments. + * 2004-Nov Ananth N Mavinakayanahalli kprobes port + * for PPC64 + */ + +#include +#include +#include +#include +#include +#include +#include + +/* kprobe_status settings */ +#define KPROBE_HIT_ACTIVE 0x00000001 +#define KPROBE_HIT_SS 0x00000002 + +static struct kprobe *current_kprobe; +static unsigned long kprobe_status, kprobe_saved_msr; +static struct pt_regs jprobe_saved_regs; + +int arch_prepare_kprobe(struct kprobe *p) +{ + kprobe_opcode_t insn = *p->addr; + + if (IS_MTMSRD(insn) || IS_RFID(insn)) + /* cannot put bp on RFID/MTMSRD */ + return 1; + return 0; +} + +void arch_copy_kprobe(struct kprobe *p) +{ + memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ +} + +static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +{ + *p->addr = p->opcode; + regs->nip = (unsigned long)p->addr; +} + +static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ + regs->msr |= MSR_SE; + /*single step inline if it a breakpoint instruction*/ + if (p->opcode == BREAKPOINT_INSTRUCTION) + regs->nip = (unsigned long)p->addr; + else + regs->nip = (unsigned long)&p->ainsn.insn; +} + +static inline int kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + int ret = 0; + unsigned int *addr = (unsigned int *)regs->nip; + + /* Check we're not actually recursing */ + if (kprobe_running()) { + /* We *are* holding lock here, so this is safe. + Disarm the probe we just hit, and ignore it. */ + p = get_kprobe(addr); + if (p) { + if (kprobe_status == KPROBE_HIT_SS) { + regs->msr &= ~MSR_SE; + regs->msr |= kprobe_saved_msr; + unlock_kprobes(); + goto no_kprobe; + } + disarm_kprobe(p, regs); + ret = 1; + } else { + p = current_kprobe; + if (p->break_handler && p->break_handler(p, regs)) { + goto ss_probe; + } + } + /* If it's not ours, can't be delete race, (we hold lock). */ + goto no_kprobe; + } + + lock_kprobes(); + p = get_kprobe(addr); + if (!p) { + unlock_kprobes(); + if (*addr != BREAKPOINT_INSTRUCTION) { + /* + * PowerPC has multiple variants of the "trap" + * instruction. If the current instruction is a + * trap variant, it could belong to someone else + */ + kprobe_opcode_t cur_insn = *addr; + if (IS_TW(cur_insn) || IS_TD(cur_insn) || + IS_TWI(cur_insn) || IS_TDI(cur_insn)) + goto no_kprobe; + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + */ + ret = 1; + } + /* Not one of ours: let kernel handle it */ + goto no_kprobe; + } + + kprobe_status = KPROBE_HIT_ACTIVE; + current_kprobe = p; + kprobe_saved_msr = regs->msr; + if (p->pre_handler && p->pre_handler(p, regs)) + /* handler has already set things up, so skip ss setup */ + return 1; + +ss_probe: + prepare_singlestep(p, regs); + kprobe_status = KPROBE_HIT_SS; + /* + * This preempt_disable() matches the preempt_enable_no_resched() + * in post_kprobe_handler(). + */ + preempt_disable(); + return 1; + +no_kprobe: + return ret; +} + +/* + * Called after single-stepping. p->addr is the address of the + * instruction whose first byte has been replaced by the "breakpoint" + * instruction. To avoid the SMP problems that can occur when we + * temporarily put back the original opcode to single-step, we + * single-stepped a copy of the instruction. The address of this + * copy is p->ainsn.insn. + */ +static void resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + int ret; + + regs->nip = (unsigned long)p->addr; + ret = emulate_step(regs, p->ainsn.insn[0]); + if (ret == 0) + regs->nip = (unsigned long)p->addr + 4; + + regs->msr &= ~MSR_SE; +} + +static inline int post_kprobe_handler(struct pt_regs *regs) +{ + if (!kprobe_running()) + return 0; + + if (current_kprobe->post_handler) + current_kprobe->post_handler(current_kprobe, regs, 0); + + resume_execution(current_kprobe, regs); + regs->msr |= kprobe_saved_msr; + + unlock_kprobes(); + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, msr + * will have SE set, in which case, continue the remaining processing + * of do_debug, as if this is not a probe hit. + */ + if (regs->msr & MSR_SE) + return 0; + + return 1; +} + +/* Interrupts disabled, kprobe_lock held. */ +static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + if (current_kprobe->fault_handler + && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + return 1; + + if (kprobe_status & KPROBE_HIT_SS) { + resume_execution(current_kprobe, regs); + regs->msr |= kprobe_saved_msr; + + unlock_kprobes(); + preempt_enable_no_resched(); + } + return 0; +} + +/* + * Wrapper routine to for handling exceptions. + */ +int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + /* + * Interrupts are not disabled here. We need to disable + * preemption, because kprobe_running() uses smp_processor_id(). + */ + preempt_disable(); + switch (val) { + case DIE_IABR_MATCH: + case DIE_DABR_MATCH: + case DIE_BPT: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_GPF: + case DIE_PAGE_FAULT: + if (kprobe_running() && + kprobe_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + break; + default: + break; + } + preempt_enable(); + return ret; +} + +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + + memcpy(&jprobe_saved_regs, regs, sizeof(struct pt_regs)); + + /* setup return addr to the jprobe handler routine */ + regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry); + regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); + + return 1; +} + +void jprobe_return(void) +{ + asm volatile("trap" ::: "memory"); +} + +void jprobe_return_end(void) +{ +}; + +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + /* + * FIXME - we should ideally be validating that we got here 'cos + * of the "trap" in jprobe_return() above, before restoring the + * saved regs... + */ + memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs)); + return 1; +} diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c new file mode 100644 index 00000000000..d6c6bd03d2a --- /dev/null +++ b/arch/ppc64/kernel/lmb.c @@ -0,0 +1,372 @@ +/* + * Procedures for interfacing to Open Firmware. + * + * Peter Bergner, IBM Corp. June 2001. + * Copyright (C) 2001 Peter Bergner. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct lmb lmb; + +#undef DEBUG + +void lmb_dump_all(void) +{ +#ifdef DEBUG + unsigned long i; + struct lmb *_lmb = &lmb; + + udbg_printf("lmb_dump_all:\n"); + udbg_printf(" memory.cnt = 0x%lx\n", + _lmb->memory.cnt); + udbg_printf(" memory.size = 0x%lx\n", + _lmb->memory.size); + for (i=0; i < _lmb->memory.cnt ;i++) { + udbg_printf(" memory.region[0x%x].base = 0x%lx\n", + i, _lmb->memory.region[i].base); + udbg_printf(" .physbase = 0x%lx\n", + _lmb->memory.region[i].physbase); + udbg_printf(" .size = 0x%lx\n", + _lmb->memory.region[i].size); + } + + udbg_printf("\n reserved.cnt = 0x%lx\n", + _lmb->reserved.cnt); + udbg_printf(" reserved.size = 0x%lx\n", + _lmb->reserved.size); + for (i=0; i < _lmb->reserved.cnt ;i++) { + udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", + i, _lmb->reserved.region[i].base); + udbg_printf(" .physbase = 0x%lx\n", + _lmb->reserved.region[i].physbase); + udbg_printf(" .size = 0x%lx\n", + _lmb->reserved.region[i].size); + } +#endif /* DEBUG */ +} + +static unsigned long __init +lmb_addrs_overlap(unsigned long base1, unsigned long size1, + unsigned long base2, unsigned long size2) +{ + return ((base1 < (base2+size2)) && (base2 < (base1+size1))); +} + +static long __init +lmb_addrs_adjacent(unsigned long base1, unsigned long size1, + unsigned long base2, unsigned long size2) +{ + if (base2 == base1 + size1) + return 1; + else if (base1 == base2 + size2) + return -1; + + return 0; +} + +static long __init +lmb_regions_adjacent(struct lmb_region *rgn, unsigned long r1, unsigned long r2) +{ + unsigned long base1 = rgn->region[r1].base; + unsigned long size1 = rgn->region[r1].size; + unsigned long base2 = rgn->region[r2].base; + unsigned long size2 = rgn->region[r2].size; + + return lmb_addrs_adjacent(base1, size1, base2, size2); +} + +/* Assumption: base addr of region 1 < base addr of region 2 */ +static void __init +lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) +{ + unsigned long i; + + rgn->region[r1].size += rgn->region[r2].size; + for (i=r2; i < rgn->cnt-1; i++) { + rgn->region[i].base = rgn->region[i+1].base; + rgn->region[i].physbase = rgn->region[i+1].physbase; + rgn->region[i].size = rgn->region[i+1].size; + } + rgn->cnt--; +} + +/* This routine called with relocation disabled. */ +void __init +lmb_init(void) +{ + struct lmb *_lmb = &lmb; + + /* Create a dummy zero size LMB which will get coalesced away later. + * This simplifies the lmb_add() code below... + */ + _lmb->memory.region[0].base = 0; + _lmb->memory.region[0].size = 0; + _lmb->memory.cnt = 1; + + /* Ditto. */ + _lmb->reserved.region[0].base = 0; + _lmb->reserved.region[0].size = 0; + _lmb->reserved.cnt = 1; +} + +/* This routine called with relocation disabled. */ +void __init +lmb_analyze(void) +{ + unsigned long i; + unsigned long mem_size = 0; + unsigned long size_mask = 0; + struct lmb *_lmb = &lmb; +#ifdef CONFIG_MSCHUNKS + unsigned long physbase = 0; +#endif + + for (i=0; i < _lmb->memory.cnt; i++) { + unsigned long lmb_size; + + lmb_size = _lmb->memory.region[i].size; + +#ifdef CONFIG_MSCHUNKS + _lmb->memory.region[i].physbase = physbase; + physbase += lmb_size; +#else + _lmb->memory.region[i].physbase = _lmb->memory.region[i].base; +#endif + mem_size += lmb_size; + size_mask |= lmb_size; + } + + _lmb->memory.size = mem_size; +} + +/* This routine called with relocation disabled. */ +static long __init +lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) +{ + unsigned long i, coalesced = 0; + long adjacent; + + /* First try and coalesce this LMB with another. */ + for (i=0; i < rgn->cnt; i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + + adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); + if ( adjacent > 0 ) { + rgn->region[i].base -= size; + rgn->region[i].physbase -= size; + rgn->region[i].size += size; + coalesced++; + break; + } + else if ( adjacent < 0 ) { + rgn->region[i].size += size; + coalesced++; + break; + } + } + + if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { + lmb_coalesce_regions(rgn, i, i+1); + coalesced++; + } + + if ( coalesced ) { + return coalesced; + } else if ( rgn->cnt >= MAX_LMB_REGIONS ) { + return -1; + } + + /* Couldn't coalesce the LMB, so add it to the sorted table. */ + for (i=rgn->cnt-1; i >= 0; i--) { + if (base < rgn->region[i].base) { + rgn->region[i+1].base = rgn->region[i].base; + rgn->region[i+1].physbase = rgn->region[i].physbase; + rgn->region[i+1].size = rgn->region[i].size; + } else { + rgn->region[i+1].base = base; + rgn->region[i+1].physbase = lmb_abs_to_phys(base); + rgn->region[i+1].size = size; + break; + } + } + rgn->cnt++; + + return 0; +} + +/* This routine called with relocation disabled. */ +long __init +lmb_add(unsigned long base, unsigned long size) +{ + struct lmb *_lmb = &lmb; + struct lmb_region *_rgn = &(_lmb->memory); + + /* On pSeries LPAR systems, the first LMB is our RMO region. */ + if ( base == 0 ) + _lmb->rmo_size = size; + + return lmb_add_region(_rgn, base, size); + +} + +long __init +lmb_reserve(unsigned long base, unsigned long size) +{ + struct lmb *_lmb = &lmb; + struct lmb_region *_rgn = &(_lmb->reserved); + + return lmb_add_region(_rgn, base, size); +} + +long __init +lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, unsigned long size) +{ + unsigned long i; + + for (i=0; i < rgn->cnt; i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { + break; + } + } + + return (i < rgn->cnt) ? i : -1; +} + +unsigned long __init +lmb_alloc(unsigned long size, unsigned long align) +{ + return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); +} + +unsigned long __init +lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) +{ + long i, j; + unsigned long base = 0; + struct lmb *_lmb = &lmb; + struct lmb_region *_mem = &(_lmb->memory); + struct lmb_region *_rsv = &(_lmb->reserved); + + for (i=_mem->cnt-1; i >= 0; i--) { + unsigned long lmbbase = _mem->region[i].base; + unsigned long lmbsize = _mem->region[i].size; + + if ( max_addr == LMB_ALLOC_ANYWHERE ) + base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); + else if ( lmbbase < max_addr ) + base = _ALIGN_DOWN(min(lmbbase+lmbsize,max_addr)-size, align); + else + continue; + + while ( (lmbbase <= base) && + ((j = lmb_overlaps_region(_rsv,base,size)) >= 0) ) { + base = _ALIGN_DOWN(_rsv->region[j].base-size, align); + } + + if ( (base != 0) && (lmbbase <= base) ) + break; + } + + if ( i < 0 ) + return 0; + + lmb_add_region(_rsv, base, size); + + return base; +} + +unsigned long __init +lmb_phys_mem_size(void) +{ + struct lmb *_lmb = &lmb; +#ifdef CONFIG_MSCHUNKS + return _lmb->memory.size; +#else + struct lmb_region *_mem = &(_lmb->memory); + unsigned long total = 0; + int i; + + /* add all physical memory to the bootmem map */ + for (i=0; i < _mem->cnt; i++) + total += _mem->region[i].size; + return total; +#endif /* CONFIG_MSCHUNKS */ +} + +unsigned long __init +lmb_end_of_DRAM(void) +{ + struct lmb *_lmb = &lmb; + struct lmb_region *_mem = &(_lmb->memory); + int idx = _mem->cnt - 1; + +#ifdef CONFIG_MSCHUNKS + return (_mem->region[idx].physbase + _mem->region[idx].size); +#else + return (_mem->region[idx].base + _mem->region[idx].size); +#endif /* CONFIG_MSCHUNKS */ + + return 0; +} + +unsigned long __init +lmb_abs_to_phys(unsigned long aa) +{ + unsigned long i, pa = aa; + struct lmb *_lmb = &lmb; + struct lmb_region *_mem = &(_lmb->memory); + + for (i=0; i < _mem->cnt; i++) { + unsigned long lmbbase = _mem->region[i].base; + unsigned long lmbsize = _mem->region[i].size; + if ( lmb_addrs_overlap(aa,1,lmbbase,lmbsize) ) { + pa = _mem->region[i].physbase + (aa - lmbbase); + break; + } + } + + return pa; +} + +/* + * Truncate the lmb list to memory_limit if it's set + * You must call lmb_analyze() after this. + */ +void __init lmb_enforce_memory_limit(void) +{ + extern unsigned long memory_limit; + unsigned long i, limit; + struct lmb_region *mem = &(lmb.memory); + + if (! memory_limit) + return; + + limit = memory_limit; + for (i = 0; i < mem->cnt; i++) { + if (limit > mem->region[i].size) { + limit -= mem->region[i].size; + continue; + } + + mem->region[i].size = limit; + mem->cnt = i + 1; + break; + } +} diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c new file mode 100644 index 00000000000..a8fd32df848 --- /dev/null +++ b/arch/ppc64/kernel/lparcfg.c @@ -0,0 +1,611 @@ +/* + * PowerPC64 LPAR Configuration Information Driver + * + * Dave Engebretsen engebret@us.ibm.com + * Copyright (c) 2003 Dave Engebretsen + * Will Schmidt willschm@us.ibm.com + * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation. + * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation. + * Nathan Lynch nathanl@austin.ibm.com + * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This driver creates a proc file at /proc/ppc64/lparcfg which contains + * keyword - value pairs that specify the configuration of the partition. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MODULE_VERS "1.6" +#define MODULE_NAME "lparcfg" + +/* #define LPARCFG_DEBUG */ + +/* find a better place for this function... */ +void log_plpar_hcall_return(unsigned long rc, char *tag) +{ + if (rc == 0) /* success, return */ + return; +/* check for null tag ? */ + if (rc == H_Hardware) + printk(KERN_INFO + "plpar-hcall (%s) failed with hardware fault\n", tag); + else if (rc == H_Function) + printk(KERN_INFO + "plpar-hcall (%s) failed; function not allowed\n", tag); + else if (rc == H_Authority) + printk(KERN_INFO + "plpar-hcall (%s) failed; not authorized to this function\n", + tag); + else if (rc == H_Parameter) + printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n", + tag); + else + printk(KERN_INFO + "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n", + tag, rc); + +} + +static struct proc_dir_entry *proc_ppc64_lparcfg; +#define LPARCFG_BUFF_SIZE 4096 + +#ifdef CONFIG_PPC_ISERIES + +/* + * For iSeries legacy systems, the PPA purr function is available from the + * emulated_time_base field in the paca. + */ +static unsigned long get_purr(void) +{ + unsigned long sum_purr = 0; + int cpu; + struct paca_struct *lpaca; + + for_each_cpu(cpu) { + lpaca = paca + cpu; + sum_purr += lpaca->lppaca.emulated_time_base; + +#ifdef PURR_DEBUG + printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n", + cpu, lpaca->lppaca.emulated_time_base); +#endif + } + return sum_purr; +} + +#define lparcfg_write NULL + +/* + * Methods used to fetch LPAR data when running on an iSeries platform. + */ +static int lparcfg_data(struct seq_file *m, void *v) +{ + unsigned long pool_id, lp_index; + int shared, entitled_capacity, max_entitled_capacity; + int processors, max_processors; + struct paca_struct *lpaca = get_paca(); + unsigned long purr = get_purr(); + + seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); + + shared = (int)(lpaca->lppaca_ptr->shared_proc); + seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n", + e2a(xItExtVpdPanel.mfgID[2]), + e2a(xItExtVpdPanel.mfgID[3]), + e2a(xItExtVpdPanel.systemSerial[1]), + e2a(xItExtVpdPanel.systemSerial[2]), + e2a(xItExtVpdPanel.systemSerial[3]), + e2a(xItExtVpdPanel.systemSerial[4]), + e2a(xItExtVpdPanel.systemSerial[5])); + + seq_printf(m, "system_type=%c%c%c%c\n", + e2a(xItExtVpdPanel.machineType[0]), + e2a(xItExtVpdPanel.machineType[1]), + e2a(xItExtVpdPanel.machineType[2]), + e2a(xItExtVpdPanel.machineType[3])); + + lp_index = HvLpConfig_getLpIndex(); + seq_printf(m, "partition_id=%d\n", (int)lp_index); + + seq_printf(m, "system_active_processors=%d\n", + (int)HvLpConfig_getSystemPhysicalProcessors()); + + seq_printf(m, "system_potential_processors=%d\n", + (int)HvLpConfig_getSystemPhysicalProcessors()); + + processors = (int)HvLpConfig_getPhysicalProcessors(); + seq_printf(m, "partition_active_processors=%d\n", processors); + + max_processors = (int)HvLpConfig_getMaxPhysicalProcessors(); + seq_printf(m, "partition_potential_processors=%d\n", max_processors); + + if (shared) { + entitled_capacity = HvLpConfig_getSharedProcUnits(); + max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits(); + } else { + entitled_capacity = processors * 100; + max_entitled_capacity = max_processors * 100; + } + seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity); + + seq_printf(m, "partition_max_entitled_capacity=%d\n", + max_entitled_capacity); + + if (shared) { + pool_id = HvLpConfig_getSharedPoolIndex(); + seq_printf(m, "pool=%d\n", (int)pool_id); + seq_printf(m, "pool_capacity=%d\n", + (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) * + 100)); + seq_printf(m, "purr=%ld\n", purr); + } + + seq_printf(m, "shared_processor_mode=%d\n", shared); + + return 0; +} +#endif /* CONFIG_PPC_ISERIES */ + +#ifdef CONFIG_PPC_PSERIES +/* + * Methods used to fetch LPAR data when running on a pSeries platform. + */ + +/* + * H_GET_PPP hcall returns info in 4 parms. + * entitled_capacity,unallocated_capacity, + * aggregation, resource_capability). + * + * R4 = Entitled Processor Capacity Percentage. + * R5 = Unallocated Processor Capacity Percentage. + * R6 (AABBCCDDEEFFGGHH). + * XXXX - reserved (0) + * XXXX - reserved (0) + * XXXX - Group Number + * XXXX - Pool Number. + * R7 (IIJJKKLLMMNNOOPP). + * XX - reserved. (0) + * XX - bit 0-6 reserved (0). bit 7 is Capped indicator. + * XX - variable processor Capacity Weight + * XX - Unallocated Variable Processor Capacity Weight. + * XXXX - Active processors in Physical Processor Pool. + * XXXX - Processors active on platform. + */ +static unsigned int h_get_ppp(unsigned long *entitled, + unsigned long *unallocated, + unsigned long *aggregation, + unsigned long *resource) +{ + unsigned long rc; + rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated, + aggregation, resource); + + log_plpar_hcall_return(rc, "H_GET_PPP"); + + return rc; +} + +static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) +{ + unsigned long rc; + unsigned long dummy; + rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy); + + log_plpar_hcall_return(rc, "H_PIC"); +} + +static unsigned long get_purr(void); + +/* Track sum of all purrs across all processors. This is used to further */ +/* calculate usage values by different applications */ + +static unsigned long get_purr(void) +{ + unsigned long sum_purr = 0; + int cpu; + struct cpu_usage *cu; + + for_each_cpu(cpu) { + cu = &per_cpu(cpu_usage_array, cpu); + sum_purr += cu->current_tb; + } + return sum_purr; +} + +#define SPLPAR_CHARACTERISTICS_TOKEN 20 +#define SPLPAR_MAXLENGTH 1026*(sizeof(char)) + +/* + * parse_system_parameter_string() + * Retrieve the potential_processors, max_entitled_capacity and friends + * through the get-system-parameter rtas call. Replace keyword strings as + * necessary. + */ +static void parse_system_parameter_string(struct seq_file *m) +{ + int call_status; + + char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + if (!local_buffer) { + printk(KERN_ERR "%s %s kmalloc failure at line %d \n", + __FILE__, __FUNCTION__, __LINE__); + return; + } + + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH); + call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + SPLPAR_CHARACTERISTICS_TOKEN, + __pa(rtas_data_buf)); + memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH); + spin_unlock(&rtas_data_buf_lock); + + if (call_status != 0) { + printk(KERN_INFO + "%s %s Error calling get-system-parameter (0x%x)\n", + __FILE__, __FUNCTION__, call_status); + } else { + int splpar_strlen; + int idx, w_idx; + char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + if (!workbuffer) { + printk(KERN_ERR "%s %s kmalloc failure at line %d \n", + __FILE__, __FUNCTION__, __LINE__); + return; + } +#ifdef LPARCFG_DEBUG + printk(KERN_INFO "success calling get-system-parameter \n"); +#endif + splpar_strlen = local_buffer[0] * 16 + local_buffer[1]; + local_buffer += 2; /* step over strlen value */ + + memset(workbuffer, 0, SPLPAR_MAXLENGTH); + w_idx = 0; + idx = 0; + while ((*local_buffer) && (idx < splpar_strlen)) { + workbuffer[w_idx++] = local_buffer[idx++]; + if ((local_buffer[idx] == ',') + || (local_buffer[idx] == '\0')) { + workbuffer[w_idx] = '\0'; + if (w_idx) { + /* avoid the empty string */ + seq_printf(m, "%s\n", workbuffer); + } + memset(workbuffer, 0, SPLPAR_MAXLENGTH); + idx++; /* skip the comma */ + w_idx = 0; + } else if (local_buffer[idx] == '=') { + /* code here to replace workbuffer contents + with different keyword strings */ + if (0 == strcmp(workbuffer, "MaxEntCap")) { + strcpy(workbuffer, + "partition_max_entitled_capacity"); + w_idx = strlen(workbuffer); + } + if (0 == strcmp(workbuffer, "MaxPlatProcs")) { + strcpy(workbuffer, + "system_potential_processors"); + w_idx = strlen(workbuffer); + } + } + } + kfree(workbuffer); + local_buffer -= 2; /* back up over strlen value */ + } + kfree(local_buffer); +} + +static int lparcfg_count_active_processors(void); + +/* Return the number of processors in the system. + * This function reads through the device tree and counts + * the virtual processors, this does not include threads. + */ +static int lparcfg_count_active_processors(void) +{ + struct device_node *cpus_dn = NULL; + int count = 0; + + while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { +#ifdef LPARCFG_DEBUG + printk(KERN_ERR "cpus_dn %p \n", cpus_dn); +#endif + count++; + } + return count; +} + +static int lparcfg_data(struct seq_file *m, void *v) +{ + int partition_potential_processors; + int partition_active_processors; + struct device_node *rootdn; + const char *model = ""; + const char *system_id = ""; + unsigned int *lp_index_ptr, lp_index = 0; + struct device_node *rtas_node; + int *lrdrp; + + rootdn = find_path_device("/"); + if (rootdn) { + model = get_property(rootdn, "model", NULL); + system_id = get_property(rootdn, "system-id", NULL); + lp_index_ptr = (unsigned int *) + get_property(rootdn, "ibm,partition-no", NULL); + if (lp_index_ptr) + lp_index = *lp_index_ptr; + } + + seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); + + seq_printf(m, "serial_number=%s\n", system_id); + + seq_printf(m, "system_type=%s\n", model); + + seq_printf(m, "partition_id=%d\n", (int)lp_index); + + rtas_node = find_path_device("/rtas"); + lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL); + + if (lrdrp == NULL) { + partition_potential_processors = systemcfg->processorCount; + } else { + partition_potential_processors = *(lrdrp + 4); + } + + partition_active_processors = lparcfg_count_active_processors(); + + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + unsigned long h_entitled, h_unallocated; + unsigned long h_aggregation, h_resource; + unsigned long pool_idle_time, pool_procs; + unsigned long purr; + + h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation, + &h_resource); + + seq_printf(m, "R4=0x%lx\n", h_entitled); + seq_printf(m, "R5=0x%lx\n", h_unallocated); + seq_printf(m, "R6=0x%lx\n", h_aggregation); + seq_printf(m, "R7=0x%lx\n", h_resource); + + purr = get_purr(); + + /* this call handles the ibm,get-system-parameter contents */ + parse_system_parameter_string(m); + + seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled); + + seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff); + + seq_printf(m, "system_active_processors=%ld\n", + (h_resource >> 0 * 8) & 0xffff); + + /* pool related entries are apropriate for shared configs */ + if (paca[0].lppaca.shared_proc) { + + h_pic(&pool_idle_time, &pool_procs); + + seq_printf(m, "pool=%ld\n", + (h_aggregation >> 0 * 8) & 0xffff); + + /* report pool_capacity in percentage */ + seq_printf(m, "pool_capacity=%ld\n", + ((h_resource >> 2 * 8) & 0xffff) * 100); + + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + } + + seq_printf(m, "unallocated_capacity_weight=%ld\n", + (h_resource >> 4 * 8) & 0xFF); + + seq_printf(m, "capacity_weight=%ld\n", + (h_resource >> 5 * 8) & 0xFF); + + seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01); + + seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated); + + seq_printf(m, "purr=%ld\n", purr); + + } else { /* non SPLPAR case */ + + seq_printf(m, "system_active_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "system_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "partition_max_entitled_capacity=%d\n", + partition_potential_processors * 100); + + seq_printf(m, "partition_entitled_capacity=%d\n", + partition_active_processors * 100); + } + + seq_printf(m, "partition_active_processors=%d\n", + partition_active_processors); + + seq_printf(m, "partition_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc); + + return 0; +} + +/* + * Interface for changing system parameters (variable capacity weight + * and entitled capacity). Format of input is "param_name=value"; + * anything after value is ignored. Valid parameters at this time are + * "partition_entitled_capacity" and "capacity_weight". We use + * H_SET_PPP to alter parameters. + * + * This function should be invoked only on systems with + * FW_FEATURE_SPLPAR. + */ +static ssize_t lparcfg_write(struct file *file, const char __user * buf, + size_t count, loff_t * off) +{ + char *kbuf; + char *tmp; + u64 new_entitled, *new_entitled_ptr = &new_entitled; + u8 new_weight, *new_weight_ptr = &new_weight; + + unsigned long current_entitled; /* parameters for h_get_ppp */ + unsigned long dummy; + unsigned long resource; + u8 current_weight; + + ssize_t retval = -ENOMEM; + + kbuf = kmalloc(count, GFP_KERNEL); + if (!kbuf) + goto out; + + retval = -EFAULT; + if (copy_from_user(kbuf, buf, count)) + goto out; + + retval = -EINVAL; + kbuf[count - 1] = '\0'; + tmp = strchr(kbuf, '='); + if (!tmp) + goto out; + + *tmp++ = '\0'; + + if (!strcmp(kbuf, "partition_entitled_capacity")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + goto out; + new_weight_ptr = ¤t_weight; + } else if (!strcmp(kbuf, "capacity_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + goto out; + new_entitled_ptr = ¤t_entitled; + } else + goto out; + + /* Get our current parameters */ + retval = h_get_ppp(¤t_entitled, &dummy, &dummy, &resource); + if (retval) { + retval = -EIO; + goto out; + } + + current_weight = (resource >> 5 * 8) & 0xFF; + + pr_debug("%s: current_entitled = %lu, current_weight = %lu\n", + __FUNCTION__, current_entitled, current_weight); + + pr_debug("%s: new_entitled = %lu, new_weight = %lu\n", + __FUNCTION__, *new_entitled_ptr, *new_weight_ptr); + + retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr, + *new_weight_ptr); + + if (retval == H_Success || retval == H_Constrained) { + retval = count; + } else if (retval == H_Busy) { + retval = -EBUSY; + } else if (retval == H_Hardware) { + retval = -EIO; + } else if (retval == H_Parameter) { + retval = -EINVAL; + } else { + printk(KERN_WARNING "%s: received unknown hv return code %ld", + __FUNCTION__, retval); + retval = -EIO; + } + + out: + kfree(kbuf); + return retval; +} + +#endif /* CONFIG_PPC_PSERIES */ + +static int lparcfg_open(struct inode *inode, struct file *file) +{ + return single_open(file, lparcfg_data, NULL); +} + +struct file_operations lparcfg_fops = { + .owner = THIS_MODULE, + .read = seq_read, + .open = lparcfg_open, + .release = single_release, +}; + +int __init lparcfg_init(void) +{ + struct proc_dir_entry *ent; + mode_t mode = S_IRUSR; + + /* Allow writing if we have FW_FEATURE_SPLPAR */ + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + lparcfg_fops.write = lparcfg_write; + mode |= S_IWUSR; + } + + ent = create_proc_entry("ppc64/lparcfg", mode, NULL); + if (ent) { + ent->proc_fops = &lparcfg_fops; + ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL); + if (!ent->data) { + printk(KERN_ERR + "Failed to allocate buffer for lparcfg\n"); + remove_proc_entry("lparcfg", ent->parent); + return -ENOMEM; + } + } else { + printk(KERN_ERR "Failed to create ppc64/lparcfg\n"); + return -EIO; + } + + proc_ppc64_lparcfg = ent; + return 0; +} + +void __exit lparcfg_cleanup(void) +{ + if (proc_ppc64_lparcfg) { + if (proc_ppc64_lparcfg->data) { + kfree(proc_ppc64_lparcfg->data); + } + remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent); + } +} + +module_init(lparcfg_init); +module_exit(lparcfg_cleanup); +MODULE_DESCRIPTION("Interface for LPAR configuration data"); +MODULE_AUTHOR("Dave Engebretsen"); +MODULE_LICENSE("GPL"); diff --git a/arch/ppc64/kernel/maple_pci.c b/arch/ppc64/kernel/maple_pci.c new file mode 100644 index 00000000000..53993999b26 --- /dev/null +++ b/arch/ppc64/kernel/maple_pci.c @@ -0,0 +1,521 @@ +/* + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define DEBUG + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +static struct pci_controller *u3_agp, *u3_ht; + +static int __init fixup_one_level_bus_range(struct device_node *node, int higher) +{ + for (; node != 0;node = node->sibling) { + int * bus_range; + unsigned int *class_code; + int len; + + /* For PCI<->PCI bridges or CardBus bridges, we go down */ + class_code = (unsigned int *) get_property(node, "class-code", NULL); + if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && + (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) + continue; + bus_range = (int *) get_property(node, "bus-range", &len); + if (bus_range != NULL && len > 2 * sizeof(int)) { + if (bus_range[1] > higher) + higher = bus_range[1]; + } + higher = fixup_one_level_bus_range(node->child, higher); + } + return higher; +} + +/* This routine fixes the "bus-range" property of all bridges in the + * system since they tend to have their "last" member wrong on macs + * + * Note that the bus numbers manipulated here are OF bus numbers, they + * are not Linux bus numbers. + */ +static void __init fixup_bus_range(struct device_node *bridge) +{ + int * bus_range; + int len; + + /* Lookup the "bus-range" property for the hose */ + bus_range = (int *) get_property(bridge, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s\n", + bridge->full_name); + return; + } + bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); +} + + +#define U3_AGP_CFA0(devfn, off) \ + ((1 << (unsigned long)PCI_SLOT(dev_fn)) \ + | (((unsigned long)PCI_FUNC(dev_fn)) << 8) \ + | (((unsigned long)(off)) & 0xFCUL)) + +#define U3_AGP_CFA1(bus, devfn, off) \ + ((((unsigned long)(bus)) << 16) \ + |(((unsigned long)(devfn)) << 8) \ + |(((unsigned long)(off)) & 0xFCUL) \ + |1UL) + +static unsigned long u3_agp_cfg_access(struct pci_controller* hose, + u8 bus, u8 dev_fn, u8 offset) +{ + unsigned int caddr; + + if (bus == hose->first_busno) { + if (dev_fn < (11 << 3)) + return 0; + caddr = U3_AGP_CFA0(dev_fn, offset); + } else + caddr = U3_AGP_CFA1(bus, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= 0x07; + return ((unsigned long)hose->cfg_data) + offset; +} + +static int u3_agp_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int u3_agp_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u3_agp_pci_ops = +{ + u3_agp_read_config, + u3_agp_write_config +}; + + +#define U3_HT_CFA0(devfn, off) \ + ((((unsigned long)devfn) << 8) | offset) +#define U3_HT_CFA1(bus, devfn, off) \ + (U3_HT_CFA0(devfn, off) \ + + (((unsigned long)bus) << 16) \ + + 0x01000000UL) + +static unsigned long u3_ht_cfg_access(struct pci_controller* hose, + u8 bus, u8 devfn, u8 offset) +{ + if (bus == hose->first_busno) { + if (PCI_SLOT(devfn) == 0) + return 0; + return ((unsigned long)hose->cfg_data) + U3_HT_CFA0(devfn, offset); + } else + return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset); +} + +static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u3_ht_pci_ops = +{ + u3_ht_read_config, + u3_ht_write_config +}; + +static void __init setup_u3_agp(struct pci_controller* hose) +{ + /* On G5, we move AGP up to high bus number so we don't need + * to reassign bus numbers for HT. If we ever have P2P bridges + * on AGP, we'll have to move pci_assign_all_busses to the + * pci_controller structure so we enable it for AGP and not for + * HT childs. + * We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->first_busno = 0xf0; + hose->last_busno = 0xff; + hose->ops = &u3_agp_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + + u3_agp = hose; +} + +static void __init setup_u3_ht(struct pci_controller* hose) +{ + hose->ops = &u3_ht_pci_ops; + + /* We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->cfg_data = (volatile unsigned char *)ioremap(0xf2000000, 0x02000000); + + hose->first_busno = 0; + hose->last_busno = 0xef; + + u3_ht = hose; +} + +static int __init add_bridge(struct device_node *dev) +{ + int len; + struct pci_controller *hose; + char* disp_name; + int *bus_range; + int primary = 1; + struct property *of_prop; + + DBG("Adding PCI host bridge %s\n", dev->full_name); + + bus_range = (int *) get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n", + dev->full_name); + } + + hose = alloc_bootmem(sizeof(struct pci_controller)); + if (hose == NULL) + return -ENOMEM; + pci_setup_pci_controller(hose); + + hose->arch_data = dev; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + of_prop = alloc_bootmem(sizeof(struct property) + + sizeof(hose->global_number)); + if (of_prop) { + memset(of_prop, 0, sizeof(struct property)); + of_prop->name = "linux,pci-domain"; + of_prop->length = sizeof(hose->global_number); + of_prop->value = (unsigned char *)&of_prop[1]; + memcpy(of_prop->value, &hose->global_number, sizeof(hose->global_number)); + prom_add_property(dev, of_prop); + } + + disp_name = NULL; + if (device_is_compatible(dev, "u3-agp")) { + setup_u3_agp(hose); + disp_name = "U3-AGP"; + primary = 0; + } else if (device_is_compatible(dev, "u3-ht")) { + setup_u3_ht(hose); + disp_name = "U3-HT"; + primary = 1; + } + printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n", + disp_name, hose->first_busno, hose->last_busno); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev); + pci_setup_phb_io(hose, primary); + + /* Fixup "bus-range" OF property */ + fixup_bus_range(dev); + + return 0; +} + + +void __init maple_pcibios_fixup(void) +{ + struct pci_dev *dev = NULL; + + DBG(" -> maple_pcibios_fixup\n"); + + for_each_pci_dev(dev) + pci_read_irq_line(dev); + + /* Do the mapping of the IO space */ + phbs_remap_io(); + + DBG(" <- maple_pcibios_fixup\n"); +} + +static void __init maple_fixup_phb_resources(void) +{ + struct pci_controller *hose, *tmp; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; + hose->io_resource.start += offset; + hose->io_resource.end += offset; + printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n", + hose->global_number, + hose->io_resource.start, hose->io_resource.end); + } +} + +void __init maple_pci_init(void) +{ + struct device_node *np, *root; + struct device_node *ht = NULL; + + /* Probe root PCI hosts, that is on U3 the AGP host and the + * HyperTransport host. That one is actually "kept" around + * and actually added last as it's resource management relies + * on the AGP resources to have been setup first + */ + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_CRIT "maple_find_bridges: can't find root of device tree\n"); + return; + } + for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) { + if (np->name == NULL) + continue; + if (strcmp(np->name, "pci") == 0) { + if (add_bridge(np) == 0) + of_node_get(np); + } + if (strcmp(np->name, "ht") == 0) { + of_node_get(np); + ht = np; + } + } + of_node_put(root); + + /* Now setup the HyperTransport host if we found any + */ + if (ht && add_bridge(ht) != 0) + of_node_put(ht); + + /* Fixup the IO resources on our host bridges as the common code + * does it only for childs of the host bridges + */ + maple_fixup_phb_resources(); + + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); + + /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We + * assume there is no P2P bridge on the AGP bus, which should be a + * safe assumptions hopefully. + */ + if (u3_agp) { + struct device_node *np = u3_agp->arch_data; + np->busno = 0xf0; + for (np = np->child; np; np = np->sibling) + np->busno = 0xf0; + } + + /* Tell pci.c to use the common resource allocation mecanism */ + pci_probe_only = 0; + + /* Allow all IO */ + io_page_mask = -1; +} + +int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) +{ + struct device_node *np; + int irq = channel ? 15 : 14; + + if (pdev->vendor != PCI_VENDOR_ID_AMD || + pdev->device != PCI_DEVICE_ID_AMD_8111_IDE) + return irq; + + np = pci_device_to_OF_node(pdev); + if (np == NULL) + return irq; + if (np->n_intrs < 2) + return irq; + return np->intrs[channel & 0x1].line; +} + +/* XXX: To remove once all firmwares are ok */ +static void fixup_maple_ide(struct pci_dev* dev) +{ +#if 0 /* Enable this to enable IDE port 0 */ + { + u8 v; + + pci_read_config_byte(dev, 0x40, &v); + v |= 2; + pci_write_config_byte(dev, 0x40, v); + } +#endif +#if 0 /* fix bus master base */ + pci_write_config_dword(dev, 0x20, 0xcc01); + printk("old ide resource: %lx -> %lx \n", + dev->resource[4].start, dev->resource[4].end); + dev->resource[4].start = 0xcc00; + dev->resource[4].end = 0xcc10; +#endif +#if 1 /* Enable this to fixup IDE sense/polarity of irqs in IO-APICs */ + { + struct pci_dev *apicdev; + u32 v; + + apicdev = pci_get_slot (dev->bus, PCI_DEVFN(5,0)); + if (apicdev == NULL) + printk("IDE Fixup IRQ: Can't find IO-APIC !\n"); + else { + pci_write_config_byte(apicdev, 0xf2, 0x10 + 2*14); + pci_read_config_dword(apicdev, 0xf4, &v); + v &= ~0x00000022; + pci_write_config_dword(apicdev, 0xf4, v); + pci_write_config_byte(apicdev, 0xf2, 0x10 + 2*15); + pci_read_config_dword(apicdev, 0xf4, &v); + v &= ~0x00000022; + pci_write_config_dword(apicdev, 0xf4, v); + pci_dev_put(apicdev); + } + } +#endif +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_IDE, + fixup_maple_ide); diff --git a/arch/ppc64/kernel/maple_setup.c b/arch/ppc64/kernel/maple_setup.c new file mode 100644 index 00000000000..1db6ea0f336 --- /dev/null +++ b/arch/ppc64/kernel/maple_setup.c @@ -0,0 +1,240 @@ +/* + * arch/ppc64/kernel/maple_setup.c + * + * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpic.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +extern int maple_set_rtc_time(struct rtc_time *tm); +extern void maple_get_rtc_time(struct rtc_time *tm); +extern void maple_get_boot_time(struct rtc_time *tm); +extern void maple_calibrate_decr(void); +extern void maple_pci_init(void); +extern void maple_pcibios_fixup(void); +extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel); +extern void generic_find_legacy_serial_ports(u64 *physport, + unsigned int *default_speed); + + +static void maple_restart(char *cmd) +{ +} + +static void maple_power_off(void) +{ +} + +static void maple_halt(void) +{ +} + +#ifdef CONFIG_SMP +struct smp_ops_t maple_smp_ops = { + .probe = smp_mpic_probe, + .message_pass = smp_mpic_message_pass, + .kick_cpu = smp_generic_kick_cpu, + .setup_cpu = smp_mpic_setup_cpu, + .give_timebase = smp_generic_give_timebase, + .take_timebase = smp_generic_take_timebase, +}; +#endif /* CONFIG_SMP */ + +void __init maple_setup_arch(void) +{ + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + /* Setup SMP callback */ +#ifdef CONFIG_SMP + smp_ops = &maple_smp_ops; +#endif + /* Lookup PCI hosts */ + maple_pci_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif +} + +/* + * Early initialization. + */ +static void __init maple_init_early(void) +{ + unsigned int default_speed; + u64 physport; + + DBG(" -> maple_init_early\n"); + + /* Initialize hash table, from now on, we can take hash faults + * and call ioremap + */ + hpte_init_native(); + + /* Find the serial port */ + generic_find_legacy_serial_ports(&physport, &default_speed); + + DBG("phys port addr: %lx\n", (long)physport); + + if (physport) { + void *comport; + /* Map the uart for udbg. */ + comport = (void *)__ioremap(physport, 16, _PAGE_NO_CACHE); + udbg_init_uart(comport, default_speed); + + ppc_md.udbg_putc = udbg_putc; + ppc_md.udbg_getc = udbg_getc; + ppc_md.udbg_getc_poll = udbg_getc_poll; + DBG("Hello World !\n"); + } + + /* Setup interrupt mapping options */ + ppc64_interrupt_controller = IC_OPEN_PIC; + + iommu_init_early_u3(); + + DBG(" <- maple_init_early\n"); +} + + +static __init void maple_init_IRQ(void) +{ + struct device_node *root; + unsigned int *opprop; + unsigned long opic_addr; + struct mpic *mpic; + unsigned char senses[128]; + int n; + + DBG(" -> maple_init_IRQ\n"); + + /* XXX: Non standard, replace that with a proper openpic/mpic node + * in the device-tree. Find the Open PIC if present */ + root = of_find_node_by_path("/"); + opprop = (unsigned int *) get_property(root, + "platform-open-pic", NULL); + if (opprop == 0) + panic("OpenPIC not found !\n"); + + n = prom_n_addr_cells(root); + for (opic_addr = 0; n > 0; --n) + opic_addr = (opic_addr << 32) + *opprop++; + of_node_put(root); + + /* Obtain sense values from device-tree */ + prom_get_irq_senses(senses, 0, 128); + + mpic = mpic_alloc(opic_addr, + MPIC_PRIMARY | MPIC_BIG_ENDIAN | + MPIC_BROKEN_U3 | MPIC_WANTS_RESET, + 0, 0, 128, 128, senses, 128, "U3-MPIC"); + BUG_ON(mpic == NULL); + mpic_init(mpic); + + DBG(" <- maple_init_IRQ\n"); +} + +static void __init maple_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? s : ""); +} + + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init maple_probe(int platform) +{ + if (platform != PLATFORM_MAPLE) + return 0; + /* + * On U3, the DART (iommu) must be allocated now since it + * has an impact on htab_initialize (due to the large page it + * occupies having to be broken up so the DART itself is not + * part of the cacheable linar mapping + */ + alloc_u3_dart_table(); + + return 1; +} + +struct machdep_calls __initdata maple_md = { + .probe = maple_probe, + .setup_arch = maple_setup_arch, + .init_early = maple_init_early, + .init_IRQ = maple_init_IRQ, + .get_irq = mpic_get_irq, + .pcibios_fixup = maple_pcibios_fixup, + .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, + .restart = maple_restart, + .power_off = maple_power_off, + .halt = maple_halt, + .get_boot_time = maple_get_boot_time, + .set_rtc_time = maple_set_rtc_time, + .get_rtc_time = maple_get_rtc_time, + .calibrate_decr = maple_calibrate_decr, + .progress = maple_progress, +}; diff --git a/arch/ppc64/kernel/maple_time.c b/arch/ppc64/kernel/maple_time.c new file mode 100644 index 00000000000..07ce7895b43 --- /dev/null +++ b/arch/ppc64/kernel/maple_time.c @@ -0,0 +1,226 @@ +/* + * arch/ppc64/kernel/maple_time.c + * + * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +extern void setup_default_decr(void); +extern void GregorianDay(struct rtc_time * tm); + +extern unsigned long ppc_tb_freq; +extern unsigned long ppc_proc_freq; +static int maple_rtc_addr; + +static int maple_clock_read(int addr) +{ + outb_p(addr, maple_rtc_addr); + return inb_p(maple_rtc_addr+1); +} + +static void maple_clock_write(unsigned long val, int addr) +{ + outb_p(addr, maple_rtc_addr); + outb_p(val, maple_rtc_addr+1); +} + +void maple_get_rtc_time(struct rtc_time *tm) +{ + int uip, i; + + /* The Linux interpretation of the CMOS clock register contents: + * When the Update-In-Progress (UIP) flag goes from 1 to 0, the + * RTC registers show the second which has precisely just started. + * Let's hope other operating systems interpret the RTC the same way. + */ + + /* Since the UIP flag is set for about 2.2 ms and the clock + * is typically written with a precision of 1 jiffy, trying + * to obtain a precision better than a few milliseconds is + * an illusion. Only consistency is interesting, this also + * allows to use the routine for /dev/rtc without a potential + * 1 second kernel busy loop triggered by any reader of /dev/rtc. + */ + + for (i = 0; i<1000000; i++) { + uip = maple_clock_read(RTC_FREQ_SELECT); + tm->tm_sec = maple_clock_read(RTC_SECONDS); + tm->tm_min = maple_clock_read(RTC_MINUTES); + tm->tm_hour = maple_clock_read(RTC_HOURS); + tm->tm_mday = maple_clock_read(RTC_DAY_OF_MONTH); + tm->tm_mon = maple_clock_read(RTC_MONTH); + tm->tm_year = maple_clock_read(RTC_YEAR); + uip |= maple_clock_read(RTC_FREQ_SELECT); + if ((uip & RTC_UIP)==0) + break; + } + + if (!(maple_clock_read(RTC_CONTROL) & RTC_DM_BINARY) + || RTC_ALWAYS_BCD) { + BCD_TO_BIN(tm->tm_sec); + BCD_TO_BIN(tm->tm_min); + BCD_TO_BIN(tm->tm_hour); + BCD_TO_BIN(tm->tm_mday); + BCD_TO_BIN(tm->tm_mon); + BCD_TO_BIN(tm->tm_year); + } + if ((tm->tm_year + 1900) < 1970) + tm->tm_year += 100; + + GregorianDay(tm); +} + +int maple_set_rtc_time(struct rtc_time *tm) +{ + unsigned char save_control, save_freq_select; + int sec, min, hour, mon, mday, year; + + spin_lock(&rtc_lock); + + save_control = maple_clock_read(RTC_CONTROL); /* tell the clock it's being set */ + + maple_clock_write((save_control|RTC_SET), RTC_CONTROL); + + save_freq_select = maple_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */ + + maple_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + sec = tm->tm_sec; + min = tm->tm_min; + hour = tm->tm_hour; + mon = tm->tm_mon; + mday = tm->tm_mday; + year = tm->tm_year; + + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + BIN_TO_BCD(sec); + BIN_TO_BCD(min); + BIN_TO_BCD(hour); + BIN_TO_BCD(mon); + BIN_TO_BCD(mday); + BIN_TO_BCD(year); + } + maple_clock_write(sec, RTC_SECONDS); + maple_clock_write(min, RTC_MINUTES); + maple_clock_write(hour, RTC_HOURS); + maple_clock_write(mon, RTC_MONTH); + maple_clock_write(mday, RTC_DAY_OF_MONTH); + maple_clock_write(year, RTC_YEAR); + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + maple_clock_write(save_control, RTC_CONTROL); + maple_clock_write(save_freq_select, RTC_FREQ_SELECT); + + spin_unlock(&rtc_lock); + + return 0; +} + +void __init maple_get_boot_time(struct rtc_time *tm) +{ + struct device_node *rtcs; + + rtcs = find_compatible_devices("rtc", "pnpPNP,b00"); + if (rtcs && rtcs->addrs) { + maple_rtc_addr = rtcs->addrs[0].address; + printk(KERN_INFO "Maple: Found RTC at 0x%x\n", maple_rtc_addr); + } else { + maple_rtc_addr = RTC_PORT(0); /* legacy address */ + printk(KERN_INFO "Maple: No device node for RTC, assuming " + "legacy address (0x%x)\n", maple_rtc_addr); + } + + maple_get_rtc_time(tm); +} + +/* XXX FIXME: Some sane defaults: 125 MHz timebase, 1GHz processor */ +#define DEFAULT_TB_FREQ 125000000UL +#define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8) + +void __init maple_calibrate_decr(void) +{ + struct device_node *cpu; + struct div_result divres; + unsigned int *fp = NULL; + + /* + * The cpu node should have a timebase-frequency property + * to tell us the rate at which the decrementer counts. + */ + cpu = of_find_node_by_type(NULL, "cpu"); + + ppc_tb_freq = DEFAULT_TB_FREQ; + if (cpu != 0) + fp = (unsigned int *)get_property(cpu, "timebase-frequency", NULL); + if (fp != NULL) + ppc_tb_freq = *fp; + else + printk(KERN_ERR "WARNING: Estimating decrementer frequency (not found)\n"); + fp = NULL; + ppc_proc_freq = DEFAULT_PROC_FREQ; + if (cpu != 0) + fp = (unsigned int *)get_property(cpu, "clock-frequency", NULL); + if (fp != NULL) + ppc_proc_freq = *fp; + else + printk(KERN_ERR "WARNING: Estimating processor frequency (not found)\n"); + + of_node_put(cpu); + + printk(KERN_INFO "time_init: decrementer frequency = %lu.%.6lu MHz\n", + ppc_tb_freq/1000000, ppc_tb_freq%1000000); + printk(KERN_INFO "time_init: processor frequency = %lu.%.6lu MHz\n", + ppc_proc_freq/1000000, ppc_proc_freq%1000000); + + tb_ticks_per_jiffy = ppc_tb_freq / HZ; + tb_ticks_per_sec = tb_ticks_per_jiffy * HZ; + tb_ticks_per_usec = ppc_tb_freq / 1000000; + tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); + div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres); + tb_to_xs = divres.result_low; + + setup_default_decr(); +} diff --git a/arch/ppc64/kernel/mf.c b/arch/ppc64/kernel/mf.c new file mode 100644 index 00000000000..1bd52ece497 --- /dev/null +++ b/arch/ppc64/kernel/mf.c @@ -0,0 +1,1239 @@ +/* + * mf.c + * Copyright (C) 2001 Troy D. Armstrong IBM Corporation + * Copyright (C) 2004 Stephen Rothwell IBM Corporation + * + * This modules exists as an interface between a Linux secondary partition + * running on an iSeries and the primary partition's Virtual Service + * Processor (VSP) object. The VSP has final authority over powering on/off + * all partitions in the iSeries. It also provides miscellaneous low-level + * machine facility type operations. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* + * This is the structure layout for the Machine Facilites LPAR event + * flows. + */ +struct vsp_cmd_data { + u64 token; + u16 cmd; + HvLpIndex lp_index; + u8 result_code; + u32 reserved; + union { + u64 state; /* GetStateOut */ + u64 ipl_type; /* GetIplTypeOut, Function02SelectIplTypeIn */ + u64 ipl_mode; /* GetIplModeOut, Function02SelectIplModeIn */ + u64 page[4]; /* GetSrcHistoryIn */ + u64 flag; /* GetAutoIplWhenPrimaryIplsOut, + SetAutoIplWhenPrimaryIplsIn, + WhiteButtonPowerOffIn, + Function08FastPowerOffIn, + IsSpcnRackPowerIncompleteOut */ + struct { + u64 token; + u64 address_type; + u64 side; + u32 length; + u32 offset; + } kern; /* SetKernelImageIn, GetKernelImageIn, + SetKernelCmdLineIn, GetKernelCmdLineIn */ + u32 length_out; /* GetKernelImageOut, GetKernelCmdLineOut */ + u8 reserved[80]; + } sub_data; +}; + +struct vsp_rsp_data { + struct completion com; + struct vsp_cmd_data *response; +}; + +struct alloc_data { + u16 size; + u16 type; + u32 count; + u16 reserved1; + u8 reserved2; + HvLpIndex target_lp; +}; + +struct ce_msg_data; + +typedef void (*ce_msg_comp_hdlr)(void *token, struct ce_msg_data *vsp_cmd_rsp); + +struct ce_msg_comp_data { + ce_msg_comp_hdlr handler; + void *token; +}; + +struct ce_msg_data { + u8 ce_msg[12]; + char reserved[4]; + struct ce_msg_comp_data *completion; +}; + +struct io_mf_lp_event { + struct HvLpEvent hp_lp_event; + u16 subtype_result_code; + u16 reserved1; + u32 reserved2; + union { + struct alloc_data alloc; + struct ce_msg_data ce_msg; + struct vsp_cmd_data vsp_cmd; + } data; +}; + +#define subtype_data(a, b, c, d) \ + (((a) << 24) + ((b) << 16) + ((c) << 8) + (d)) + +/* + * All outgoing event traffic is kept on a FIFO queue. The first + * pointer points to the one that is outstanding, and all new + * requests get stuck on the end. Also, we keep a certain number of + * preallocated pending events so that we can operate very early in + * the boot up sequence (before kmalloc is ready). + */ +struct pending_event { + struct pending_event *next; + struct io_mf_lp_event event; + MFCompleteHandler hdlr; + char dma_data[72]; + unsigned dma_data_length; + unsigned remote_address; +}; +static spinlock_t pending_event_spinlock; +static struct pending_event *pending_event_head; +static struct pending_event *pending_event_tail; +static struct pending_event *pending_event_avail; +static struct pending_event pending_event_prealloc[16]; + +/* + * Put a pending event onto the available queue, so it can get reused. + * Attention! You must have the pending_event_spinlock before calling! + */ +static void free_pending_event(struct pending_event *ev) +{ + if (ev != NULL) { + ev->next = pending_event_avail; + pending_event_avail = ev; + } +} + +/* + * Enqueue the outbound event onto the stack. If the queue was + * empty to begin with, we must also issue it via the Hypervisor + * interface. There is a section of code below that will touch + * the first stack pointer without the protection of the pending_event_spinlock. + * This is OK, because we know that nobody else will be modifying + * the first pointer when we do this. + */ +static int signal_event(struct pending_event *ev) +{ + int rc = 0; + unsigned long flags; + int go = 1; + struct pending_event *ev1; + HvLpEvent_Rc hv_rc; + + /* enqueue the event */ + if (ev != NULL) { + ev->next = NULL; + spin_lock_irqsave(&pending_event_spinlock, flags); + if (pending_event_head == NULL) + pending_event_head = ev; + else { + go = 0; + pending_event_tail->next = ev; + } + pending_event_tail = ev; + spin_unlock_irqrestore(&pending_event_spinlock, flags); + } + + /* send the event */ + while (go) { + go = 0; + + /* any DMA data to send beforehand? */ + if (pending_event_head->dma_data_length > 0) + HvCallEvent_dmaToSp(pending_event_head->dma_data, + pending_event_head->remote_address, + pending_event_head->dma_data_length, + HvLpDma_Direction_LocalToRemote); + + hv_rc = HvCallEvent_signalLpEvent( + &pending_event_head->event.hp_lp_event); + if (hv_rc != HvLpEvent_Rc_Good) { + printk(KERN_ERR "mf.c: HvCallEvent_signalLpEvent() " + "failed with %d\n", (int)hv_rc); + + spin_lock_irqsave(&pending_event_spinlock, flags); + ev1 = pending_event_head; + pending_event_head = pending_event_head->next; + if (pending_event_head != NULL) + go = 1; + spin_unlock_irqrestore(&pending_event_spinlock, flags); + + if (ev1 == ev) + rc = -EIO; + else if (ev1->hdlr != NULL) + (*ev1->hdlr)((void *)ev1->event.hp_lp_event.xCorrelationToken, -EIO); + + spin_lock_irqsave(&pending_event_spinlock, flags); + free_pending_event(ev1); + spin_unlock_irqrestore(&pending_event_spinlock, flags); + } + } + + return rc; +} + +/* + * Allocate a new pending_event structure, and initialize it. + */ +static struct pending_event *new_pending_event(void) +{ + struct pending_event *ev = NULL; + HvLpIndex primary_lp = HvLpConfig_getPrimaryLpIndex(); + unsigned long flags; + struct HvLpEvent *hev; + + spin_lock_irqsave(&pending_event_spinlock, flags); + if (pending_event_avail != NULL) { + ev = pending_event_avail; + pending_event_avail = pending_event_avail->next; + } + spin_unlock_irqrestore(&pending_event_spinlock, flags); + if (ev == NULL) { + ev = kmalloc(sizeof(struct pending_event), GFP_ATOMIC); + if (ev == NULL) { + printk(KERN_ERR "mf.c: unable to kmalloc %ld bytes\n", + sizeof(struct pending_event)); + return NULL; + } + } + memset(ev, 0, sizeof(struct pending_event)); + hev = &ev->event.hp_lp_event; + hev->xFlags.xValid = 1; + hev->xFlags.xAckType = HvLpEvent_AckType_ImmediateAck; + hev->xFlags.xAckInd = HvLpEvent_AckInd_DoAck; + hev->xFlags.xFunction = HvLpEvent_Function_Int; + hev->xType = HvLpEvent_Type_MachineFac; + hev->xSourceLp = HvLpConfig_getLpIndex(); + hev->xTargetLp = primary_lp; + hev->xSizeMinus1 = sizeof(ev->event) - 1; + hev->xRc = HvLpEvent_Rc_Good; + hev->xSourceInstanceId = HvCallEvent_getSourceLpInstanceId(primary_lp, + HvLpEvent_Type_MachineFac); + hev->xTargetInstanceId = HvCallEvent_getTargetLpInstanceId(primary_lp, + HvLpEvent_Type_MachineFac); + + return ev; +} + +static int signal_vsp_instruction(struct vsp_cmd_data *vsp_cmd) +{ + struct pending_event *ev = new_pending_event(); + int rc; + struct vsp_rsp_data response; + + if (ev == NULL) + return -ENOMEM; + + init_completion(&response.com); + response.response = vsp_cmd; + ev->event.hp_lp_event.xSubtype = 6; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'V', 'I'); + ev->event.data.vsp_cmd.token = (u64)&response; + ev->event.data.vsp_cmd.cmd = vsp_cmd->cmd; + ev->event.data.vsp_cmd.lp_index = HvLpConfig_getLpIndex(); + ev->event.data.vsp_cmd.result_code = 0xFF; + ev->event.data.vsp_cmd.reserved = 0; + memcpy(&(ev->event.data.vsp_cmd.sub_data), + &(vsp_cmd->sub_data), sizeof(vsp_cmd->sub_data)); + mb(); + + rc = signal_event(ev); + if (rc == 0) + wait_for_completion(&response.com); + return rc; +} + + +/* + * Send a 12-byte CE message to the primary partition VSP object + */ +static int signal_ce_msg(char *ce_msg, struct ce_msg_comp_data *completion) +{ + struct pending_event *ev = new_pending_event(); + + if (ev == NULL) + return -ENOMEM; + + ev->event.hp_lp_event.xSubtype = 0; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'C', 'E'); + memcpy(ev->event.data.ce_msg.ce_msg, ce_msg, 12); + ev->event.data.ce_msg.completion = completion; + return signal_event(ev); +} + +/* + * Send a 12-byte CE message (with no data) to the primary partition VSP object + */ +static int signal_ce_msg_simple(u8 ce_op, struct ce_msg_comp_data *completion) +{ + u8 ce_msg[12]; + + memset(ce_msg, 0, sizeof(ce_msg)); + ce_msg[3] = ce_op; + return signal_ce_msg(ce_msg, completion); +} + +/* + * Send a 12-byte CE message and DMA data to the primary partition VSP object + */ +static int dma_and_signal_ce_msg(char *ce_msg, + struct ce_msg_comp_data *completion, void *dma_data, + unsigned dma_data_length, unsigned remote_address) +{ + struct pending_event *ev = new_pending_event(); + + if (ev == NULL) + return -ENOMEM; + + ev->event.hp_lp_event.xSubtype = 0; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'C', 'E'); + memcpy(ev->event.data.ce_msg.ce_msg, ce_msg, 12); + ev->event.data.ce_msg.completion = completion; + memcpy(ev->dma_data, dma_data, dma_data_length); + ev->dma_data_length = dma_data_length; + ev->remote_address = remote_address; + return signal_event(ev); +} + +/* + * Initiate a nice (hopefully) shutdown of Linux. We simply are + * going to try and send the init process a SIGINT signal. If + * this fails (why?), we'll simply force it off in a not-so-nice + * manner. + */ +static int shutdown(void) +{ + int rc = kill_proc(1, SIGINT, 1); + + if (rc) { + printk(KERN_ALERT "mf.c: SIGINT to init failed (%d), " + "hard shutdown commencing\n", rc); + mf_power_off(); + } else + printk(KERN_INFO "mf.c: init has been successfully notified " + "to proceed with shutdown\n"); + return rc; +} + +/* + * The primary partition VSP object is sending us a new + * event flow. Handle it... + */ +static void handle_int(struct io_mf_lp_event *event) +{ + struct ce_msg_data *ce_msg_data; + struct ce_msg_data *pce_msg_data; + unsigned long flags; + struct pending_event *pev; + + /* ack the interrupt */ + event->hp_lp_event.xRc = HvLpEvent_Rc_Good; + HvCallEvent_ackLpEvent(&event->hp_lp_event); + + /* process interrupt */ + switch (event->hp_lp_event.xSubtype) { + case 0: /* CE message */ + ce_msg_data = &event->data.ce_msg; + switch (ce_msg_data->ce_msg[3]) { + case 0x5B: /* power control notification */ + if ((ce_msg_data->ce_msg[5] & 0x20) != 0) { + printk(KERN_INFO "mf.c: Commencing partition shutdown\n"); + if (shutdown() == 0) + signal_ce_msg_simple(0xDB, NULL); + } + break; + case 0xC0: /* get time */ + spin_lock_irqsave(&pending_event_spinlock, flags); + pev = pending_event_head; + if (pev != NULL) + pending_event_head = pending_event_head->next; + spin_unlock_irqrestore(&pending_event_spinlock, flags); + if (pev == NULL) + break; + pce_msg_data = &pev->event.data.ce_msg; + if (pce_msg_data->ce_msg[3] != 0x40) + break; + if (pce_msg_data->completion != NULL) { + ce_msg_comp_hdlr handler = + pce_msg_data->completion->handler; + void *token = pce_msg_data->completion->token; + + if (handler != NULL) + (*handler)(token, ce_msg_data); + } + spin_lock_irqsave(&pending_event_spinlock, flags); + free_pending_event(pev); + spin_unlock_irqrestore(&pending_event_spinlock, flags); + /* send next waiting event */ + if (pending_event_head != NULL) + signal_event(NULL); + break; + } + break; + case 1: /* IT sys shutdown */ + printk(KERN_INFO "mf.c: Commencing system shutdown\n"); + shutdown(); + break; + } +} + +/* + * The primary partition VSP object is acknowledging the receipt + * of a flow we sent to them. If there are other flows queued + * up, we must send another one now... + */ +static void handle_ack(struct io_mf_lp_event *event) +{ + unsigned long flags; + struct pending_event *two = NULL; + unsigned long free_it = 0; + struct ce_msg_data *ce_msg_data; + struct ce_msg_data *pce_msg_data; + struct vsp_rsp_data *rsp; + + /* handle current event */ + if (pending_event_head == NULL) { + printk(KERN_ERR "mf.c: stack empty for receiving ack\n"); + return; + } + + switch (event->hp_lp_event.xSubtype) { + case 0: /* CE msg */ + ce_msg_data = &event->data.ce_msg; + if (ce_msg_data->ce_msg[3] != 0x40) { + free_it = 1; + break; + } + if (ce_msg_data->ce_msg[2] == 0) + break; + free_it = 1; + pce_msg_data = &pending_event_head->event.data.ce_msg; + if (pce_msg_data->completion != NULL) { + ce_msg_comp_hdlr handler = + pce_msg_data->completion->handler; + void *token = pce_msg_data->completion->token; + + if (handler != NULL) + (*handler)(token, ce_msg_data); + } + break; + case 4: /* allocate */ + case 5: /* deallocate */ + if (pending_event_head->hdlr != NULL) + (*pending_event_head->hdlr)((void *)event->hp_lp_event.xCorrelationToken, event->data.alloc.count); + free_it = 1; + break; + case 6: + free_it = 1; + rsp = (struct vsp_rsp_data *)event->data.vsp_cmd.token; + if (rsp == NULL) { + printk(KERN_ERR "mf.c: no rsp\n"); + break; + } + if (rsp->response != NULL) + memcpy(rsp->response, &event->data.vsp_cmd, + sizeof(event->data.vsp_cmd)); + complete(&rsp->com); + break; + } + + /* remove from queue */ + spin_lock_irqsave(&pending_event_spinlock, flags); + if ((pending_event_head != NULL) && (free_it == 1)) { + struct pending_event *oldHead = pending_event_head; + + pending_event_head = pending_event_head->next; + two = pending_event_head; + free_pending_event(oldHead); + } + spin_unlock_irqrestore(&pending_event_spinlock, flags); + + /* send next waiting event */ + if (two != NULL) + signal_event(NULL); +} + +/* + * This is the generic event handler we are registering with + * the Hypervisor. Ensure the flows are for us, and then + * parse it enough to know if it is an interrupt or an + * acknowledge. + */ +static void hv_handler(struct HvLpEvent *event, struct pt_regs *regs) +{ + if ((event != NULL) && (event->xType == HvLpEvent_Type_MachineFac)) { + switch(event->xFlags.xFunction) { + case HvLpEvent_Function_Ack: + handle_ack((struct io_mf_lp_event *)event); + break; + case HvLpEvent_Function_Int: + handle_int((struct io_mf_lp_event *)event); + break; + default: + printk(KERN_ERR "mf.c: non ack/int event received\n"); + break; + } + } else + printk(KERN_ERR "mf.c: alien event received\n"); +} + +/* + * Global kernel interface to allocate and seed events into the + * Hypervisor. + */ +void mf_allocate_lp_events(HvLpIndex target_lp, HvLpEvent_Type type, + unsigned size, unsigned count, MFCompleteHandler hdlr, + void *user_token) +{ + struct pending_event *ev = new_pending_event(); + int rc; + + if (ev == NULL) { + rc = -ENOMEM; + } else { + ev->event.hp_lp_event.xSubtype = 4; + ev->event.hp_lp_event.xCorrelationToken = (u64)user_token; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'M', 'A'); + ev->event.data.alloc.target_lp = target_lp; + ev->event.data.alloc.type = type; + ev->event.data.alloc.size = size; + ev->event.data.alloc.count = count; + ev->hdlr = hdlr; + rc = signal_event(ev); + } + if ((rc != 0) && (hdlr != NULL)) + (*hdlr)(user_token, rc); +} +EXPORT_SYMBOL(mf_allocate_lp_events); + +/* + * Global kernel interface to unseed and deallocate events already in + * Hypervisor. + */ +void mf_deallocate_lp_events(HvLpIndex target_lp, HvLpEvent_Type type, + unsigned count, MFCompleteHandler hdlr, void *user_token) +{ + struct pending_event *ev = new_pending_event(); + int rc; + + if (ev == NULL) + rc = -ENOMEM; + else { + ev->event.hp_lp_event.xSubtype = 5; + ev->event.hp_lp_event.xCorrelationToken = (u64)user_token; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'M', 'D'); + ev->event.data.alloc.target_lp = target_lp; + ev->event.data.alloc.type = type; + ev->event.data.alloc.count = count; + ev->hdlr = hdlr; + rc = signal_event(ev); + } + if ((rc != 0) && (hdlr != NULL)) + (*hdlr)(user_token, rc); +} +EXPORT_SYMBOL(mf_deallocate_lp_events); + +/* + * Global kernel interface to tell the VSP object in the primary + * partition to power this partition off. + */ +void mf_power_off(void) +{ + printk(KERN_INFO "mf.c: Down it goes...\n"); + signal_ce_msg_simple(0x4d, NULL); + for (;;) + ; +} + +/* + * Global kernel interface to tell the VSP object in the primary + * partition to reboot this partition. + */ +void mf_reboot(void) +{ + printk(KERN_INFO "mf.c: Preparing to bounce...\n"); + signal_ce_msg_simple(0x4e, NULL); + for (;;) + ; +} + +/* + * Display a single word SRC onto the VSP control panel. + */ +void mf_display_src(u32 word) +{ + u8 ce[12]; + + memset(ce, 0, sizeof(ce)); + ce[3] = 0x4a; + ce[7] = 0x01; + ce[8] = word >> 24; + ce[9] = word >> 16; + ce[10] = word >> 8; + ce[11] = word; + signal_ce_msg(ce, NULL); +} + +/* + * Display a single word SRC of the form "PROGXXXX" on the VSP control panel. + */ +void mf_display_progress(u16 value) +{ + u8 ce[12]; + u8 src[72]; + + memcpy(ce, "\x00\x00\x04\x4A\x00\x00\x00\x48\x00\x00\x00\x00", 12); + memcpy(src, "\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00PROGxxxx ", + 72); + src[6] = value >> 8; + src[7] = value & 255; + src[44] = "0123456789ABCDEF"[(value >> 12) & 15]; + src[45] = "0123456789ABCDEF"[(value >> 8) & 15]; + src[46] = "0123456789ABCDEF"[(value >> 4) & 15]; + src[47] = "0123456789ABCDEF"[value & 15]; + dma_and_signal_ce_msg(ce, NULL, src, sizeof(src), 9 * 64 * 1024); +} + +/* + * Clear the VSP control panel. Used to "erase" an SRC that was + * previously displayed. + */ +void mf_clear_src(void) +{ + signal_ce_msg_simple(0x4b, NULL); +} + +/* + * Initialization code here. + */ +void mf_init(void) +{ + int i; + + /* initialize */ + spin_lock_init(&pending_event_spinlock); + for (i = 0; + i < sizeof(pending_event_prealloc) / sizeof(*pending_event_prealloc); + ++i) + free_pending_event(&pending_event_prealloc[i]); + HvLpEvent_registerHandler(HvLpEvent_Type_MachineFac, &hv_handler); + + /* virtual continue ack */ + signal_ce_msg_simple(0x57, NULL); + + /* initialization complete */ + printk(KERN_NOTICE "mf.c: iSeries Linux LPAR Machine Facilities " + "initialized\n"); +} + +struct rtc_time_data { + struct completion com; + struct ce_msg_data ce_msg; + int rc; +}; + +static void get_rtc_time_complete(void *token, struct ce_msg_data *ce_msg) +{ + struct rtc_time_data *rtc = token; + + memcpy(&rtc->ce_msg, ce_msg, sizeof(rtc->ce_msg)); + rtc->rc = 0; + complete(&rtc->com); +} + +int mf_get_rtc(struct rtc_time *tm) +{ + struct ce_msg_comp_data ce_complete; + struct rtc_time_data rtc_data; + int rc; + + memset(&ce_complete, 0, sizeof(ce_complete)); + memset(&rtc_data, 0, sizeof(rtc_data)); + init_completion(&rtc_data.com); + ce_complete.handler = &get_rtc_time_complete; + ce_complete.token = &rtc_data; + rc = signal_ce_msg_simple(0x40, &ce_complete); + if (rc) + return rc; + wait_for_completion(&rtc_data.com); + tm->tm_wday = 0; + tm->tm_yday = 0; + tm->tm_isdst = 0; + if (rtc_data.rc) { + tm->tm_sec = 0; + tm->tm_min = 0; + tm->tm_hour = 0; + tm->tm_mday = 15; + tm->tm_mon = 5; + tm->tm_year = 52; + return rtc_data.rc; + } + + if ((rtc_data.ce_msg.ce_msg[2] == 0xa9) || + (rtc_data.ce_msg.ce_msg[2] == 0xaf)) { + /* TOD clock is not set */ + tm->tm_sec = 1; + tm->tm_min = 1; + tm->tm_hour = 1; + tm->tm_mday = 10; + tm->tm_mon = 8; + tm->tm_year = 71; + mf_set_rtc(tm); + } + { + u8 *ce_msg = rtc_data.ce_msg.ce_msg; + u8 year = ce_msg[5]; + u8 sec = ce_msg[6]; + u8 min = ce_msg[7]; + u8 hour = ce_msg[8]; + u8 day = ce_msg[10]; + u8 mon = ce_msg[11]; + + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + + if (year <= 69) + year += 100; + + tm->tm_sec = sec; + tm->tm_min = min; + tm->tm_hour = hour; + tm->tm_mday = day; + tm->tm_mon = mon; + tm->tm_year = year; + } + + return 0; +} + +int mf_set_rtc(struct rtc_time *tm) +{ + char ce_time[12]; + u8 day, mon, hour, min, sec, y1, y2; + unsigned year; + + year = 1900 + tm->tm_year; + y1 = year / 100; + y2 = year % 100; + + sec = tm->tm_sec; + min = tm->tm_min; + hour = tm->tm_hour; + day = tm->tm_mday; + mon = tm->tm_mon + 1; + + BIN_TO_BCD(sec); + BIN_TO_BCD(min); + BIN_TO_BCD(hour); + BIN_TO_BCD(mon); + BIN_TO_BCD(day); + BIN_TO_BCD(y1); + BIN_TO_BCD(y2); + + memset(ce_time, 0, sizeof(ce_time)); + ce_time[3] = 0x41; + ce_time[4] = y1; + ce_time[5] = y2; + ce_time[6] = sec; + ce_time[7] = min; + ce_time[8] = hour; + ce_time[10] = day; + ce_time[11] = mon; + + return signal_ce_msg(ce_time, NULL); +} + +#ifdef CONFIG_PROC_FS + +static int proc_mf_dump_cmdline(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + char *p; + struct vsp_cmd_data vsp_cmd; + int rc; + dma_addr_t dma_addr; + + /* The HV appears to return no more than 256 bytes of command line */ + if (off >= 256) + return 0; + if ((off + count) > 256) + count = 256 - off; + + dma_addr = dma_map_single(iSeries_vio_dev, page, off + count, + DMA_FROM_DEVICE); + if (dma_mapping_error(dma_addr)) + return -ENOMEM; + memset(page, 0, off + count); + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 33; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = (u64)data; + vsp_cmd.sub_data.kern.length = off + count; + mb(); + rc = signal_vsp_instruction(&vsp_cmd); + dma_unmap_single(iSeries_vio_dev, dma_addr, off + count, + DMA_FROM_DEVICE); + if (rc) + return rc; + if (vsp_cmd.result_code != 0) + return -ENOMEM; + p = page; + len = 0; + while (len < (off + count)) { + if ((*p == '\0') || (*p == '\n')) { + if (*p == '\0') + *p = '\n'; + p++; + len++; + *eof = 1; + break; + } + p++; + len++; + } + + if (len < off) { + *eof = 1; + len = 0; + } + return len; +} + +#if 0 +static int mf_getVmlinuxChunk(char *buffer, int *size, int offset, u64 side) +{ + struct vsp_cmd_data vsp_cmd; + int rc; + int len = *size; + dma_addr_t dma_addr; + + dma_addr = dma_map_single(iSeries_vio_dev, buffer, len, + DMA_FROM_DEVICE); + memset(buffer, 0, len); + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 32; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = side; + vsp_cmd.sub_data.kern.offset = offset; + vsp_cmd.sub_data.kern.length = len; + mb(); + rc = signal_vsp_instruction(&vsp_cmd); + if (rc == 0) { + if (vsp_cmd.result_code == 0) + *size = vsp_cmd.sub_data.length_out; + else + rc = -ENOMEM; + } + + dma_unmap_single(iSeries_vio_dev, dma_addr, len, DMA_FROM_DEVICE); + + return rc; +} + +static int proc_mf_dump_vmlinux(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int sizeToGet = count; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (mf_getVmlinuxChunk(page, &sizeToGet, off, (u64)data) == 0) { + if (sizeToGet != 0) { + *start = page + off; + return sizeToGet; + } + *eof = 1; + return 0; + } + *eof = 1; + return 0; +} +#endif + +static int proc_mf_dump_side(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + char mf_current_side = ' '; + struct vsp_cmd_data vsp_cmd; + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 2; + vsp_cmd.sub_data.ipl_type = 0; + mb(); + + if (signal_vsp_instruction(&vsp_cmd) == 0) { + if (vsp_cmd.result_code == 0) { + switch (vsp_cmd.sub_data.ipl_type) { + case 0: mf_current_side = 'A'; + break; + case 1: mf_current_side = 'B'; + break; + case 2: mf_current_side = 'C'; + break; + default: mf_current_side = 'D'; + break; + } + } + } + + len = sprintf(page, "%c\n", mf_current_side); + + if (len <= (off + count)) + *eof = 1; + *start = page + off; + len -= off; + if (len > count) + len = count; + if (len < 0) + len = 0; + return len; +} + +static int proc_mf_change_side(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char side; + u64 newSide; + struct vsp_cmd_data vsp_cmd; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (count == 0) + return 0; + + if (get_user(side, buffer)) + return -EFAULT; + + switch (side) { + case 'A': newSide = 0; + break; + case 'B': newSide = 1; + break; + case 'C': newSide = 2; + break; + case 'D': newSide = 3; + break; + default: + printk(KERN_ERR "mf_proc.c: proc_mf_change_side: invalid side\n"); + return -EINVAL; + } + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.sub_data.ipl_type = newSide; + vsp_cmd.cmd = 10; + + (void)signal_vsp_instruction(&vsp_cmd); + + return count; +} + +#if 0 +static void mf_getSrcHistory(char *buffer, int size) +{ + struct IplTypeReturnStuff return_stuff; + struct pending_event *ev = new_pending_event(); + int rc = 0; + char *pages[4]; + + pages[0] = kmalloc(4096, GFP_ATOMIC); + pages[1] = kmalloc(4096, GFP_ATOMIC); + pages[2] = kmalloc(4096, GFP_ATOMIC); + pages[3] = kmalloc(4096, GFP_ATOMIC); + if ((ev == NULL) || (pages[0] == NULL) || (pages[1] == NULL) + || (pages[2] == NULL) || (pages[3] == NULL)) + return -ENOMEM; + + return_stuff.xType = 0; + return_stuff.xRc = 0; + return_stuff.xDone = 0; + ev->event.hp_lp_event.xSubtype = 6; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'V', 'I'); + ev->event.data.vsp_cmd.xEvent = &return_stuff; + ev->event.data.vsp_cmd.cmd = 4; + ev->event.data.vsp_cmd.lp_index = HvLpConfig_getLpIndex(); + ev->event.data.vsp_cmd.result_code = 0xFF; + ev->event.data.vsp_cmd.reserved = 0; + ev->event.data.vsp_cmd.sub_data.page[0] = ISERIES_HV_ADDR(pages[0]); + ev->event.data.vsp_cmd.sub_data.page[1] = ISERIES_HV_ADDR(pages[1]); + ev->event.data.vsp_cmd.sub_data.page[2] = ISERIES_HV_ADDR(pages[2]); + ev->event.data.vsp_cmd.sub_data.page[3] = ISERIES_HV_ADDR(pages[3]); + mb(); + if (signal_event(ev) != 0) + return; + + while (return_stuff.xDone != 1) + udelay(10); + if (return_stuff.xRc == 0) + memcpy(buffer, pages[0], size); + kfree(pages[0]); + kfree(pages[1]); + kfree(pages[2]); + kfree(pages[3]); +} +#endif + +static int proc_mf_dump_src(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ +#if 0 + int len; + + mf_getSrcHistory(page, count); + len = count; + len -= off; + if (len < count) { + *eof = 1; + if (len <= 0) + return 0; + } else + len = count; + *start = page + off; + return len; +#else + return 0; +#endif +} + +static int proc_mf_change_src(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char stkbuf[10]; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if ((count < 4) && (count != 1)) { + printk(KERN_ERR "mf_proc: invalid src\n"); + return -EINVAL; + } + + if (count > (sizeof(stkbuf) - 1)) + count = sizeof(stkbuf) - 1; + if (copy_from_user(stkbuf, buffer, count)) + return -EFAULT; + + if ((count == 1) && (*stkbuf == '\0')) + mf_clear_src(); + else + mf_display_src(*(u32 *)stkbuf); + + return count; +} + +static int proc_mf_change_cmdline(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + struct vsp_cmd_data vsp_cmd; + dma_addr_t dma_addr; + char *page; + int ret = -EACCES; + + if (!capable(CAP_SYS_ADMIN)) + goto out; + + dma_addr = 0; + page = dma_alloc_coherent(iSeries_vio_dev, count, &dma_addr, + GFP_ATOMIC); + ret = -ENOMEM; + if (page == NULL) + goto out; + + ret = -EFAULT; + if (copy_from_user(page, buffer, count)) + goto out_free; + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 31; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = (u64)data; + vsp_cmd.sub_data.kern.length = count; + mb(); + (void)signal_vsp_instruction(&vsp_cmd); + ret = count; + +out_free: + dma_free_coherent(iSeries_vio_dev, count, page, dma_addr); +out: + return ret; +} + +static ssize_t proc_mf_change_vmlinux(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + ssize_t rc; + dma_addr_t dma_addr; + char *page; + struct vsp_cmd_data vsp_cmd; + + rc = -EACCES; + if (!capable(CAP_SYS_ADMIN)) + goto out; + + dma_addr = 0; + page = dma_alloc_coherent(iSeries_vio_dev, count, &dma_addr, + GFP_ATOMIC); + rc = -ENOMEM; + if (page == NULL) { + printk(KERN_ERR "mf.c: couldn't allocate memory to set vmlinux chunk\n"); + goto out; + } + rc = -EFAULT; + if (copy_from_user(page, buf, count)) + goto out_free; + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 30; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = (u64)dp->data; + vsp_cmd.sub_data.kern.offset = *ppos; + vsp_cmd.sub_data.kern.length = count; + mb(); + rc = signal_vsp_instruction(&vsp_cmd); + if (rc) + goto out_free; + rc = -ENOMEM; + if (vsp_cmd.result_code != 0) + goto out_free; + + *ppos += count; + rc = count; +out_free: + dma_free_coherent(iSeries_vio_dev, count, page, dma_addr); +out: + return rc; +} + +static struct file_operations proc_vmlinux_operations = { + .write = proc_mf_change_vmlinux, +}; + +static int __init mf_proc_init(void) +{ + struct proc_dir_entry *mf_proc_root; + struct proc_dir_entry *ent; + struct proc_dir_entry *mf; + char name[2]; + int i; + + mf_proc_root = proc_mkdir("iSeries/mf", NULL); + if (!mf_proc_root) + return 1; + + name[1] = '\0'; + for (i = 0; i < 4; i++) { + name[0] = 'A' + i; + mf = proc_mkdir(name, mf_proc_root); + if (!mf) + return 1; + + ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)(long)i; + ent->read_proc = proc_mf_dump_cmdline; + ent->write_proc = proc_mf_change_cmdline; + + if (i == 3) /* no vmlinux entry for 'D' */ + continue; + + ent = create_proc_entry("vmlinux", S_IFREG|S_IWUSR, mf); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)(long)i; + ent->proc_fops = &proc_vmlinux_operations; + } + + ent = create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_mf_dump_side; + ent->write_proc = proc_mf_change_side; + + ent = create_proc_entry("src", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_mf_dump_src; + ent->write_proc = proc_mf_change_src; + + return 0; +} + +__initcall(mf_proc_init); + +#endif /* CONFIG_PROC_FS */ diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S new file mode 100644 index 00000000000..90b41f48d21 --- /dev/null +++ b/arch/ppc64/kernel/misc.S @@ -0,0 +1,1234 @@ +/* + * arch/ppc/kernel/misc.S + * + * + * + * This file contains miscellaneous low-level functions. + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) + * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + .text + +/* + * Returns (address we're running at) - (address we were linked at) + * for use before the text and data are mapped to KERNELBASE. + */ + +_GLOBAL(reloc_offset) + mflr r0 + bl 1f +1: mflr r3 + LOADADDR(r4,1b) + sub r3,r4,r3 + mtlr r0 + blr + +_GLOBAL(get_msr) + mfmsr r3 + blr + +_GLOBAL(get_dar) + mfdar r3 + blr + +_GLOBAL(get_srr0) + mfsrr0 r3 + blr + +_GLOBAL(get_srr1) + mfsrr1 r3 + blr + +_GLOBAL(get_sp) + mr r3,r1 + blr + +#ifdef CONFIG_PPC_ISERIES +/* unsigned long local_save_flags(void) */ +_GLOBAL(local_get_flags) + lbz r3,PACAPROCENABLED(r13) + blr + +/* unsigned long local_irq_disable(void) */ +_GLOBAL(local_irq_disable) + lbz r3,PACAPROCENABLED(r13) + li r4,0 + stb r4,PACAPROCENABLED(r13) + blr /* Done */ + +/* void local_irq_restore(unsigned long flags) */ +_GLOBAL(local_irq_restore) + lbz r5,PACAPROCENABLED(r13) + /* Check if things are setup the way we want _already_. */ + cmpw 0,r3,r5 + beqlr + /* are we enabling interrupts? */ + cmpdi 0,r3,0 + stb r3,PACAPROCENABLED(r13) + beqlr + /* Check pending interrupts */ + /* A decrementer, IPI or PMC interrupt may have occurred + * while we were in the hypervisor (which enables) */ + ld r4,PACALPPACA+LPPACAANYINT(r13) + cmpdi r4,0 + beqlr + + /* + * Handle pending interrupts in interrupt context + */ + li r0,0x5555 + sc + blr +#endif /* CONFIG_PPC_ISERIES */ + +#ifdef CONFIG_IRQSTACKS +_GLOBAL(call_do_softirq) + mflr r0 + std r0,16(r1) + stdu r1,THREAD_SIZE-112(r3) + mr r1,r3 + bl .__do_softirq + ld r1,0(r1) + ld r0,16(r1) + mtlr r0 + blr + +_GLOBAL(call_handle_IRQ_event) + mflr r0 + std r0,16(r1) + stdu r1,THREAD_SIZE-112(r6) + mr r1,r6 + bl .handle_IRQ_event + ld r1,0(r1) + ld r0,16(r1) + mtlr r0 + blr +#endif /* CONFIG_IRQSTACKS */ + + /* + * To be called by C code which needs to do some operations with MMU + * disabled. Note that interrupts have to be disabled by the caller + * prior to calling us. The code called _MUST_ be in the RMO of course + * and part of the linear mapping as we don't attempt to translate the + * stack pointer at all. The function is called with the stack switched + * to this CPU emergency stack + * + * prototype is void *call_with_mmu_off(void *func, void *data); + * + * the called function is expected to be of the form + * + * void *called(void *data); + */ +_GLOBAL(call_with_mmu_off) + mflr r0 /* get link, save it on stackframe */ + std r0,16(r1) + mr r1,r5 /* save old stack ptr */ + ld r1,PACAEMERGSP(r13) /* get emerg. stack */ + subi r1,r1,STACK_FRAME_OVERHEAD + std r0,16(r1) /* save link on emerg. stack */ + std r5,0(r1) /* save old stack ptr in backchain */ + ld r3,0(r3) /* get to real function ptr (assume same TOC) */ + bl 2f /* we need LR to return, continue at label 2 */ + + ld r0,16(r1) /* we return here from the call, get LR and */ + ld r1,0(r1) /* .. old stack ptr */ + mtspr SPRN_SRR0,r0 /* and get back to virtual mode with these */ + mfmsr r4 + ori r4,r4,MSR_IR|MSR_DR + mtspr SPRN_SRR1,r4 + rfid + +2: mtspr SPRN_SRR0,r3 /* coming from above, enter real mode */ + mr r3,r4 /* get parameter */ + mfmsr r0 + ori r0,r0,MSR_IR|MSR_DR + xori r0,r0,MSR_IR|MSR_DR + mtspr SPRN_SRR1,r0 + rfid + + + .section ".toc","aw" +PPC64_CACHES: + .tc ppc64_caches[TC],ppc64_caches + .section ".text" + +/* + * Write any modified data cache blocks out to memory + * and invalidate the corresponding instruction cache blocks. + * + * flush_icache_range(unsigned long start, unsigned long stop) + * + * flush all bytes from start through stop-1 inclusive + */ + +_GLOBAL(__flush_icache_range) + +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + * and in some cases i-cache and d-cache line sizes differ from + * each other. + */ + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +1: dcbst 0,r6 + add r6,r6,r7 + bdnz 1b + sync + +/* Now invalidate the instruction cache */ + + lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 + lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +2: icbi 0,r6 + add r6,r6,r7 + bdnz 2b + isync + blr + +/* + * Like above, but only do the D-cache. + * + * flush_dcache_range(unsigned long start, unsigned long stop) + * + * flush all bytes from start to stop-1 inclusive + */ +_GLOBAL(flush_dcache_range) + +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + */ + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 +0: dcbst 0,r6 + add r6,r6,r7 + bdnz 0b + sync + blr + +/* + * Like above, but works on non-mapped physical addresses. + * Use only for non-LPAR setups ! It also assumes real mode + * is cacheable. Used for flushing out the DART before using + * it as uncacheable memory + * + * flush_dcache_phys_range(unsigned long start, unsigned long stop) + * + * flush all bytes from start to stop-1 inclusive + */ +_GLOBAL(flush_dcache_phys_range) + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + mfmsr r5 /* Disable MMU Data Relocation */ + ori r0,r5,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsr r0 + sync + isync + mtctr r8 +0: dcbst 0,r6 + add r6,r6,r7 + bdnz 0b + sync + isync + mtmsr r5 /* Re-enable MMU Data Relocation */ + sync + isync + blr + +_GLOBAL(flush_inval_dcache_range) + ld r10,PPC64_CACHES@toc(r2) + lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + addi r5,r7,-1 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */ + srw. r8,r8,r9 /* compute line count */ + beqlr /* nothing to do? */ + sync + isync + mtctr r8 +0: dcbf 0,r6 + add r6,r6,r7 + bdnz 0b + sync + isync + blr + + +/* + * Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * void __flush_dcache_icache(void *page) + */ +_GLOBAL(__flush_dcache_icache) +/* + * Flush the data cache to memory + * + * Different systems have different cache line sizes + */ + +/* Flush the dcache */ + ld r7,PPC64_CACHES@toc(r2) + clrrdi r3,r3,12 /* Page align */ + lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */ + lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */ + mr r6,r3 + mtctr r4 +0: dcbst 0,r6 + add r6,r6,r5 + bdnz 0b + sync + +/* Now invalidate the icache */ + + lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */ + lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */ + mtctr r4 +1: icbi 0,r3 + add r3,r3,r5 + bdnz 1b + isync + blr + +/* + * I/O string operations + * + * insb(port, buf, len) + * outsb(port, buf, len) + * insw(port, buf, len) + * outsw(port, buf, len) + * insl(port, buf, len) + * outsl(port, buf, len) + * insw_ns(port, buf, len) + * outsw_ns(port, buf, len) + * insl_ns(port, buf, len) + * outsl_ns(port, buf, len) + * + * The *_ns versions don't do byte-swapping. + */ +_GLOBAL(_insb) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,1 + blelr- +00: lbz r5,0(r3) + eieio + stbu r5,1(r4) + bdnz 00b + twi 0,r5,0 + isync + blr + +_GLOBAL(_outsb) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,1 + blelr- +00: lbzu r5,1(r4) + stb r5,0(r3) + bdnz 00b + sync + blr + +_GLOBAL(_insw) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhbrx r5,0,r3 + eieio + sthu r5,2(r4) + bdnz 00b + twi 0,r5,0 + isync + blr + +_GLOBAL(_outsw) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhzu r5,2(r4) + sthbrx r5,0,r3 + bdnz 00b + sync + blr + +_GLOBAL(_insl) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwbrx r5,0,r3 + eieio + stwu r5,4(r4) + bdnz 00b + twi 0,r5,0 + isync + blr + +_GLOBAL(_outsl) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwzu r5,4(r4) + stwbrx r5,0,r3 + bdnz 00b + sync + blr + +/* _GLOBAL(ide_insw) now in drivers/ide/ide-iops.c */ +_GLOBAL(_insw_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhz r5,0(r3) + eieio + sthu r5,2(r4) + bdnz 00b + twi 0,r5,0 + isync + blr + +/* _GLOBAL(ide_outsw) now in drivers/ide/ide-iops.c */ +_GLOBAL(_outsw_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,2 + blelr- +00: lhzu r5,2(r4) + sth r5,0(r3) + bdnz 00b + sync + blr + +_GLOBAL(_insl_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwz r5,0(r3) + eieio + stwu r5,4(r4) + bdnz 00b + twi 0,r5,0 + isync + blr + +_GLOBAL(_outsl_ns) + cmpwi 0,r5,0 + mtctr r5 + subi r4,r4,4 + blelr- +00: lwzu r5,4(r4) + stw r5,0(r3) + bdnz 00b + sync + blr + + +_GLOBAL(cvt_fd) + lfd 0,0(r5) /* load up fpscr value */ + mtfsf 0xff,0 + lfs 0,0(r3) + stfd 0,0(r4) + mffs 0 /* save new fpscr value */ + stfd 0,0(r5) + blr + +_GLOBAL(cvt_df) + lfd 0,0(r5) /* load up fpscr value */ + mtfsf 0xff,0 + lfd 0,0(r3) + stfs 0,0(r4) + mffs 0 /* save new fpscr value */ + stfd 0,0(r5) + blr + +/* + * identify_cpu and calls setup_cpu + * In: r3 = base of the cpu_specs array + * r4 = address of cur_cpu_spec + * r5 = relocation offset + */ +_GLOBAL(identify_cpu) + mfpvr r7 +1: + lwz r8,CPU_SPEC_PVR_MASK(r3) + and r8,r8,r7 + lwz r9,CPU_SPEC_PVR_VALUE(r3) + cmplw 0,r9,r8 + beq 1f + addi r3,r3,CPU_SPEC_ENTRY_SIZE + b 1b +1: + add r0,r3,r5 + std r0,0(r4) + ld r4,CPU_SPEC_SETUP(r3) + sub r4,r4,r5 + ld r4,0(r4) + sub r4,r4,r5 + mtctr r4 + /* Calling convention for cpu setup is r3=offset, r4=cur_cpu_spec */ + mr r4,r3 + mr r3,r5 + bctr + +/* + * do_cpu_ftr_fixups - goes through the list of CPU feature fixups + * and writes nop's over sections of code that don't apply for this cpu. + * r3 = data offset (not changed) + */ +_GLOBAL(do_cpu_ftr_fixups) + /* Get CPU 0 features */ + LOADADDR(r6,cur_cpu_spec) + sub r6,r6,r3 + ld r4,0(r6) + sub r4,r4,r3 + ld r4,CPU_SPEC_FEATURES(r4) + /* Get the fixup table */ + LOADADDR(r6,__start___ftr_fixup) + sub r6,r6,r3 + LOADADDR(r7,__stop___ftr_fixup) + sub r7,r7,r3 + /* Do the fixup */ +1: cmpld r6,r7 + bgelr + addi r6,r6,32 + ld r8,-32(r6) /* mask */ + and r8,r8,r4 + ld r9,-24(r6) /* value */ + cmpld r8,r9 + beq 1b + ld r8,-16(r6) /* section begin */ + ld r9,-8(r6) /* section end */ + subf. r9,r8,r9 + beq 1b + /* write nops over the section of code */ + /* todo: if large section, add a branch at the start of it */ + srwi r9,r9,2 + mtctr r9 + sub r8,r8,r3 + lis r0,0x60000000@h /* nop */ +3: stw r0,0(r8) + andi. r10,r4,CPU_FTR_SPLIT_ID_CACHE@l + beq 2f + dcbst 0,r8 /* suboptimal, but simpler */ + sync + icbi 0,r8 +2: addi r8,r8,4 + bdnz 3b + sync /* additional sync needed on g4 */ + isync + b 1b + +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) +/* + * Do an IO access in real mode + */ +_GLOBAL(real_readb) + mfmsr r7 + ori r0,r7,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsrd r0 + sync + isync + mfspr r6,SPRN_HID4 + rldicl r5,r6,32,0 + ori r5,r5,0x100 + rldicl r5,r5,32,0 + sync + mtspr SPRN_HID4,r5 + isync + slbia + isync + lbz r3,0(r3) + sync + mtspr SPRN_HID4,r6 + isync + slbia + isync + mtmsrd r7 + sync + isync + blr + + /* + * Do an IO access in real mode + */ +_GLOBAL(real_writeb) + mfmsr r7 + ori r0,r7,MSR_DR + xori r0,r0,MSR_DR + sync + mtmsrd r0 + sync + isync + mfspr r6,SPRN_HID4 + rldicl r5,r6,32,0 + ori r5,r5,0x100 + rldicl r5,r5,32,0 + sync + mtspr SPRN_HID4,r5 + isync + slbia + isync + stb r3,0(r4) + sync + mtspr SPRN_HID4,r6 + isync + slbia + isync + mtmsrd r7 + sync + isync + blr +#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ + +/* + * Create a kernel thread + * kernel_thread(fn, arg, flags) + */ +_GLOBAL(kernel_thread) + std r29,-24(r1) + std r30,-16(r1) + stdu r1,-STACK_FRAME_OVERHEAD(r1) + mr r29,r3 + mr r30,r4 + ori r3,r5,CLONE_VM /* flags */ + oris r3,r3,(CLONE_UNTRACED>>16) + li r4,0 /* new sp (unused) */ + li r0,__NR_clone + sc + cmpdi 0,r3,0 /* parent or child? */ + bne 1f /* return if parent */ + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + ld r2,8(r29) + ld r29,0(r29) + mtlr r29 /* fn addr in lr */ + mr r3,r30 /* load arg and call fn */ + blrl + li r0,__NR_exit /* exit after child exits */ + li r3,0 + sc +1: addi r1,r1,STACK_FRAME_OVERHEAD + ld r29,-24(r1) + ld r30,-16(r1) + blr + +/* Why isn't this a) automatic, b) written in 'C'? */ + .balign 8 +_GLOBAL(sys_call_table32) + .llong .sys_restart_syscall /* 0 */ + .llong .sys_exit + .llong .ppc_fork + .llong .sys_read + .llong .sys_write + .llong .sys32_open /* 5 */ + .llong .sys_close + .llong .sys32_waitpid + .llong .sys32_creat + .llong .sys_link + .llong .sys_unlink /* 10 */ + .llong .sys32_execve + .llong .sys_chdir + .llong .compat_sys_time + .llong .sys_mknod + .llong .sys_chmod /* 15 */ + .llong .sys_lchown + .llong .sys_ni_syscall /* old break syscall */ + .llong .sys_ni_syscall /* old stat syscall */ + .llong .ppc32_lseek + .llong .sys_getpid /* 20 */ + .llong .compat_sys_mount + .llong .sys_oldumount + .llong .sys_setuid + .llong .sys_getuid + .llong .compat_sys_stime /* 25 */ + .llong .sys32_ptrace + .llong .sys_alarm + .llong .sys_ni_syscall /* old fstat syscall */ + .llong .sys32_pause + .llong .compat_sys_utime /* 30 */ + .llong .sys_ni_syscall /* old stty syscall */ + .llong .sys_ni_syscall /* old gtty syscall */ + .llong .sys32_access + .llong .sys32_nice + .llong .sys_ni_syscall /* 35 - old ftime syscall */ + .llong .sys_sync + .llong .sys32_kill + .llong .sys_rename + .llong .sys32_mkdir + .llong .sys_rmdir /* 40 */ + .llong .sys_dup + .llong .sys_pipe + .llong .compat_sys_times + .llong .sys_ni_syscall /* old prof syscall */ + .llong .sys_brk /* 45 */ + .llong .sys_setgid + .llong .sys_getgid + .llong .sys_signal + .llong .sys_geteuid + .llong .sys_getegid /* 50 */ + .llong .sys_acct + .llong .sys_umount + .llong .sys_ni_syscall /* old lock syscall */ + .llong .compat_sys_ioctl + .llong .compat_sys_fcntl /* 55 */ + .llong .sys_ni_syscall /* old mpx syscall */ + .llong .sys32_setpgid + .llong .sys_ni_syscall /* old ulimit syscall */ + .llong .sys32_olduname + .llong .sys32_umask /* 60 */ + .llong .sys_chroot + .llong .sys_ustat + .llong .sys_dup2 + .llong .sys_getppid + .llong .sys_getpgrp /* 65 */ + .llong .sys_setsid + .llong .sys32_sigaction + .llong .sys_sgetmask + .llong .sys32_ssetmask + .llong .sys_setreuid /* 70 */ + .llong .sys_setregid + .llong .ppc32_sigsuspend + .llong .compat_sys_sigpending + .llong .sys32_sethostname + .llong .compat_sys_setrlimit /* 75 */ + .llong .compat_sys_old_getrlimit + .llong .compat_sys_getrusage + .llong .sys32_gettimeofday + .llong .sys32_settimeofday + .llong .sys32_getgroups /* 80 */ + .llong .sys32_setgroups + .llong .sys_ni_syscall /* old select syscall */ + .llong .sys_symlink + .llong .sys_ni_syscall /* old lstat syscall */ + .llong .sys32_readlink /* 85 */ + .llong .sys_uselib + .llong .sys_swapon + .llong .sys_reboot + .llong .old32_readdir + .llong .sys_mmap /* 90 */ + .llong .sys_munmap + .llong .sys_truncate + .llong .sys_ftruncate + .llong .sys_fchmod + .llong .sys_fchown /* 95 */ + .llong .sys32_getpriority + .llong .sys32_setpriority + .llong .sys_ni_syscall /* old profil syscall */ + .llong .compat_sys_statfs + .llong .compat_sys_fstatfs /* 100 */ + .llong .sys_ni_syscall /* old ioperm syscall */ + .llong .compat_sys_socketcall + .llong .sys32_syslog + .llong .compat_sys_setitimer + .llong .compat_sys_getitimer /* 105 */ + .llong .compat_sys_newstat + .llong .compat_sys_newlstat + .llong .compat_sys_newfstat + .llong .sys_uname + .llong .sys_ni_syscall /* 110 old iopl syscall */ + .llong .sys_vhangup + .llong .sys_ni_syscall /* old idle syscall */ + .llong .sys_ni_syscall /* old vm86 syscall */ + .llong .compat_sys_wait4 + .llong .sys_swapoff /* 115 */ + .llong .sys32_sysinfo + .llong .sys32_ipc + .llong .sys_fsync + .llong .ppc32_sigreturn + .llong .ppc_clone /* 120 */ + .llong .sys32_setdomainname + .llong .ppc64_newuname + .llong .sys_ni_syscall /* old modify_ldt syscall */ + .llong .sys32_adjtimex + .llong .sys_mprotect /* 125 */ + .llong .compat_sys_sigprocmask + .llong .sys_ni_syscall /* old create_module syscall */ + .llong .sys_init_module + .llong .sys_delete_module + .llong .sys_ni_syscall /* 130 old get_kernel_syms syscall */ + .llong .sys_quotactl + .llong .sys32_getpgid + .llong .sys_fchdir + .llong .sys_bdflush + .llong .sys32_sysfs /* 135 */ + .llong .ppc64_personality + .llong .sys_ni_syscall /* for afs_syscall */ + .llong .sys_setfsuid + .llong .sys_setfsgid + .llong .sys_llseek /* 140 */ + .llong .sys32_getdents + .llong .ppc32_select + .llong .sys_flock + .llong .sys_msync + .llong .compat_sys_readv /* 145 */ + .llong .compat_sys_writev + .llong .sys32_getsid + .llong .sys_fdatasync + .llong .sys32_sysctl + .llong .sys_mlock /* 150 */ + .llong .sys_munlock + .llong .sys_mlockall + .llong .sys_munlockall + .llong .sys32_sched_setparam + .llong .sys32_sched_getparam /* 155 */ + .llong .sys32_sched_setscheduler + .llong .sys32_sched_getscheduler + .llong .sys_sched_yield + .llong .sys32_sched_get_priority_max + .llong .sys32_sched_get_priority_min /* 160 */ + .llong .sys32_sched_rr_get_interval + .llong .compat_sys_nanosleep + .llong .sys_mremap + .llong .sys_setresuid + .llong .sys_getresuid /* 165 */ + .llong .sys_ni_syscall /* old query_module syscall */ + .llong .sys_poll + .llong .compat_sys_nfsservctl + .llong .sys_setresgid + .llong .sys_getresgid /* 170 */ + .llong .sys32_prctl + .llong .ppc32_rt_sigreturn + .llong .sys32_rt_sigaction + .llong .sys32_rt_sigprocmask + .llong .sys32_rt_sigpending /* 175 */ + .llong .compat_sys_rt_sigtimedwait + .llong .sys32_rt_sigqueueinfo + .llong .ppc32_rt_sigsuspend + .llong .sys32_pread64 + .llong .sys32_pwrite64 /* 180 */ + .llong .sys_chown + .llong .sys_getcwd + .llong .sys_capget + .llong .sys_capset + .llong .sys32_sigaltstack /* 185 */ + .llong .sys32_sendfile + .llong .sys_ni_syscall /* reserved for streams1 */ + .llong .sys_ni_syscall /* reserved for streams2 */ + .llong .ppc_vfork + .llong .compat_sys_getrlimit /* 190 */ + .llong .sys32_readahead + .llong .sys32_mmap2 + .llong .sys32_truncate64 + .llong .sys32_ftruncate64 + .llong .sys_stat64 /* 195 */ + .llong .sys_lstat64 + .llong .sys_fstat64 + .llong .sys32_pciconfig_read + .llong .sys32_pciconfig_write + .llong .sys32_pciconfig_iobase /* 200 - pciconfig_iobase */ + .llong .sys_ni_syscall /* reserved for MacOnLinux */ + .llong .sys_getdents64 + .llong .sys_pivot_root + .llong .compat_sys_fcntl64 + .llong .sys_madvise /* 205 */ + .llong .sys_mincore + .llong .sys_gettid + .llong .sys_tkill + .llong .sys_setxattr + .llong .sys_lsetxattr /* 210 */ + .llong .sys_fsetxattr + .llong .sys_getxattr + .llong .sys_lgetxattr + .llong .sys_fgetxattr + .llong .sys_listxattr /* 215 */ + .llong .sys_llistxattr + .llong .sys_flistxattr + .llong .sys_removexattr + .llong .sys_lremovexattr + .llong .sys_fremovexattr /* 220 */ + .llong .compat_sys_futex + .llong .compat_sys_sched_setaffinity + .llong .compat_sys_sched_getaffinity + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 225 - reserved for tux */ + .llong .sys32_sendfile64 + .llong .compat_sys_io_setup + .llong .sys_io_destroy + .llong .compat_sys_io_getevents + .llong .compat_sys_io_submit + .llong .sys_io_cancel + .llong .sys_set_tid_address + .llong .ppc32_fadvise64 + .llong .sys_exit_group + .llong .ppc32_lookup_dcookie /* 235 */ + .llong .sys_epoll_create + .llong .sys_epoll_ctl + .llong .sys_epoll_wait + .llong .sys_remap_file_pages + .llong .ppc32_timer_create /* 240 */ + .llong .compat_sys_timer_settime + .llong .compat_sys_timer_gettime + .llong .sys_timer_getoverrun + .llong .sys_timer_delete + .llong .compat_sys_clock_settime /* 245 */ + .llong .compat_sys_clock_gettime + .llong .compat_sys_clock_getres + .llong .compat_sys_clock_nanosleep + .llong .ppc32_swapcontext + .llong .sys32_tgkill /* 250 */ + .llong .sys32_utimes + .llong .compat_sys_statfs64 + .llong .compat_sys_fstatfs64 + .llong .ppc32_fadvise64_64 /* 32bit only fadvise64_64 */ + .llong .ppc_rtas /* 255 */ + .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ + .llong .sys_ni_syscall /* 257 reserved for vserver */ + .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ + .llong .compat_sys_mbind + .llong .compat_sys_get_mempolicy /* 260 */ + .llong .compat_sys_set_mempolicy + .llong .compat_sys_mq_open + .llong .sys_mq_unlink + .llong .compat_sys_mq_timedsend + .llong .compat_sys_mq_timedreceive /* 265 */ + .llong .compat_sys_mq_notify + .llong .compat_sys_mq_getsetattr + .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */ + .llong .sys32_add_key + .llong .sys32_request_key + .llong .compat_sys_keyctl + .llong .compat_sys_waitid + + .balign 8 +_GLOBAL(sys_call_table) + .llong .sys_restart_syscall /* 0 */ + .llong .sys_exit + .llong .ppc_fork + .llong .sys_read + .llong .sys_write + .llong .sys_open /* 5 */ + .llong .sys_close + .llong .sys_waitpid + .llong .sys_creat + .llong .sys_link + .llong .sys_unlink /* 10 */ + .llong .sys_execve + .llong .sys_chdir + .llong .sys64_time + .llong .sys_mknod + .llong .sys_chmod /* 15 */ + .llong .sys_lchown + .llong .sys_ni_syscall /* old break syscall */ + .llong .sys_ni_syscall /* old stat syscall */ + .llong .sys_lseek + .llong .sys_getpid /* 20 */ + .llong .sys_mount + .llong .sys_ni_syscall /* old umount syscall */ + .llong .sys_setuid + .llong .sys_getuid + .llong .sys_stime /* 25 */ + .llong .sys_ptrace + .llong .sys_alarm + .llong .sys_ni_syscall /* old fstat syscall */ + .llong .sys_pause + .llong .sys_utime /* 30 */ + .llong .sys_ni_syscall /* old stty syscall */ + .llong .sys_ni_syscall /* old gtty syscall */ + .llong .sys_access + .llong .sys_nice + .llong .sys_ni_syscall /* 35 - old ftime syscall */ + .llong .sys_sync + .llong .sys_kill + .llong .sys_rename + .llong .sys_mkdir + .llong .sys_rmdir /* 40 */ + .llong .sys_dup + .llong .sys_pipe + .llong .sys_times + .llong .sys_ni_syscall /* old prof syscall */ + .llong .sys_brk /* 45 */ + .llong .sys_setgid + .llong .sys_getgid + .llong .sys_signal + .llong .sys_geteuid + .llong .sys_getegid /* 50 */ + .llong .sys_acct + .llong .sys_umount + .llong .sys_ni_syscall /* old lock syscall */ + .llong .sys_ioctl + .llong .sys_fcntl /* 55 */ + .llong .sys_ni_syscall /* old mpx syscall */ + .llong .sys_setpgid + .llong .sys_ni_syscall /* old ulimit syscall */ + .llong .sys_ni_syscall /* old uname syscall */ + .llong .sys_umask /* 60 */ + .llong .sys_chroot + .llong .sys_ustat + .llong .sys_dup2 + .llong .sys_getppid + .llong .sys_getpgrp /* 65 */ + .llong .sys_setsid + .llong .sys_ni_syscall + .llong .sys_sgetmask + .llong .sys_ssetmask + .llong .sys_setreuid /* 70 */ + .llong .sys_setregid + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_sethostname + .llong .sys_setrlimit /* 75 */ + .llong .sys_ni_syscall /* old getrlimit syscall */ + .llong .sys_getrusage + .llong .sys_gettimeofday + .llong .sys_settimeofday + .llong .sys_getgroups /* 80 */ + .llong .sys_setgroups + .llong .sys_ni_syscall /* old select syscall */ + .llong .sys_symlink + .llong .sys_ni_syscall /* old lstat syscall */ + .llong .sys_readlink /* 85 */ + .llong .sys_uselib + .llong .sys_swapon + .llong .sys_reboot + .llong .sys_ni_syscall /* old readdir syscall */ + .llong .sys_mmap /* 90 */ + .llong .sys_munmap + .llong .sys_truncate + .llong .sys_ftruncate + .llong .sys_fchmod + .llong .sys_fchown /* 95 */ + .llong .sys_getpriority + .llong .sys_setpriority + .llong .sys_ni_syscall /* old profil syscall holder */ + .llong .sys_statfs + .llong .sys_fstatfs /* 100 */ + .llong .sys_ni_syscall /* old ioperm syscall */ + .llong .sys_socketcall + .llong .sys_syslog + .llong .sys_setitimer + .llong .sys_getitimer /* 105 */ + .llong .sys_newstat + .llong .sys_newlstat + .llong .sys_newfstat + .llong .sys_ni_syscall /* old uname syscall */ + .llong .sys_ni_syscall /* 110 old iopl syscall */ + .llong .sys_vhangup + .llong .sys_ni_syscall /* old idle syscall */ + .llong .sys_ni_syscall /* old vm86 syscall */ + .llong .sys_wait4 + .llong .sys_swapoff /* 115 */ + .llong .sys_sysinfo + .llong .sys_ipc + .llong .sys_fsync + .llong .sys_ni_syscall + .llong .ppc_clone /* 120 */ + .llong .sys_setdomainname + .llong .ppc64_newuname + .llong .sys_ni_syscall /* old modify_ldt syscall */ + .llong .sys_adjtimex + .llong .sys_mprotect /* 125 */ + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* old create_module syscall */ + .llong .sys_init_module + .llong .sys_delete_module + .llong .sys_ni_syscall /* 130 old get_kernel_syms syscall */ + .llong .sys_quotactl + .llong .sys_getpgid + .llong .sys_fchdir + .llong .sys_bdflush + .llong .sys_sysfs /* 135 */ + .llong .ppc64_personality + .llong .sys_ni_syscall /* for afs_syscall */ + .llong .sys_setfsuid + .llong .sys_setfsgid + .llong .sys_llseek /* 140 */ + .llong .sys_getdents + .llong .sys_select + .llong .sys_flock + .llong .sys_msync + .llong .sys_readv /* 145 */ + .llong .sys_writev + .llong .sys_getsid + .llong .sys_fdatasync + .llong .sys_sysctl + .llong .sys_mlock /* 150 */ + .llong .sys_munlock + .llong .sys_mlockall + .llong .sys_munlockall + .llong .sys_sched_setparam + .llong .sys_sched_getparam /* 155 */ + .llong .sys_sched_setscheduler + .llong .sys_sched_getscheduler + .llong .sys_sched_yield + .llong .sys_sched_get_priority_max + .llong .sys_sched_get_priority_min /* 160 */ + .llong .sys_sched_rr_get_interval + .llong .sys_nanosleep + .llong .sys_mremap + .llong .sys_setresuid + .llong .sys_getresuid /* 165 */ + .llong .sys_ni_syscall /* old query_module syscall */ + .llong .sys_poll + .llong .sys_nfsservctl + .llong .sys_setresgid + .llong .sys_getresgid /* 170 */ + .llong .sys_prctl + .llong .ppc64_rt_sigreturn + .llong .sys_rt_sigaction + .llong .sys_rt_sigprocmask + .llong .sys_rt_sigpending /* 175 */ + .llong .sys_rt_sigtimedwait + .llong .sys_rt_sigqueueinfo + .llong .ppc64_rt_sigsuspend + .llong .sys_pread64 + .llong .sys_pwrite64 /* 180 */ + .llong .sys_chown + .llong .sys_getcwd + .llong .sys_capget + .llong .sys_capset + .llong .sys_sigaltstack /* 185 */ + .llong .sys_sendfile64 + .llong .sys_ni_syscall /* reserved for streams1 */ + .llong .sys_ni_syscall /* reserved for streams2 */ + .llong .ppc_vfork + .llong .sys_getrlimit /* 190 */ + .llong .sys_readahead + .llong .sys_ni_syscall /* 32bit only mmap2 */ + .llong .sys_ni_syscall /* 32bit only truncate64 */ + .llong .sys_ni_syscall /* 32bit only ftruncate64 */ + .llong .sys_ni_syscall /* 195 - 32bit only stat64 */ + .llong .sys_ni_syscall /* 32bit only lstat64 */ + .llong .sys_ni_syscall /* 32bit only fstat64 */ + .llong .sys_ni_syscall /* 32bit only pciconfig_read */ + .llong .sys_ni_syscall /* 32bit only pciconfig_write */ + .llong .sys_ni_syscall /* 32bit only pciconfig_iobase */ + .llong .sys_ni_syscall /* reserved for MacOnLinux */ + .llong .sys_getdents64 + .llong .sys_pivot_root + .llong .sys_ni_syscall /* 32bit only fcntl64 */ + .llong .sys_madvise /* 205 */ + .llong .sys_mincore + .llong .sys_gettid + .llong .sys_tkill + .llong .sys_setxattr + .llong .sys_lsetxattr /* 210 */ + .llong .sys_fsetxattr + .llong .sys_getxattr + .llong .sys_lgetxattr + .llong .sys_fgetxattr + .llong .sys_listxattr /* 215 */ + .llong .sys_llistxattr + .llong .sys_flistxattr + .llong .sys_removexattr + .llong .sys_lremovexattr + .llong .sys_fremovexattr /* 220 */ + .llong .sys_futex + .llong .sys_sched_setaffinity + .llong .sys_sched_getaffinity + .llong .sys_ni_syscall + .llong .sys_ni_syscall /* 225 - reserved for tux */ + .llong .sys_ni_syscall /* 32bit only sendfile64 */ + .llong .sys_io_setup + .llong .sys_io_destroy + .llong .sys_io_getevents + .llong .sys_io_submit /* 230 */ + .llong .sys_io_cancel + .llong .sys_set_tid_address + .llong .sys_fadvise64 + .llong .sys_exit_group + .llong .sys_lookup_dcookie /* 235 */ + .llong .sys_epoll_create + .llong .sys_epoll_ctl + .llong .sys_epoll_wait + .llong .sys_remap_file_pages + .llong .sys_timer_create /* 240 */ + .llong .sys_timer_settime + .llong .sys_timer_gettime + .llong .sys_timer_getoverrun + .llong .sys_timer_delete + .llong .sys_clock_settime /* 245 */ + .llong .sys_clock_gettime + .llong .sys_clock_getres + .llong .sys_clock_nanosleep + .llong .ppc64_swapcontext + .llong .sys_tgkill /* 250 */ + .llong .sys_utimes + .llong .sys_statfs64 + .llong .sys_fstatfs64 + .llong .sys_ni_syscall /* 32bit only fadvise64_64 */ + .llong .ppc_rtas /* 255 */ + .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ + .llong .sys_ni_syscall /* 257 reserved for vserver */ + .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ + .llong .sys_mbind + .llong .sys_get_mempolicy /* 260 */ + .llong .sys_set_mempolicy + .llong .sys_mq_open + .llong .sys_mq_unlink + .llong .sys_mq_timedsend + .llong .sys_mq_timedreceive /* 265 */ + .llong .sys_mq_notify + .llong .sys_mq_getsetattr + .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */ + .llong .sys_add_key + .llong .sys_request_key /* 270 */ + .llong .sys_keyctl + .llong .sys_waitid diff --git a/arch/ppc64/kernel/module.c b/arch/ppc64/kernel/module.c new file mode 100644 index 00000000000..c683bf88e69 --- /dev/null +++ b/arch/ppc64/kernel/module.c @@ -0,0 +1,442 @@ +/* Kernel module help for PPC64. + Copyright (C) 2001, 2003 Rusty Russell IBM Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include +#include +#include +#include +#include +#include +#include + +/* FIXME: We don't do .init separately. To do this, we'd need to have + a separate r2 value in the init and core section, and stub between + them, too. + + Using a magic allocator which places modules within 32MB solves + this, and makes other things simpler. Anton? + --RR. */ +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt , ...) +#endif + +/* There's actually a third entry here, but it's unused */ +struct ppc64_opd_entry +{ + unsigned long funcaddr; + unsigned long r2; +}; + +/* Like PPC32, we need little trampolines to do > 24-bit jumps (into + the kernel itself). But on PPC64, these need to be used for every + jump, actually, to reset r2 (TOC+0x8000). */ +struct ppc64_stub_entry +{ + /* 28 byte jump instruction sequence (7 instructions) */ + unsigned char jump[28]; + unsigned char unused[4]; + /* Data for the above code */ + struct ppc64_opd_entry opd; +}; + +/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external) + function which may be more than 24-bits away. We could simply + patch the new r2 value and function pointer into the stub, but it's + significantly shorter to put these values at the end of the stub + code, and patch the stub address (32-bits relative to the TOC ptr, + r2) into the stub. */ +static struct ppc64_stub_entry ppc64_stub = +{ .jump = { + 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, */ + 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, */ + /* Save current r2 value in magic place on the stack. */ + 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */ + 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */ + 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */ + 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */ + 0x4e, 0x80, 0x04, 0x20 /* bctr */ +} }; + +/* Count how many different 24-bit relocations (different symbol, + different addend) */ +static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) +{ + unsigned int i, j, ret = 0; + + /* FIXME: Only count external ones --RR */ + /* Sure, this is order(n^2), but it's usually short, and not + time critical */ + for (i = 0; i < num; i++) { + /* Only count 24-bit relocs, others don't need stubs */ + if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24) + continue; + for (j = 0; j < i; j++) { + /* If this addend appeared before, it's + already been counted */ + if (rela[i].r_info == rela[j].r_info + && rela[i].r_addend == rela[j].r_addend) + break; + } + if (j == i) ret++; + } + return ret; +} + +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + + return vmalloc_exec(size); +} + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception + table entries. */ +} + +/* Get size of potential trampolines required. */ +static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, + const Elf64_Shdr *sechdrs) +{ + /* One extra reloc so it's always 0-funcaddr terminated */ + unsigned long relocs = 1; + unsigned i; + + /* Every relocated section... */ + for (i = 1; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type == SHT_RELA) { + DEBUGP("Found relocations in section %u\n", i); + DEBUGP("Ptr: %p. Number: %lu\n", + (void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size / sizeof(Elf64_Rela)); + relocs += count_relocs((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size + / sizeof(Elf64_Rela)); + } + } + + DEBUGP("Looks like a total of %lu stubs, max\n", relocs); + return relocs * sizeof(struct ppc64_stub_entry); +} + +static void dedotify_versions(struct modversion_info *vers, + unsigned long size) +{ + struct modversion_info *end; + + for (end = (void *)vers + size; vers < end; vers++) + if (vers->name[0] == '.') + memmove(vers->name, vers->name+1, strlen(vers->name)); +} + +/* Undefined symbols which refer to .funcname, hack to funcname */ +static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab) +{ + unsigned int i; + + for (i = 1; i < numsyms; i++) { + if (syms[i].st_shndx == SHN_UNDEF) { + char *name = strtab + syms[i].st_name; + if (name[0] == '.') + memmove(name, name+1, strlen(name)); + } + } +} + +int module_frob_arch_sections(Elf64_Ehdr *hdr, + Elf64_Shdr *sechdrs, + char *secstrings, + struct module *me) +{ + unsigned int i; + + /* Find .toc and .stubs sections, symtab and strtab */ + for (i = 1; i < hdr->e_shnum; i++) { + char *p; + if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) + me->arch.stubs_section = i; + else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) + me->arch.toc_section = i; + else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) + dedotify_versions((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size); + + /* We don't handle .init for the moment: rename to _init */ + while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init"))) + p[0] = '_'; + + if (sechdrs[i].sh_type == SHT_SYMTAB) + dedotify((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size / sizeof(Elf64_Sym), + (void *)hdr + + sechdrs[sechdrs[i].sh_link].sh_offset); + } + if (!me->arch.stubs_section || !me->arch.toc_section) { + printk("%s: doesn't contain .toc or .stubs.\n", me->name); + return -ENOEXEC; + } + + /* Override the stubs size */ + sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs); + return 0; +} + +int apply_relocate(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name); + return -ENOEXEC; +} + +/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this + gives the value maximum span in an instruction which uses a signed + offset) */ +static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) +{ + return sechdrs[me->arch.toc_section].sh_addr + 0x8000; +} + +/* Both low and high 16 bits are added as SIGNED additions, so if low + 16 bits has high bit set, high 16 bits must be adjusted. These + macros do that (stolen from binutils). */ +#define PPC_LO(v) ((v) & 0xffff) +#define PPC_HI(v) (((v) >> 16) & 0xffff) +#define PPC_HA(v) PPC_HI ((v) + 0x8000) + +/* Patch stub to reference function and correct r2 value. */ +static inline int create_stub(Elf64_Shdr *sechdrs, + struct ppc64_stub_entry *entry, + struct ppc64_opd_entry *opd, + struct module *me) +{ + Elf64_Half *loc1, *loc2; + long reladdr; + + *entry = ppc64_stub; + + loc1 = (Elf64_Half *)&entry->jump[2]; + loc2 = (Elf64_Half *)&entry->jump[6]; + + /* Stub uses address relative to r2. */ + reladdr = (unsigned long)entry - my_r2(sechdrs, me); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + printk("%s: Address %p of stub out of range of %p.\n", + me->name, (void *)reladdr, (void *)my_r2); + return 0; + } + DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr); + + *loc1 = PPC_HA(reladdr); + *loc2 = PPC_LO(reladdr); + entry->opd.funcaddr = opd->funcaddr; + entry->opd.r2 = opd->r2; + return 1; +} + +/* Create stub to jump to function described in this OPD: we need the + stub to set up the TOC ptr (r2) for the function. */ +static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, + unsigned long opdaddr, + struct module *me) +{ + struct ppc64_stub_entry *stubs; + struct ppc64_opd_entry *opd = (void *)opdaddr; + unsigned int i, num_stubs; + + num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs); + + /* Find this stub, or if that fails, the next avail. entry */ + stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr; + for (i = 0; stubs[i].opd.funcaddr; i++) { + BUG_ON(i >= num_stubs); + + if (stubs[i].opd.funcaddr == opd->funcaddr) + return (unsigned long)&stubs[i]; + } + + if (!create_stub(sechdrs, &stubs[i], opd, me)) + return 0; + + return (unsigned long)&stubs[i]; +} + +/* We expect a noop next: if it is, replace it with instruction to + restore r2. */ +static int restore_r2(u32 *instruction, struct module *me) +{ + if (*instruction != 0x60000000) { + printk("%s: Expect noop after relocate, got %08x\n", + me->name, *instruction); + return 0; + } + *instruction = 0xe8410028; /* ld r2,40(r1) */ + return 1; +} + +int apply_relocate_add(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + unsigned int i; + Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr; + Elf64_Sym *sym; + unsigned long *location; + unsigned long value; + + DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rela[i].r_offset; + /* This is the symbol it is referring to */ + sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + + ELF64_R_SYM(rela[i].r_info); + + DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n", + location, (long)ELF64_R_TYPE(rela[i].r_info), + strtab + sym->st_name, (unsigned long)sym->st_value, + (long)rela[i].r_addend); + + /* `Everything is relative'. */ + value = sym->st_value + rela[i].r_addend; + + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_PPC64_ADDR32: + /* Simply set it */ + *(u32 *)location = value; + break; + + case R_PPC64_ADDR64: + /* Simply set it */ + *(unsigned long *)location = value; + break; + + case R_PPC64_TOC: + *(unsigned long *)location = my_r2(sechdrs, me); + break; + + case R_PPC64_TOC16_DS: + /* Subtact TOC pointer */ + value -= my_r2(sechdrs, me); + if ((value & 3) != 0 || value + 0x8000 > 0xffff) { + printk("%s: bad TOC16_DS relocation (%lu)\n", + me->name, value); + return -ENOEXEC; + } + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xfffc) + | (value & 0xfffc); + break; + + case R_PPC_REL24: + /* FIXME: Handle weak symbols here --RR */ + if (sym->st_shndx == SHN_UNDEF) { + /* External: go via stub */ + value = stub_for_addr(sechdrs, value, me); + if (!value) + return -ENOENT; + if (!restore_r2((u32 *)location + 1, me)) + return -ENOEXEC; + } + + /* Convert value to relative */ + value -= (unsigned long)location; + if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){ + printk("%s: REL24 %li out of range!\n", + me->name, (long int)value); + return -ENOEXEC; + } + + /* Only replace bits 2 through 26 */ + *(uint32_t *)location + = (*(uint32_t *)location & ~0x03fffffc) + | (value & 0x03fffffc); + break; + + default: + printk("%s: Unknown ADD relocation: %lu\n", + me->name, + (unsigned long)ELF64_R_TYPE(rela[i].r_info)); + return -ENOEXEC; + } + } + + return 0; +} + +LIST_HEAD(module_bug_list); + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, struct module *me) +{ + char *secstrings; + unsigned int i; + + me->arch.bug_table = NULL; + me->arch.num_bugs = 0; + + /* Find the __bug_table section, if present */ + secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (i = 1; i < hdr->e_shnum; i++) { + if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table")) + continue; + me->arch.bug_table = (void *) sechdrs[i].sh_addr; + me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry); + break; + } + + /* + * Strictly speaking this should have a spinlock to protect against + * traversals, but since we only traverse on BUG()s, a spinlock + * could potentially lead to deadlock and thus be counter-productive. + */ + list_add(&me->arch.bug_list, &module_bug_list); + + return 0; +} + +void module_arch_cleanup(struct module *mod) +{ + list_del(&mod->arch.bug_list); +} + +struct bug_entry *module_find_bug(unsigned long bugaddr) +{ + struct mod_arch_specific *mod; + unsigned int i; + struct bug_entry *bug; + + list_for_each_entry(mod, &module_bug_list, bug_list) { + bug = mod->bug_table; + for (i = 0; i < mod->num_bugs; ++i, ++bug) + if (bugaddr == bug->bug_addr) + return bug; + } + return NULL; +} diff --git a/arch/ppc64/kernel/mpic.c b/arch/ppc64/kernel/mpic.c new file mode 100644 index 00000000000..593ea5b82af --- /dev/null +++ b/arch/ppc64/kernel/mpic.c @@ -0,0 +1,859 @@ +/* + * arch/ppc64/kernel/mpic.c + * + * Driver for interrupt controllers following the OpenPIC standard, the + * common implementation beeing IBM's MPIC. This driver also can deal + * with various broken implementations of this HW. + * + * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "mpic.h" + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +static struct mpic *mpics; +static struct mpic *mpic_primary; +static DEFINE_SPINLOCK(mpic_lock); + + +/* + * Register accessor functions + */ + + +static inline u32 _mpic_read(unsigned int be, volatile u32 __iomem *base, + unsigned int reg) +{ + if (be) + return in_be32(base + (reg >> 2)); + else + return in_le32(base + (reg >> 2)); +} + +static inline void _mpic_write(unsigned int be, volatile u32 __iomem *base, + unsigned int reg, u32 value) +{ + if (be) + out_be32(base + (reg >> 2), value); + else + out_le32(base + (reg >> 2), value); +} + +static inline u32 _mpic_ipi_read(struct mpic *mpic, unsigned int ipi) +{ + unsigned int be = (mpic->flags & MPIC_BIG_ENDIAN) != 0; + unsigned int offset = MPIC_GREG_IPI_VECTOR_PRI_0 + (ipi * 0x10); + + if (mpic->flags & MPIC_BROKEN_IPI) + be = !be; + return _mpic_read(be, mpic->gregs, offset); +} + +static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 value) +{ + unsigned int offset = MPIC_GREG_IPI_VECTOR_PRI_0 + (ipi * 0x10); + + _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->gregs, offset, value); +} + +static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg) +{ + unsigned int cpu = 0; + + if (mpic->flags & MPIC_PRIMARY) + cpu = hard_smp_processor_id(); + + return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg); +} + +static inline void _mpic_cpu_write(struct mpic *mpic, unsigned int reg, u32 value) +{ + unsigned int cpu = 0; + + if (mpic->flags & MPIC_PRIMARY) + cpu = hard_smp_processor_id(); + + _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg, value); +} + +static inline u32 _mpic_irq_read(struct mpic *mpic, unsigned int src_no, unsigned int reg) +{ + unsigned int isu = src_no >> mpic->isu_shift; + unsigned int idx = src_no & mpic->isu_mask; + + return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu], + reg + (idx * MPIC_IRQ_STRIDE)); +} + +static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no, + unsigned int reg, u32 value) +{ + unsigned int isu = src_no >> mpic->isu_shift; + unsigned int idx = src_no & mpic->isu_mask; + + _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu], + reg + (idx * MPIC_IRQ_STRIDE), value); +} + +#define mpic_read(b,r) _mpic_read(mpic->flags & MPIC_BIG_ENDIAN,(b),(r)) +#define mpic_write(b,r,v) _mpic_write(mpic->flags & MPIC_BIG_ENDIAN,(b),(r),(v)) +#define mpic_ipi_read(i) _mpic_ipi_read(mpic,(i)) +#define mpic_ipi_write(i,v) _mpic_ipi_write(mpic,(i),(v)) +#define mpic_cpu_read(i) _mpic_cpu_read(mpic,(i)) +#define mpic_cpu_write(i,v) _mpic_cpu_write(mpic,(i),(v)) +#define mpic_irq_read(s,r) _mpic_irq_read(mpic,(s),(r)) +#define mpic_irq_write(s,r,v) _mpic_irq_write(mpic,(s),(r),(v)) + + +/* + * Low level utility functions + */ + + + +/* Check if we have one of those nice broken MPICs with a flipped endian on + * reads from IPI registers + */ +static void __init mpic_test_broken_ipi(struct mpic *mpic) +{ + u32 r; + + mpic_write(mpic->gregs, MPIC_GREG_IPI_VECTOR_PRI_0, MPIC_VECPRI_MASK); + r = mpic_read(mpic->gregs, MPIC_GREG_IPI_VECTOR_PRI_0); + + if (r == le32_to_cpu(MPIC_VECPRI_MASK)) { + printk(KERN_INFO "mpic: Detected reversed IPI registers\n"); + mpic->flags |= MPIC_BROKEN_IPI; + } +} + +#ifdef CONFIG_MPIC_BROKEN_U3 + +/* Test if an interrupt is sourced from HyperTransport (used on broken U3s) + * to force the edge setting on the MPIC and do the ack workaround. + */ +static inline int mpic_is_ht_interrupt(struct mpic *mpic, unsigned int source_no) +{ + if (source_no >= 128 || !mpic->fixups) + return 0; + return mpic->fixups[source_no].base != NULL; +} + +static inline void mpic_apic_end_irq(struct mpic *mpic, unsigned int source_no) +{ + struct mpic_irq_fixup *fixup = &mpic->fixups[source_no]; + u32 tmp; + + spin_lock(&mpic->fixup_lock); + writeb(0x11 + 2 * fixup->irq, fixup->base); + tmp = readl(fixup->base + 2); + writel(tmp | 0x80000000ul, fixup->base + 2); + /* config writes shouldn't be posted but let's be safe ... */ + (void)readl(fixup->base + 2); + spin_unlock(&mpic->fixup_lock); +} + + +static void __init mpic_amd8111_read_irq(struct mpic *mpic, u8 __iomem *devbase) +{ + int i, irq; + u32 tmp; + + printk(KERN_INFO "mpic: - Workarounds on AMD 8111 @ %p\n", devbase); + + for (i=0; i < 24; i++) { + writeb(0x10 + 2*i, devbase + 0xf2); + tmp = readl(devbase + 0xf4); + if ((tmp & 0x1) || !(tmp & 0x20)) + continue; + irq = (tmp >> 16) & 0xff; + mpic->fixups[irq].irq = i; + mpic->fixups[irq].base = devbase + 0xf2; + } +} + +static void __init mpic_amd8131_read_irq(struct mpic *mpic, u8 __iomem *devbase) +{ + int i, irq; + u32 tmp; + + printk(KERN_INFO "mpic: - Workarounds on AMD 8131 @ %p\n", devbase); + + for (i=0; i < 4; i++) { + writeb(0x10 + 2*i, devbase + 0xba); + tmp = readl(devbase + 0xbc); + if ((tmp & 0x1) || !(tmp & 0x20)) + continue; + irq = (tmp >> 16) & 0xff; + mpic->fixups[irq].irq = i; + mpic->fixups[irq].base = devbase + 0xba; + } +} + +static void __init mpic_scan_ioapics(struct mpic *mpic) +{ + unsigned int devfn; + u8 __iomem *cfgspace; + + printk(KERN_INFO "mpic: Setting up IO-APICs workarounds for U3\n"); + + /* Allocate fixups array */ + mpic->fixups = alloc_bootmem(128 * sizeof(struct mpic_irq_fixup)); + BUG_ON(mpic->fixups == NULL); + memset(mpic->fixups, 0, 128 * sizeof(struct mpic_irq_fixup)); + + /* Init spinlock */ + spin_lock_init(&mpic->fixup_lock); + + /* Map u3 config space. We assume all IO-APICs are on the primary bus + * and slot will never be above "0xf" so we only need to map 32k + */ + cfgspace = (unsigned char __iomem *)ioremap(0xf2000000, 0x8000); + BUG_ON(cfgspace == NULL); + + /* Now we scan all slots. We do a very quick scan, we read the header type, + * vendor ID and device ID only, that's plenty enough + */ + for (devfn = 0; devfn < PCI_DEVFN(0x10,0); devfn ++) { + u8 __iomem *devbase = cfgspace + (devfn << 8); + u8 hdr_type = readb(devbase + PCI_HEADER_TYPE); + u32 l = readl(devbase + PCI_VENDOR_ID); + u16 vendor_id, device_id; + int multifunc = 0; + + DBG("devfn %x, l: %x\n", devfn, l); + + /* If no device, skip */ + if (l == 0xffffffff || l == 0x00000000 || + l == 0x0000ffff || l == 0xffff0000) + goto next; + + /* Check if it's a multifunction device (only really used + * to function 0 though + */ + multifunc = !!(hdr_type & 0x80); + vendor_id = l & 0xffff; + device_id = (l >> 16) & 0xffff; + + /* If a known device, go to fixup setup code */ + if (vendor_id == PCI_VENDOR_ID_AMD && device_id == 0x7460) + mpic_amd8111_read_irq(mpic, devbase); + if (vendor_id == PCI_VENDOR_ID_AMD && device_id == 0x7450) + mpic_amd8131_read_irq(mpic, devbase); + next: + /* next device, if function 0 */ + if ((PCI_FUNC(devfn) == 0) && !multifunc) + devfn += 7; + } +} + +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + +/* Find an mpic associated with a given linux interrupt */ +static struct mpic *mpic_find(unsigned int irq, unsigned int *is_ipi) +{ + struct mpic *mpic = mpics; + + while(mpic) { + /* search IPIs first since they may override the main interrupts */ + if (irq >= mpic->ipi_offset && irq < (mpic->ipi_offset + 4)) { + if (is_ipi) + *is_ipi = 1; + return mpic; + } + if (irq >= mpic->irq_offset && + irq < (mpic->irq_offset + mpic->irq_count)) { + if (is_ipi) + *is_ipi = 0; + return mpic; + } + mpic = mpic -> next; + } + return NULL; +} + +/* Convert a cpu mask from logical to physical cpu numbers. */ +static inline u32 mpic_physmask(u32 cpumask) +{ + int i; + u32 mask = 0; + + for (i = 0; i < NR_CPUS; ++i, cpumask >>= 1) + mask |= (cpumask & 1) << get_hard_smp_processor_id(i); + return mask; +} + +#ifdef CONFIG_SMP +/* Get the mpic structure from the IPI number */ +static inline struct mpic * mpic_from_ipi(unsigned int ipi) +{ + return container_of(irq_desc[ipi].handler, struct mpic, hc_ipi); +} +#endif + +/* Get the mpic structure from the irq number */ +static inline struct mpic * mpic_from_irq(unsigned int irq) +{ + return container_of(irq_desc[irq].handler, struct mpic, hc_irq); +} + +/* Send an EOI */ +static inline void mpic_eoi(struct mpic *mpic) +{ + mpic_cpu_write(MPIC_CPU_EOI, 0); + (void)mpic_cpu_read(MPIC_CPU_WHOAMI); +} + +#ifdef CONFIG_SMP +static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) +{ + struct mpic *mpic = dev_id; + + smp_message_recv(irq - mpic->ipi_offset, regs); + return IRQ_HANDLED; +} +#endif /* CONFIG_SMP */ + +/* + * Linux descriptor level callbacks + */ + + +static void mpic_enable_irq(unsigned int irq) +{ + unsigned int loops = 100000; + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = irq - mpic->irq_offset; + + DBG("%s: enable_irq: %d (src %d)\n", mpic->name, irq, src); + + mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, + mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & ~MPIC_VECPRI_MASK); + + /* make sure mask gets to controller before we return to user */ + do { + if (!loops--) { + printk(KERN_ERR "mpic_enable_irq timeout\n"); + break; + } + } while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK); +} + +static void mpic_disable_irq(unsigned int irq) +{ + unsigned int loops = 100000; + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = irq - mpic->irq_offset; + + DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src); + + mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, + mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) | MPIC_VECPRI_MASK); + + /* make sure mask gets to controller before we return to user */ + do { + if (!loops--) { + printk(KERN_ERR "mpic_enable_irq timeout\n"); + break; + } + } while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK)); +} + +static void mpic_end_irq(unsigned int irq) +{ + struct mpic *mpic = mpic_from_irq(irq); + + DBG("%s: end_irq: %d\n", mpic->name, irq); + + /* We always EOI on end_irq() even for edge interrupts since that + * should only lower the priority, the MPIC should have properly + * latched another edge interrupt coming in anyway + */ + +#ifdef CONFIG_MPIC_BROKEN_U3 + if (mpic->flags & MPIC_BROKEN_U3) { + unsigned int src = irq - mpic->irq_offset; + if (mpic_is_ht_interrupt(mpic, src)) + mpic_apic_end_irq(mpic, src); + } +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + mpic_eoi(mpic); +} + +#ifdef CONFIG_SMP + +static void mpic_enable_ipi(unsigned int irq) +{ + struct mpic *mpic = mpic_from_ipi(irq); + unsigned int src = irq - mpic->ipi_offset; + + DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src); + mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK); +} + +static void mpic_disable_ipi(unsigned int irq) +{ + /* NEVER disable an IPI... that's just plain wrong! */ +} + +static void mpic_end_ipi(unsigned int irq) +{ + struct mpic *mpic = mpic_from_ipi(irq); + + /* + * IPIs are marked IRQ_PER_CPU. This has the side effect of + * preventing the IRQ_PENDING/IRQ_INPROGRESS logic from + * applying to them. We EOI them late to avoid re-entering. + * We mark IPI's with SA_INTERRUPT as they must run with + * irqs disabled. + */ + mpic_eoi(mpic); +} + +#endif /* CONFIG_SMP */ + +static void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) +{ + struct mpic *mpic = mpic_from_irq(irq); + + cpumask_t tmp; + + cpus_and(tmp, cpumask, cpu_online_map); + + mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_DESTINATION, + mpic_physmask(cpus_addr(tmp)[0])); +} + + +/* + * Exported functions + */ + + +struct mpic * __init mpic_alloc(unsigned long phys_addr, + unsigned int flags, + unsigned int isu_size, + unsigned int irq_offset, + unsigned int irq_count, + unsigned int ipi_offset, + unsigned char *senses, + unsigned int senses_count, + const char *name) +{ + struct mpic *mpic; + u32 reg; + const char *vers; + int i; + + mpic = alloc_bootmem(sizeof(struct mpic)); + if (mpic == NULL) + return NULL; + + memset(mpic, 0, sizeof(struct mpic)); + mpic->name = name; + + mpic->hc_irq.typename = name; + mpic->hc_irq.enable = mpic_enable_irq; + mpic->hc_irq.disable = mpic_disable_irq; + mpic->hc_irq.end = mpic_end_irq; + if (flags & MPIC_PRIMARY) + mpic->hc_irq.set_affinity = mpic_set_affinity; +#ifdef CONFIG_SMP + mpic->hc_ipi.typename = name; + mpic->hc_ipi.enable = mpic_enable_ipi; + mpic->hc_ipi.disable = mpic_disable_ipi; + mpic->hc_ipi.end = mpic_end_ipi; +#endif /* CONFIG_SMP */ + + mpic->flags = flags; + mpic->isu_size = isu_size; + mpic->irq_offset = irq_offset; + mpic->irq_count = irq_count; + mpic->ipi_offset = ipi_offset; + mpic->num_sources = 0; /* so far */ + mpic->senses = senses; + mpic->senses_count = senses_count; + + /* Map the global registers */ + mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000); + mpic->tmregs = mpic->gregs + (MPIC_TIMER_BASE >> 2); + BUG_ON(mpic->gregs == NULL); + + /* Reset */ + if (flags & MPIC_WANTS_RESET) { + mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0, + mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) + | MPIC_GREG_GCONF_RESET); + while( mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) + & MPIC_GREG_GCONF_RESET) + mb(); + } + + /* Read feature register, calculate num CPUs and, for non-ISU + * MPICs, num sources as well. On ISU MPICs, sources are counted + * as ISUs are added + */ + reg = mpic_read(mpic->gregs, MPIC_GREG_FEATURE_0); + mpic->num_cpus = ((reg & MPIC_GREG_FEATURE_LAST_CPU_MASK) + >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1; + if (isu_size == 0) + mpic->num_sources = ((reg & MPIC_GREG_FEATURE_LAST_SRC_MASK) + >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1; + + /* Map the per-CPU registers */ + for (i = 0; i < mpic->num_cpus; i++) { + mpic->cpuregs[i] = ioremap(phys_addr + MPIC_CPU_BASE + + i * MPIC_CPU_STRIDE, 0x1000); + BUG_ON(mpic->cpuregs[i] == NULL); + } + + /* Initialize main ISU if none provided */ + if (mpic->isu_size == 0) { + mpic->isu_size = mpic->num_sources; + mpic->isus[0] = ioremap(phys_addr + MPIC_IRQ_BASE, + MPIC_IRQ_STRIDE * mpic->isu_size); + BUG_ON(mpic->isus[0] == NULL); + } + mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1); + mpic->isu_mask = (1 << mpic->isu_shift) - 1; + + /* Display version */ + switch (reg & MPIC_GREG_FEATURE_VERSION_MASK) { + case 1: + vers = "1.0"; + break; + case 2: + vers = "1.2"; + break; + case 3: + vers = "1.3"; + break; + default: + vers = ""; + break; + } + printk(KERN_INFO "mpic: Setting up MPIC \"%s\" version %s at %lx, max %d CPUs\n", + name, vers, phys_addr, mpic->num_cpus); + printk(KERN_INFO "mpic: ISU size: %d, shift: %d, mask: %x\n", mpic->isu_size, + mpic->isu_shift, mpic->isu_mask); + + mpic->next = mpics; + mpics = mpic; + + if (flags & MPIC_PRIMARY) + mpic_primary = mpic; + + return mpic; +} + +void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, + unsigned long phys_addr) +{ + unsigned int isu_first = isu_num * mpic->isu_size; + + BUG_ON(isu_num >= MPIC_MAX_ISU); + + mpic->isus[isu_num] = ioremap(phys_addr, MPIC_IRQ_STRIDE * mpic->isu_size); + if ((isu_first + mpic->isu_size) > mpic->num_sources) + mpic->num_sources = isu_first + mpic->isu_size; +} + +void __init mpic_setup_cascade(unsigned int irq, mpic_cascade_t handler, + void *data) +{ + struct mpic *mpic = mpic_find(irq, NULL); + unsigned long flags; + + /* Synchronization here is a bit dodgy, so don't try to replace cascade + * interrupts on the fly too often ... but normally it's set up at boot. + */ + spin_lock_irqsave(&mpic_lock, flags); + if (mpic->cascade) + mpic_disable_irq(mpic->cascade_vec + mpic->irq_offset); + mpic->cascade = NULL; + wmb(); + mpic->cascade_vec = irq - mpic->irq_offset; + mpic->cascade_data = data; + wmb(); + mpic->cascade = handler; + mpic_enable_irq(irq); + spin_unlock_irqrestore(&mpic_lock, flags); +} + +void __init mpic_init(struct mpic *mpic) +{ + int i; + + BUG_ON(mpic->num_sources == 0); + + printk(KERN_INFO "mpic: Initializing for %d sources\n", mpic->num_sources); + + /* Set current processor priority to max */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf); + + /* Initialize timers: just disable them all */ + for (i = 0; i < 4; i++) { + mpic_write(mpic->tmregs, + i * MPIC_TIMER_STRIDE + MPIC_TIMER_DESTINATION, 0); + mpic_write(mpic->tmregs, + i * MPIC_TIMER_STRIDE + MPIC_TIMER_VECTOR_PRI, + MPIC_VECPRI_MASK | + (MPIC_VEC_TIMER_0 + i)); + } + + /* Initialize IPIs to our reserved vectors and mark them disabled for now */ + mpic_test_broken_ipi(mpic); + for (i = 0; i < 4; i++) { + mpic_ipi_write(i, + MPIC_VECPRI_MASK | + (10 << MPIC_VECPRI_PRIORITY_SHIFT) | + (MPIC_VEC_IPI_0 + i)); +#ifdef CONFIG_SMP + if (!(mpic->flags & MPIC_PRIMARY)) + continue; + irq_desc[mpic->ipi_offset+i].status |= IRQ_PER_CPU; + irq_desc[mpic->ipi_offset+i].handler = &mpic->hc_ipi; + +#endif /* CONFIG_SMP */ + } + + /* Initialize interrupt sources */ + if (mpic->irq_count == 0) + mpic->irq_count = mpic->num_sources; + +#ifdef CONFIG_MPIC_BROKEN_U3 + /* Do the ioapic fixups on U3 broken mpic */ + DBG("MPIC flags: %x\n", mpic->flags); + if ((mpic->flags & MPIC_BROKEN_U3) && (mpic->flags & MPIC_PRIMARY)) + mpic_scan_ioapics(mpic); +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + for (i = 0; i < mpic->num_sources; i++) { + /* start with vector = source number, and masked */ + u32 vecpri = MPIC_VECPRI_MASK | i | (8 << MPIC_VECPRI_PRIORITY_SHIFT); + int level = 0; + + /* if it's an IPI, we skip it */ + if ((mpic->irq_offset + i) >= (mpic->ipi_offset + i) && + (mpic->irq_offset + i) < (mpic->ipi_offset + i + 4)) + continue; + + /* do senses munging */ + if (mpic->senses && i < mpic->senses_count) { + if (mpic->senses[i] & IRQ_SENSE_LEVEL) + vecpri |= MPIC_VECPRI_SENSE_LEVEL; + if (mpic->senses[i] & IRQ_POLARITY_POSITIVE) + vecpri |= MPIC_VECPRI_POLARITY_POSITIVE; + } else + vecpri |= MPIC_VECPRI_SENSE_LEVEL; + + /* remember if it was a level interrupts */ + level = (vecpri & MPIC_VECPRI_SENSE_LEVEL); + + /* deal with broken U3 */ + if (mpic->flags & MPIC_BROKEN_U3) { +#ifdef CONFIG_MPIC_BROKEN_U3 + if (mpic_is_ht_interrupt(mpic, i)) { + vecpri &= ~(MPIC_VECPRI_SENSE_MASK | + MPIC_VECPRI_POLARITY_MASK); + vecpri |= MPIC_VECPRI_POLARITY_POSITIVE; + } +#else + printk(KERN_ERR "mpic: BROKEN_U3 set, but CONFIG doesn't match\n"); +#endif + } + + DBG("setup source %d, vecpri: %08x, level: %d\n", i, vecpri, + (level != 0)); + + /* init hw */ + mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri); + mpic_irq_write(i, MPIC_IRQ_DESTINATION, + 1 << get_hard_smp_processor_id(boot_cpuid)); + + /* init linux descriptors */ + if (i < mpic->irq_count) { + irq_desc[mpic->irq_offset+i].status = level ? IRQ_LEVEL : 0; + irq_desc[mpic->irq_offset+i].handler = &mpic->hc_irq; + } + } + + /* Init spurrious vector */ + mpic_write(mpic->gregs, MPIC_GREG_SPURIOUS, MPIC_VEC_SPURRIOUS); + + /* Disable 8259 passthrough */ + mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0, + mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) + | MPIC_GREG_GCONF_8259_PTHROU_DIS); + + /* Set current processor priority to 0 */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0); +} + + + +void mpic_irq_set_priority(unsigned int irq, unsigned int pri) +{ + int is_ipi; + struct mpic *mpic = mpic_find(irq, &is_ipi); + unsigned long flags; + u32 reg; + + spin_lock_irqsave(&mpic_lock, flags); + if (is_ipi) { + reg = mpic_ipi_read(irq - mpic->ipi_offset) & MPIC_VECPRI_PRIORITY_MASK; + mpic_ipi_write(irq - mpic->ipi_offset, + reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); + } else { + reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI) + & MPIC_VECPRI_PRIORITY_MASK; + mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI, + reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); + } + spin_unlock_irqrestore(&mpic_lock, flags); +} + +unsigned int mpic_irq_get_priority(unsigned int irq) +{ + int is_ipi; + struct mpic *mpic = mpic_find(irq, &is_ipi); + unsigned long flags; + u32 reg; + + spin_lock_irqsave(&mpic_lock, flags); + if (is_ipi) + reg = mpic_ipi_read(irq - mpic->ipi_offset); + else + reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI); + spin_unlock_irqrestore(&mpic_lock, flags); + return (reg & MPIC_VECPRI_PRIORITY_MASK) >> MPIC_VECPRI_PRIORITY_SHIFT; +} + +void mpic_setup_this_cpu(void) +{ +#ifdef CONFIG_SMP + struct mpic *mpic = mpic_primary; + unsigned long flags; + u32 msk = 1 << hard_smp_processor_id(); + unsigned int i; + + BUG_ON(mpic == NULL); + + DBG("%s: setup_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); + + spin_lock_irqsave(&mpic_lock, flags); + + /* let the mpic know we want intrs. default affinity is 0xffffffff + * until changed via /proc. That's how it's done on x86. If we want + * it differently, then we should make sure we also change the default + * values of irq_affinity in irq.c. + */ + if (distribute_irqs) { + for (i = 0; i < mpic->num_sources ; i++) + mpic_irq_write(i, MPIC_IRQ_DESTINATION, + mpic_irq_read(i, MPIC_IRQ_DESTINATION) | msk); + } + + /* Set current processor priority to 0 */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0); + + spin_unlock_irqrestore(&mpic_lock, flags); +#endif /* CONFIG_SMP */ +} + +void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask) +{ + struct mpic *mpic = mpic_primary; + + BUG_ON(mpic == NULL); + + DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no); + + mpic_cpu_write(MPIC_CPU_IPI_DISPATCH_0 + ipi_no * 0x10, + mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0])); +} + +int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs) +{ + u32 irq; + + irq = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK; + DBG("%s: get_one_irq(): %d\n", mpic->name, irq); + + if (mpic->cascade && irq == mpic->cascade_vec) { + DBG("%s: cascading ...\n", mpic->name); + irq = mpic->cascade(regs, mpic->cascade_data); + mpic_eoi(mpic); + return irq; + } + if (unlikely(irq == MPIC_VEC_SPURRIOUS)) + return -1; + if (irq < MPIC_VEC_IPI_0) + return irq + mpic->irq_offset; + DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0); + return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset; +} + +int mpic_get_irq(struct pt_regs *regs) +{ + struct mpic *mpic = mpic_primary; + + BUG_ON(mpic == NULL); + + return mpic_get_one_irq(mpic, regs); +} + + +#ifdef CONFIG_SMP +void mpic_request_ipis(void) +{ + struct mpic *mpic = mpic_primary; + + BUG_ON(mpic == NULL); + + printk("requesting IPIs ... \n"); + + /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */ + request_irq(mpic->ipi_offset+0, mpic_ipi_action, SA_INTERRUPT, + "IPI0 (call function)", mpic); + request_irq(mpic->ipi_offset+1, mpic_ipi_action, SA_INTERRUPT, + "IPI1 (reschedule)", mpic); + request_irq(mpic->ipi_offset+2, mpic_ipi_action, SA_INTERRUPT, + "IPI2 (unused)", mpic); + request_irq(mpic->ipi_offset+3, mpic_ipi_action, SA_INTERRUPT, + "IPI3 (debugger break)", mpic); + + printk("IPIs requested... \n"); +} +#endif /* CONFIG_SMP */ diff --git a/arch/ppc64/kernel/mpic.h b/arch/ppc64/kernel/mpic.h new file mode 100644 index 00000000000..571b3c99e06 --- /dev/null +++ b/arch/ppc64/kernel/mpic.h @@ -0,0 +1,267 @@ +#include + +/* + * Global registers + */ + +#define MPIC_GREG_BASE 0x01000 + +#define MPIC_GREG_FEATURE_0 0x00000 +#define MPIC_GREG_FEATURE_LAST_SRC_MASK 0x07ff0000 +#define MPIC_GREG_FEATURE_LAST_SRC_SHIFT 16 +#define MPIC_GREG_FEATURE_LAST_CPU_MASK 0x00001f00 +#define MPIC_GREG_FEATURE_LAST_CPU_SHIFT 8 +#define MPIC_GREG_FEATURE_VERSION_MASK 0xff +#define MPIC_GREG_FEATURE_1 0x00010 +#define MPIC_GREG_GLOBAL_CONF_0 0x00020 +#define MPIC_GREG_GCONF_RESET 0x80000000 +#define MPIC_GREG_GCONF_8259_PTHROU_DIS 0x20000000 +#define MPIC_GREG_GCONF_BASE_MASK 0x000fffff +#define MPIC_GREG_GLOBAL_CONF_1 0x00030 +#define MPIC_GREG_VENDOR_0 0x00040 +#define MPIC_GREG_VENDOR_1 0x00050 +#define MPIC_GREG_VENDOR_2 0x00060 +#define MPIC_GREG_VENDOR_3 0x00070 +#define MPIC_GREG_VENDOR_ID 0x00080 +#define MPIC_GREG_VENDOR_ID_STEPPING_MASK 0x00ff0000 +#define MPIC_GREG_VENDOR_ID_STEPPING_SHIFT 16 +#define MPIC_GREG_VENDOR_ID_DEVICE_ID_MASK 0x0000ff00 +#define MPIC_GREG_VENDOR_ID_DEVICE_ID_SHIFT 8 +#define MPIC_GREG_VENDOR_ID_VENDOR_ID_MASK 0x000000ff +#define MPIC_GREG_PROCESSOR_INIT 0x00090 +#define MPIC_GREG_IPI_VECTOR_PRI_0 0x000a0 +#define MPIC_GREG_IPI_VECTOR_PRI_1 0x000b0 +#define MPIC_GREG_IPI_VECTOR_PRI_2 0x000c0 +#define MPIC_GREG_IPI_VECTOR_PRI_3 0x000d0 +#define MPIC_GREG_SPURIOUS 0x000e0 +#define MPIC_GREG_TIMER_FREQ 0x000f0 + +/* + * + * Timer registers + */ +#define MPIC_TIMER_BASE 0x01100 +#define MPIC_TIMER_STRIDE 0x40 + +#define MPIC_TIMER_CURRENT_CNT 0x00000 +#define MPIC_TIMER_BASE_CNT 0x00010 +#define MPIC_TIMER_VECTOR_PRI 0x00020 +#define MPIC_TIMER_DESTINATION 0x00030 + +/* + * Per-Processor registers + */ + +#define MPIC_CPU_THISBASE 0x00000 +#define MPIC_CPU_BASE 0x20000 +#define MPIC_CPU_STRIDE 0x01000 + +#define MPIC_CPU_IPI_DISPATCH_0 0x00040 +#define MPIC_CPU_IPI_DISPATCH_1 0x00050 +#define MPIC_CPU_IPI_DISPATCH_2 0x00060 +#define MPIC_CPU_IPI_DISPATCH_3 0x00070 +#define MPIC_CPU_CURRENT_TASK_PRI 0x00080 +#define MPIC_CPU_TASKPRI_MASK 0x0000000f +#define MPIC_CPU_WHOAMI 0x00090 +#define MPIC_CPU_WHOAMI_MASK 0x0000001f +#define MPIC_CPU_INTACK 0x000a0 +#define MPIC_CPU_EOI 0x000b0 + +/* + * Per-source registers + */ + +#define MPIC_IRQ_BASE 0x10000 +#define MPIC_IRQ_STRIDE 0x00020 +#define MPIC_IRQ_VECTOR_PRI 0x00000 +#define MPIC_VECPRI_MASK 0x80000000 +#define MPIC_VECPRI_ACTIVITY 0x40000000 /* Read Only */ +#define MPIC_VECPRI_PRIORITY_MASK 0x000f0000 +#define MPIC_VECPRI_PRIORITY_SHIFT 16 +#define MPIC_VECPRI_VECTOR_MASK 0x000007ff +#define MPIC_VECPRI_POLARITY_POSITIVE 0x00800000 +#define MPIC_VECPRI_POLARITY_NEGATIVE 0x00000000 +#define MPIC_VECPRI_POLARITY_MASK 0x00800000 +#define MPIC_VECPRI_SENSE_LEVEL 0x00400000 +#define MPIC_VECPRI_SENSE_EDGE 0x00000000 +#define MPIC_VECPRI_SENSE_MASK 0x00400000 +#define MPIC_IRQ_DESTINATION 0x00010 + +#define MPIC_MAX_IRQ_SOURCES 2048 +#define MPIC_MAX_CPUS 32 +#define MPIC_MAX_ISU 32 + +/* + * Special vector numbers (internal use only) + */ +#define MPIC_VEC_SPURRIOUS 255 +#define MPIC_VEC_IPI_3 254 +#define MPIC_VEC_IPI_2 253 +#define MPIC_VEC_IPI_1 252 +#define MPIC_VEC_IPI_0 251 + +/* unused */ +#define MPIC_VEC_TIMER_3 250 +#define MPIC_VEC_TIMER_2 249 +#define MPIC_VEC_TIMER_1 248 +#define MPIC_VEC_TIMER_0 247 + +/* Type definition of the cascade handler */ +typedef int (*mpic_cascade_t)(struct pt_regs *regs, void *data); + +#ifdef CONFIG_MPIC_BROKEN_U3 +/* Fixup table entry */ +struct mpic_irq_fixup +{ + u8 __iomem *base; + unsigned int irq; +}; +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + +/* The instance data of a given MPIC */ +struct mpic +{ + /* The "linux" controller struct */ + hw_irq_controller hc_irq; +#ifdef CONFIG_SMP + hw_irq_controller hc_ipi; +#endif + const char *name; + /* Flags */ + unsigned int flags; + /* How many irq sources in a given ISU */ + unsigned int isu_size; + unsigned int isu_shift; + unsigned int isu_mask; + /* Offset of irq vector numbers */ + unsigned int irq_offset; + unsigned int irq_count; + /* Offset of ipi vector numbers */ + unsigned int ipi_offset; + /* Number of sources */ + unsigned int num_sources; + /* Number of CPUs */ + unsigned int num_cpus; + /* cascade handler */ + mpic_cascade_t cascade; + void *cascade_data; + unsigned int cascade_vec; + /* senses array */ + unsigned char *senses; + unsigned int senses_count; + +#ifdef CONFIG_MPIC_BROKEN_U3 + /* The fixup table */ + struct mpic_irq_fixup *fixups; + spinlock_t fixup_lock; +#endif + + /* The various ioremap'ed bases */ + volatile u32 __iomem *gregs; + volatile u32 __iomem *tmregs; + volatile u32 __iomem *cpuregs[MPIC_MAX_CPUS]; + volatile u32 __iomem *isus[MPIC_MAX_ISU]; + + /* link */ + struct mpic *next; +}; + +/* This is the primary controller, only that one has IPIs and + * has afinity control. A non-primary MPIC always uses CPU0 + * registers only + */ +#define MPIC_PRIMARY 0x00000001 +/* Set this for a big-endian MPIC */ +#define MPIC_BIG_ENDIAN 0x00000002 +/* Broken U3 MPIC */ +#define MPIC_BROKEN_U3 0x00000004 +/* Broken IPI registers (autodetected) */ +#define MPIC_BROKEN_IPI 0x00000008 +/* MPIC wants a reset */ +#define MPIC_WANTS_RESET 0x00000010 + +/* Allocate the controller structure and setup the linux irq descs + * for the range if interrupts passed in. No HW initialization is + * actually performed. + * + * @phys_addr: physial base address of the MPIC + * @flags: flags, see constants above + * @isu_size: number of interrupts in an ISU. Use 0 to use a + * standard ISU-less setup (aka powermac) + * @irq_offset: first irq number to assign to this mpic + * @irq_count: number of irqs to use with this mpic IRQ sources. Pass 0 + * to match the number of sources + * @ipi_offset: first irq number to assign to this mpic IPI sources, + * used only on primary mpic + * @senses: array of sense values + * @senses_num: number of entries in the array + * + * Note about the sense array. If none is passed, all interrupts are + * setup to be level negative unless MPIC_BROKEN_U3 is set in which + * case they are edge positive (and the array is ignored anyway). + * The values in the array start at the first source of the MPIC, + * that is senses[0] correspond to linux irq "irq_offset". + */ +extern struct mpic *mpic_alloc(unsigned long phys_addr, + unsigned int flags, + unsigned int isu_size, + unsigned int irq_offset, + unsigned int irq_count, + unsigned int ipi_offset, + unsigned char *senses, + unsigned int senses_num, + const char *name); + +/* Assign ISUs, to call before mpic_init() + * + * @mpic: controller structure as returned by mpic_alloc() + * @isu_num: ISU number + * @phys_addr: physical address of the ISU + */ +extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, + unsigned long phys_addr); + +/* Initialize the controller. After this has been called, none of the above + * should be called again for this mpic + */ +extern void mpic_init(struct mpic *mpic); + +/* Setup a cascade. Currently, only one cascade is supported this + * way, though you can always do a normal request_irq() and add + * other cascades this way. You should call this _after_ having + * added all the ISUs + * + * @irq_no: "linux" irq number of the cascade (that is offset'ed vector) + * @handler: cascade handler function + */ +extern void mpic_setup_cascade(unsigned int irq_no, mpic_cascade_t hanlder, + void *data); + +/* + * All of the following functions must only be used after the + * ISUs have been assigned and the controller fully initialized + * with mpic_init() + */ + + +/* Change/Read the priority of an interrupt. Default is 8 for irqs and + * 10 for IPIs. You can call this on both IPIs and IRQ numbers, but the + * IPI number is then the offset'ed (linux irq number mapped to the IPI) + */ +extern void mpic_irq_set_priority(unsigned int irq, unsigned int pri); +extern unsigned int mpic_irq_get_priority(unsigned int irq); + +/* Setup a non-boot CPU */ +extern void mpic_setup_this_cpu(void); + +/* Request IPIs on primary mpic */ +extern void mpic_request_ipis(void); + +/* Send an IPI (non offseted number 0..3) */ +extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask); + +/* Fetch interrupt from a given mpic */ +extern int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs); +/* This one gets to the primary mpic */ +extern int mpic_get_irq(struct pt_regs *regs); diff --git a/arch/ppc64/kernel/nvram.c b/arch/ppc64/kernel/nvram.c new file mode 100644 index 00000000000..b9069c2d193 --- /dev/null +++ b/arch/ppc64/kernel/nvram.c @@ -0,0 +1,746 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /dev/nvram driver for PPC64 + * + * This perhaps should live in drivers/char + * + * TODO: Split the /dev/nvram part (that one can use + * drivers/char/generic_nvram.c) from the arch & partition + * parsing code. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG_NVRAM + +static int nvram_scan_partitions(void); +static int nvram_setup_partition(void); +static int nvram_create_os_partition(void); +static int nvram_remove_os_partition(void); + +static struct nvram_partition * nvram_part; +static long nvram_error_log_index = -1; +static long nvram_error_log_size = 0; + +int no_logging = 1; /* Until we initialize everything, + * make sure we don't try logging + * anything */ + +extern volatile int error_log_cnt; + +struct err_log_info { + int error_type; + unsigned int seq_num; +}; + +static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin) +{ + int size; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + size = ppc_md.nvram_size(); + + switch (origin) { + case 1: + offset += file->f_pos; + break; + case 2: + offset += size; + break; + } + if (offset < 0) + return -EINVAL; + file->f_pos = offset; + return file->f_pos; +} + + +static ssize_t dev_nvram_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + ssize_t len; + char *tmp_buffer; + int size; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + size = ppc_md.nvram_size(); + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + if (*ppos >= size) + return 0; + if (count > size) + count = size; + + tmp_buffer = (char *) kmalloc(count, GFP_KERNEL); + if (!tmp_buffer) { + printk(KERN_ERR "dev_read_nvram: kmalloc failed\n"); + return -ENOMEM; + } + + len = ppc_md.nvram_read(tmp_buffer, count, ppos); + if ((long)len <= 0) { + kfree(tmp_buffer); + return len; + } + + if (copy_to_user(buf, tmp_buffer, len)) { + kfree(tmp_buffer); + return -EFAULT; + } + + kfree(tmp_buffer); + return len; + +} + +static ssize_t dev_nvram_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + ssize_t len; + char * tmp_buffer; + int size; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + size = ppc_md.nvram_size(); + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + if (*ppos >= size) + return 0; + if (count > size) + count = size; + + tmp_buffer = (char *) kmalloc(count, GFP_KERNEL); + if (!tmp_buffer) { + printk(KERN_ERR "dev_nvram_write: kmalloc failed\n"); + return -ENOMEM; + } + + if (copy_from_user(tmp_buffer, buf, count)) { + kfree(tmp_buffer); + return -EFAULT; + } + + len = ppc_md.nvram_write(tmp_buffer, count, ppos); + if ((long)len <= 0) { + kfree(tmp_buffer); + return len; + } + + kfree(tmp_buffer); + return len; +} + +static int dev_nvram_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + switch(cmd) { +#ifdef CONFIG_PPC_PMAC + case OBSOLETE_PMAC_NVRAM_GET_OFFSET: + printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n"); + case IOC_NVRAM_GET_OFFSET: { + int part, offset; + + if (systemcfg->platform != PLATFORM_POWERMAC) + return -EINVAL; + if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) + return -EFAULT; + if (part < pmac_nvram_OF || part > pmac_nvram_NR) + return -EINVAL; + offset = pmac_get_partition(part); + if (offset < 0) + return offset; + if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0) + return -EFAULT; + return 0; + } +#endif /* CONFIG_PPC_PMAC */ + } + return -EINVAL; +} + +struct file_operations nvram_fops = { + .owner = THIS_MODULE, + .llseek = dev_nvram_llseek, + .read = dev_nvram_read, + .write = dev_nvram_write, + .ioctl = dev_nvram_ioctl, +}; + +static struct miscdevice nvram_dev = { + NVRAM_MINOR, + "nvram", + &nvram_fops +}; + + +#ifdef DEBUG_NVRAM +static void nvram_print_partitions(char * label) +{ + struct list_head * p; + struct nvram_partition * tmp_part; + + printk(KERN_WARNING "--------%s---------\n", label); + printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n"); + list_for_each(p, &nvram_part->partition) { + tmp_part = list_entry(p, struct nvram_partition, partition); + printk(KERN_WARNING "%d \t%02x\t%02x\t%d\t%s\n", + tmp_part->index, tmp_part->header.signature, + tmp_part->header.checksum, tmp_part->header.length, + tmp_part->header.name); + } +} +#endif + + +static int nvram_write_header(struct nvram_partition * part) +{ + loff_t tmp_index; + int rc; + + tmp_index = part->index; + rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index); + + return rc; +} + + +static unsigned char nvram_checksum(struct nvram_header *p) +{ + unsigned int c_sum, c_sum2; + unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */ + c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5]; + + /* The sum may have spilled into the 3rd byte. Fold it back. */ + c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff; + /* The sum cannot exceed 2 bytes. Fold it into a checksum */ + c_sum2 = (c_sum >> 8) + (c_sum << 8); + c_sum = ((c_sum + c_sum2) >> 8) & 0xff; + return c_sum; +} + + +/* + * Find an nvram partition, sig can be 0 for any + * partition or name can be NULL for any name, else + * tries to match both + */ +struct nvram_partition *nvram_find_partition(int sig, const char *name) +{ + struct nvram_partition * part; + struct list_head * p; + + list_for_each(p, &nvram_part->partition) { + part = list_entry(p, struct nvram_partition, partition); + + if (sig && part->header.signature != sig) + continue; + if (name && 0 != strncmp(name, part->header.name, 12)) + continue; + return part; + } + return NULL; +} +EXPORT_SYMBOL(nvram_find_partition); + + +static int nvram_remove_os_partition(void) +{ + struct list_head *i; + struct list_head *j; + struct nvram_partition * part; + struct nvram_partition * cur_part; + int rc; + + list_for_each(i, &nvram_part->partition) { + part = list_entry(i, struct nvram_partition, partition); + if (part->header.signature != NVRAM_SIG_OS) + continue; + + /* Make os partition a free partition */ + part->header.signature = NVRAM_SIG_FREE; + sprintf(part->header.name, "wwwwwwwwwwww"); + part->header.checksum = nvram_checksum(&part->header); + + /* Merge contiguous free partitions backwards */ + list_for_each_prev(j, &part->partition) { + cur_part = list_entry(j, struct nvram_partition, partition); + if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) { + break; + } + + part->header.length += cur_part->header.length; + part->header.checksum = nvram_checksum(&part->header); + part->index = cur_part->index; + + list_del(&cur_part->partition); + kfree(cur_part); + j = &part->partition; /* fixup our loop */ + } + + /* Merge contiguous free partitions forwards */ + list_for_each(j, &part->partition) { + cur_part = list_entry(j, struct nvram_partition, partition); + if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) { + break; + } + + part->header.length += cur_part->header.length; + part->header.checksum = nvram_checksum(&part->header); + + list_del(&cur_part->partition); + kfree(cur_part); + j = &part->partition; /* fixup our loop */ + } + + rc = nvram_write_header(part); + if (rc <= 0) { + printk(KERN_ERR "nvram_remove_os_partition: nvram_write failed (%d)\n", rc); + return rc; + } + + } + + return 0; +} + +/* nvram_create_os_partition + * + * Create a OS linux partition to buffer error logs. + * Will create a partition starting at the first free + * space found if space has enough room. + */ +static int nvram_create_os_partition(void) +{ + struct list_head * p; + struct nvram_partition * part; + struct nvram_partition * new_part = NULL; + struct nvram_partition * free_part = NULL; + int seq_init[2] = { 0, 0 }; + loff_t tmp_index; + long size = 0; + int rc; + + /* Find a free partition that will give us the maximum needed size + If can't find one that will give us the minimum size needed */ + list_for_each(p, &nvram_part->partition) { + part = list_entry(p, struct nvram_partition, partition); + if (part->header.signature != NVRAM_SIG_FREE) + continue; + + if (part->header.length >= NVRAM_MAX_REQ) { + size = NVRAM_MAX_REQ; + free_part = part; + break; + } + if (!size && part->header.length >= NVRAM_MIN_REQ) { + size = NVRAM_MIN_REQ; + free_part = part; + } + } + if (!size) { + return -ENOSPC; + } + + /* Create our OS partition */ + new_part = (struct nvram_partition *) + kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + if (!new_part) { + printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n"); + return -ENOMEM; + } + + new_part->index = free_part->index; + new_part->header.signature = NVRAM_SIG_OS; + new_part->header.length = size; + sprintf(new_part->header.name, "ppc64,linux"); + new_part->header.checksum = nvram_checksum(&new_part->header); + + rc = nvram_write_header(new_part); + if (rc <= 0) { + printk(KERN_ERR "nvram_create_os_partition: nvram_write_header \ + failed (%d)\n", rc); + return rc; + } + + /* make sure and initialize to zero the sequence number and the error + type logged */ + tmp_index = new_part->index + NVRAM_HEADER_LEN; + rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_create_os_partition: nvram_write failed (%d)\n", rc); + return rc; + } + + nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN; + nvram_error_log_size = ((part->header.length - 1) * + NVRAM_BLOCK_LEN) - sizeof(struct err_log_info); + + list_add_tail(&new_part->partition, &free_part->partition); + + if (free_part->header.length <= size) { + list_del(&free_part->partition); + kfree(free_part); + return 0; + } + + /* Adjust the partition we stole the space from */ + free_part->index += size * NVRAM_BLOCK_LEN; + free_part->header.length -= size; + free_part->header.checksum = nvram_checksum(&free_part->header); + + rc = nvram_write_header(free_part); + if (rc <= 0) { + printk(KERN_ERR "nvram_create_os_partition: nvram_write_header " + "failed (%d)\n", rc); + return rc; + } + + return 0; +} + + +/* nvram_setup_partition + * + * This will setup the partition we need for buffering the + * error logs and cleanup partitions if needed. + * + * The general strategy is the following: + * 1.) If there is ppc64,linux partition large enough then use it. + * 2.) If there is not a ppc64,linux partition large enough, search + * for a free partition that is large enough. + * 3.) If there is not a free partition large enough remove + * _all_ OS partitions and consolidate the space. + * 4.) Will first try getting a chunk that will satisfy the maximum + * error log size (NVRAM_MAX_REQ). + * 5.) If the max chunk cannot be allocated then try finding a chunk + * that will satisfy the minum needed (NVRAM_MIN_REQ). + */ +static int nvram_setup_partition(void) +{ + struct list_head * p; + struct nvram_partition * part; + int rc; + + /* For now, we don't do any of this on pmac, until I + * have figured out if it's worth killing some unused stuffs + * in our nvram, as Apple defined partitions use pretty much + * all of the space + */ + if (systemcfg->platform == PLATFORM_POWERMAC) + return -ENOSPC; + + /* see if we have an OS partition that meets our needs. + will try getting the max we need. If not we'll delete + partitions and try again. */ + list_for_each(p, &nvram_part->partition) { + part = list_entry(p, struct nvram_partition, partition); + if (part->header.signature != NVRAM_SIG_OS) + continue; + + if (strcmp(part->header.name, "ppc64,linux")) + continue; + + if (part->header.length >= NVRAM_MIN_REQ) { + /* found our partition */ + nvram_error_log_index = part->index + NVRAM_HEADER_LEN; + nvram_error_log_size = ((part->header.length - 1) * + NVRAM_BLOCK_LEN) - sizeof(struct err_log_info); + return 0; + } + } + + /* try creating a partition with the free space we have */ + rc = nvram_create_os_partition(); + if (!rc) { + return 0; + } + + /* need to free up some space */ + rc = nvram_remove_os_partition(); + if (rc) { + return rc; + } + + /* create a partition in this new space */ + rc = nvram_create_os_partition(); + if (rc) { + printk(KERN_ERR "nvram_create_os_partition: Could not find a " + "NVRAM partition large enough\n"); + return rc; + } + + return 0; +} + + +static int nvram_scan_partitions(void) +{ + loff_t cur_index = 0; + struct nvram_header phead; + struct nvram_partition * tmp_part; + unsigned char c_sum; + char * header; + int total_size; + int err; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + total_size = ppc_md.nvram_size(); + + header = (char *) kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL); + if (!header) { + printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n"); + return -ENOMEM; + } + + while (cur_index < total_size) { + + err = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index); + if (err != NVRAM_HEADER_LEN) { + printk(KERN_ERR "nvram_scan_partitions: Error parsing " + "nvram partitions\n"); + goto out; + } + + cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */ + + memcpy(&phead, header, NVRAM_HEADER_LEN); + + err = 0; + c_sum = nvram_checksum(&phead); + if (c_sum != phead.checksum) { + printk(KERN_WARNING "WARNING: nvram partition checksum" + " was %02x, should be %02x!\n", + phead.checksum, c_sum); + printk(KERN_WARNING "Terminating nvram partition scan\n"); + goto out; + } + if (!phead.length) { + printk(KERN_WARNING "WARNING: nvram corruption " + "detected: 0-length partition\n"); + goto out; + } + tmp_part = (struct nvram_partition *) + kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + err = -ENOMEM; + if (!tmp_part) { + printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n"); + goto out; + } + + memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN); + tmp_part->index = cur_index; + list_add_tail(&tmp_part->partition, &nvram_part->partition); + + cur_index += phead.length * NVRAM_BLOCK_LEN; + } + err = 0; + + out: + kfree(header); + return err; +} + +static int __init nvram_init(void) +{ + int error; + int rc; + + if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0) + return -ENODEV; + + rc = misc_register(&nvram_dev); + if (rc != 0) { + printk(KERN_ERR "nvram_init: failed to register device\n"); + return rc; + } + + /* initialize our anchor for the nvram partition list */ + nvram_part = (struct nvram_partition *) kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + if (!nvram_part) { + printk(KERN_ERR "nvram_init: Failed kmalloc\n"); + return -ENOMEM; + } + INIT_LIST_HEAD(&nvram_part->partition); + + /* Get all the NVRAM partitions */ + error = nvram_scan_partitions(); + if (error) { + printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n"); + return error; + } + + if(nvram_setup_partition()) + printk(KERN_WARNING "nvram_init: Could not find nvram partition" + " for nvram buffered error logging.\n"); + +#ifdef DEBUG_NVRAM + nvram_print_partitions("NVRAM Partitions"); +#endif + + return rc; +} + +void __exit nvram_cleanup(void) +{ + misc_deregister( &nvram_dev ); +} + + +#ifdef CONFIG_PPC_PSERIES + +/* nvram_write_error_log + * + * We need to buffer the error logs into nvram to ensure that we have + * the failure information to decode. If we have a severe error there + * is no way to guarantee that the OS or the machine is in a state to + * get back to user land and write the error to disk. For example if + * the SCSI device driver causes a Machine Check by writing to a bad + * IO address, there is no way of guaranteeing that the device driver + * is in any state that is would also be able to write the error data + * captured to disk, thus we buffer it in NVRAM for analysis on the + * next boot. + * + * In NVRAM the partition containing the error log buffer will looks like: + * Header (in bytes): + * +-----------+----------+--------+------------+------------------+ + * | signature | checksum | length | name | data | + * |0 |1 |2 3|4 15|16 length-1| + * +-----------+----------+--------+------------+------------------+ + * + * The 'data' section would look like (in bytes): + * +--------------+------------+-----------------------------------+ + * | event_logged | sequence # | error log | + * |0 3|4 7|8 nvram_error_log_size-1| + * +--------------+------------+-----------------------------------+ + * + * event_logged: 0 if event has not been logged to syslog, 1 if it has + * sequence #: The unique sequence # for each event. (until it wraps) + * error log: The error log from event_scan + */ +int nvram_write_error_log(char * buff, int length, unsigned int err_type) +{ + int rc; + loff_t tmp_index; + struct err_log_info info; + + if (no_logging) { + return -EPERM; + } + + if (nvram_error_log_index == -1) { + return -ESPIPE; + } + + if (length > nvram_error_log_size) { + length = nvram_error_log_size; + } + + info.error_type = err_type; + info.seq_num = error_log_cnt; + + tmp_index = nvram_error_log_index; + + rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc); + return rc; + } + + rc = ppc_md.nvram_write(buff, length, &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc); + return rc; + } + + return 0; +} + +/* nvram_read_error_log + * + * Reads nvram for error log for at most 'length' + */ +int nvram_read_error_log(char * buff, int length, unsigned int * err_type) +{ + int rc; + loff_t tmp_index; + struct err_log_info info; + + if (nvram_error_log_index == -1) + return -1; + + if (length > nvram_error_log_size) + length = nvram_error_log_size; + + tmp_index = nvram_error_log_index; + + rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); + return rc; + } + + rc = ppc_md.nvram_read(buff, length, &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); + return rc; + } + + error_log_cnt = info.seq_num; + *err_type = info.error_type; + + return 0; +} + +/* This doesn't actually zero anything, but it sets the event_logged + * word to tell that this event is safely in syslog. + */ +int nvram_clear_error_log(void) +{ + loff_t tmp_index; + int clear_word = ERR_FLAG_ALREADY_LOGGED; + int rc; + + tmp_index = nvram_error_log_index; + + rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc); + return rc; + } + + return 0; +} + +#endif /* CONFIG_PPC_PSERIES */ + +module_init(nvram_init); +module_exit(nvram_cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c new file mode 100644 index 00000000000..b27a75f1b98 --- /dev/null +++ b/arch/ppc64/kernel/of_device.c @@ -0,0 +1,272 @@ +#include +#include +#include +#include +#include +#include +#include + +/** + * of_match_device - Tell if an of_device structure has a matching + * of_match structure + * @ids: array of of device match structures to search in + * @dev: the of device structure to match against + * + * Used by a driver to check whether an of_device present in the + * system is in its list of supported devices. + */ +const struct of_match * of_match_device(const struct of_match *matches, + const struct of_device *dev) +{ + if (!dev->node) + return NULL; + while (matches->name || matches->type || matches->compatible) { + int match = 1; + if (matches->name && matches->name != OF_ANY_MATCH) + match &= dev->node->name + && !strcmp(matches->name, dev->node->name); + if (matches->type && matches->type != OF_ANY_MATCH) + match &= dev->node->type + && !strcmp(matches->type, dev->node->type); + if (matches->compatible && matches->compatible != OF_ANY_MATCH) + match &= device_is_compatible(dev->node, + matches->compatible); + if (match) + return matches; + matches++; + } + return NULL; +} + +static int of_platform_bus_match(struct device *dev, struct device_driver *drv) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * of_drv = to_of_platform_driver(drv); + const struct of_match * matches = of_drv->match_table; + + if (!matches) + return 0; + + return of_match_device(matches, of_dev) != NULL; +} + +struct of_device *of_dev_get(struct of_device *dev) +{ + struct device *tmp; + + if (!dev) + return NULL; + tmp = get_device(&dev->dev); + if (tmp) + return to_of_device(tmp); + else + return NULL; +} + +void of_dev_put(struct of_device *dev) +{ + if (dev) + put_device(&dev->dev); +} + + +static int of_device_probe(struct device *dev) +{ + int error = -ENODEV; + struct of_platform_driver *drv; + struct of_device *of_dev; + const struct of_match *match; + + drv = to_of_platform_driver(dev->driver); + of_dev = to_of_device(dev); + + if (!drv->probe) + return error; + + of_dev_get(of_dev); + + match = of_match_device(drv->match_table, of_dev); + if (match) + error = drv->probe(of_dev, match); + if (error) + of_dev_put(of_dev); + + return error; +} + +static int of_device_remove(struct device *dev) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * drv = to_of_platform_driver(dev->driver); + + if (dev->driver && drv->remove) + drv->remove(of_dev); + return 0; +} + +static int of_device_suspend(struct device *dev, u32 state) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * drv = to_of_platform_driver(dev->driver); + int error = 0; + + if (dev->driver && drv->suspend) + error = drv->suspend(of_dev, state); + return error; +} + +static int of_device_resume(struct device * dev) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * drv = to_of_platform_driver(dev->driver); + int error = 0; + + if (dev->driver && drv->resume) + error = drv->resume(of_dev); + return error; +} + +struct bus_type of_platform_bus_type = { + .name = "of_platform", + .match = of_platform_bus_match, + .suspend = of_device_suspend, + .resume = of_device_resume, +}; + +static int __init of_bus_driver_init(void) +{ + return bus_register(&of_platform_bus_type); +} + +postcore_initcall(of_bus_driver_init); + +int of_register_driver(struct of_platform_driver *drv) +{ + int count = 0; + + /* initialize common driver fields */ + drv->driver.name = drv->name; + drv->driver.bus = &of_platform_bus_type; + drv->driver.probe = of_device_probe; + drv->driver.remove = of_device_remove; + + /* register with core */ + count = driver_register(&drv->driver); + return count ? count : 1; +} + +void of_unregister_driver(struct of_platform_driver *drv) +{ + driver_unregister(&drv->driver); +} + + +static ssize_t dev_show_devspec(struct device *dev, char *buf) +{ + struct of_device *ofdev; + + ofdev = to_of_device(dev); + return sprintf(buf, "%s", ofdev->node->full_name); +} + +static DEVICE_ATTR(devspec, S_IRUGO, dev_show_devspec, NULL); + +/** + * of_release_dev - free an of device structure when all users of it are finished. + * @dev: device that's been disconnected + * + * Will be called only by the device core when all users of this of device are + * done. + */ +void of_release_dev(struct device *dev) +{ + struct of_device *ofdev; + + ofdev = to_of_device(dev); + kfree(ofdev); +} + +int of_device_register(struct of_device *ofdev) +{ + int rc; + struct of_device **odprop; + + BUG_ON(ofdev->node == NULL); + + odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL); + if (!odprop) { + struct property *new_prop; + + new_prop = kmalloc(sizeof(struct property) + sizeof(struct of_device *), + GFP_KERNEL); + if (new_prop == NULL) + return -ENOMEM; + new_prop->name = "linux,device"; + new_prop->length = sizeof(sizeof(struct of_device *)); + new_prop->value = (unsigned char *)&new_prop[1]; + odprop = (struct of_device **)new_prop->value; + *odprop = NULL; + prom_add_property(ofdev->node, new_prop); + } + *odprop = ofdev; + + rc = device_register(&ofdev->dev); + if (rc) + return rc; + + device_create_file(&ofdev->dev, &dev_attr_devspec); + + return 0; +} + +void of_device_unregister(struct of_device *ofdev) +{ + struct of_device **odprop; + + device_remove_file(&ofdev->dev, &dev_attr_devspec); + + odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL); + if (odprop) + *odprop = NULL; + + device_unregister(&ofdev->dev); +} + +struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id) +{ + struct of_device *dev; + u32 *reg; + + dev = kmalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return NULL; + memset(dev, 0, sizeof(*dev)); + + dev->node = np; + dev->dma_mask = 0xffffffffUL; + dev->dev.dma_mask = &dev->dma_mask; + dev->dev.parent = NULL; + dev->dev.bus = &of_platform_bus_type; + dev->dev.release = of_release_dev; + + reg = (u32 *)get_property(np, "reg", NULL); + strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE); + + if (of_device_register(dev) != 0) { + kfree(dev); + return NULL; + } + + return dev; +} + +EXPORT_SYMBOL(of_match_device); +EXPORT_SYMBOL(of_platform_bus_type); +EXPORT_SYMBOL(of_register_driver); +EXPORT_SYMBOL(of_unregister_driver); +EXPORT_SYMBOL(of_device_register); +EXPORT_SYMBOL(of_device_unregister); +EXPORT_SYMBOL(of_dev_get); +EXPORT_SYMBOL(of_dev_put); +EXPORT_SYMBOL(of_platform_device_create); +EXPORT_SYMBOL(of_release_dev); diff --git a/arch/ppc64/kernel/pSeries_hvCall.S b/arch/ppc64/kernel/pSeries_hvCall.S new file mode 100644 index 00000000000..0715d303801 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_hvCall.S @@ -0,0 +1,123 @@ +/* + * arch/ppc64/kernel/pSeries_hvCall.S + * + * This file contains the generic code to perform a call to the + * pSeries LPAR hypervisor. + * NOTE: this file will go away when we move to inline this work. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +#define STK_PARM(i) (48 + ((i)-3)*8) + + .text + +/* long plpar_hcall(unsigned long opcode, R3 + unsigned long arg1, R4 + unsigned long arg2, R5 + unsigned long arg3, R6 + unsigned long arg4, R7 + unsigned long *out1, R8 + unsigned long *out2, R9 + unsigned long *out3); R10 + */ +_GLOBAL(plpar_hcall) + mfcr r0 + + std r8,STK_PARM(r8)(r1) /* Save out ptrs */ + std r9,STK_PARM(r9)(r1) + std r10,STK_PARM(r10)(r1) + + stw r0,8(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + + ld r8,STK_PARM(r8)(r1) /* Fetch r4-r6 ret args */ + ld r9,STK_PARM(r9)(r1) + ld r10,STK_PARM(r10)(r1) + std r4,0(r8) + std r5,0(r9) + std r6,0(r10) + + mtcrf 0xff,r0 + blr /* return r3 = status */ + + +/* Simple interface with no output values (other than status) */ +_GLOBAL(plpar_hcall_norets) + mfcr r0 + stw r0,8(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + + +/* long plpar_hcall_8arg_2ret(unsigned long opcode, R3 + unsigned long arg1, R4 + unsigned long arg2, R5 + unsigned long arg3, R6 + unsigned long arg4, R7 + unsigned long arg5, R8 + unsigned long arg6, R9 + unsigned long arg7, R10 + unsigned long arg8, 112(R1) + unsigned long *out1); 120(R1) + */ +_GLOBAL(plpar_hcall_8arg_2ret) + mfcr r0 + ld r11,STK_PARM(r11)(r1) /* put arg8 in R11 */ + stw r0,8(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + ld r10,STK_PARM(r12)(r1) /* Fetch r4 ret arg */ + std r4,0(r10) + mtcrf 0xff,r0 + blr /* return r3 = status */ + + +/* long plpar_hcall_4out(unsigned long opcode, R3 + unsigned long arg1, R4 + unsigned long arg2, R5 + unsigned long arg3, R6 + unsigned long arg4, R7 + unsigned long *out1, R8 + unsigned long *out2, R9 + unsigned long *out3, R10 + unsigned long *out4); 112(R1) + */ +_GLOBAL(plpar_hcall_4out) + mfcr r0 + stw r0,8(r1) + + std r8,STK_PARM(r8)(r1) /* Save out ptrs */ + std r9,STK_PARM(r9)(r1) + std r10,STK_PARM(r10)(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + + ld r8,STK_PARM(r8)(r1) /* Fetch r4-r7 ret args */ + ld r9,STK_PARM(r9)(r1) + ld r10,STK_PARM(r10)(r1) + ld r11,STK_PARM(r11)(r1) + std r4,0(r8) + std r5,0(r9) + std r6,0(r10) + std r7,0(r11) + + mtcrf 0xff,r0 + blr /* return r3 = status */ diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c new file mode 100644 index 00000000000..69130522a87 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_iommu.c @@ -0,0 +1,570 @@ +/* + * arch/ppc64/kernel/pSeries_iommu.c + * + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup: + * + * Copyright (C) 2004 Olof Johansson , IBM Corporation + * + * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pci.h" + +#define DBG(fmt...) + +extern int is_python(struct device_node *); + +static void tce_build_pSeries(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction) +{ + union tce_entry t; + union tce_entry *tp; + + t.te_word = 0; + t.te_rdwr = 1; // Read allowed + + if (direction != DMA_TO_DEVICE) + t.te_pciwr = 1; + + tp = ((union tce_entry *)tbl->it_base) + index; + + while (npages--) { + /* can't move this out since we might cross LMB boundary */ + t.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT; + + tp->te_word = t.te_word; + + uaddr += PAGE_SIZE; + tp++; + } +} + + +static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) +{ + union tce_entry t; + union tce_entry *tp; + + t.te_word = 0; + tp = ((union tce_entry *)tbl->it_base) + index; + + while (npages--) { + tp->te_word = t.te_word; + + tp++; + } +} + + +static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, + enum dma_data_direction direction) +{ + u64 rc; + union tce_entry tce; + + tce.te_word = 0; + tce.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT; + tce.te_rdwr = 1; + if (direction != DMA_TO_DEVICE) + tce.te_pciwr = 1; + + while (npages--) { + rc = plpar_tce_put((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word ); + + if (rc && printk_ratelimit()) { + printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); + } + + tcenum++; + tce.te_rpn++; + } +} + +static DEFINE_PER_CPU(void *, tce_page) = NULL; + +static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, + enum dma_data_direction direction) +{ + u64 rc; + union tce_entry tce, *tcep; + long l, limit; + + if (npages == 1) + return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + direction); + + tcep = __get_cpu_var(tce_page); + + /* This is safe to do since interrupts are off when we're called + * from iommu_alloc{,_sg}() + */ + if (!tcep) { + tcep = (void *)__get_free_page(GFP_ATOMIC); + /* If allocation fails, fall back to the loop implementation */ + if (!tcep) + return tce_build_pSeriesLP(tbl, tcenum, npages, + uaddr, direction); + __get_cpu_var(tce_page) = tcep; + } + + tce.te_word = 0; + tce.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT; + tce.te_rdwr = 1; + if (direction != DMA_TO_DEVICE) + tce.te_pciwr = 1; + + /* We can map max one pageful of TCEs at a time */ + do { + /* + * Set up the page with TCE data, looping through and setting + * the values. + */ + limit = min_t(long, npages, PAGE_SIZE/sizeof(union tce_entry)); + + for (l = 0; l < limit; l++) { + tcep[l] = tce; + tce.te_rpn++; + } + + rc = plpar_tce_put_indirect((u64)tbl->it_index, + (u64)tcenum << 12, + (u64)virt_to_abs(tcep), + limit); + + npages -= limit; + tcenum += limit; + } while (npages > 0 && !rc); + + if (rc && printk_ratelimit()) { + printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%lx\n", (u64)npages); + printk("\ttce[0] val = 0x%lx\n", tcep[0].te_word); + show_stack(current, (unsigned long *)__get_SP()); + } +} + +static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +{ + u64 rc; + union tce_entry tce; + + tce.te_word = 0; + + while (npages--) { + rc = plpar_tce_put((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word); + + if (rc && printk_ratelimit()) { + printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); + } + + tcenum++; + } +} + + +static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +{ + u64 rc; + union tce_entry tce; + + tce.te_word = 0; + + rc = plpar_tce_stuff((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word, + npages); + + if (rc && printk_ratelimit()) { + printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); + printk("\trc = %ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%lx\n", (u64)npages); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); + } +} + +static void iommu_table_setparms(struct pci_controller *phb, + struct device_node *dn, + struct iommu_table *tbl) +{ + struct device_node *node; + unsigned long *basep; + unsigned int *sizep; + + node = (struct device_node *)phb->arch_data; + + basep = (unsigned long *)get_property(node, "linux,tce-base", NULL); + sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL); + if (basep == NULL || sizep == NULL) { + printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has " + "missing tce entries !\n", dn->full_name); + return; + } + + tbl->it_base = (unsigned long)__va(*basep); + memset((void *)tbl->it_base, 0, *sizep); + + tbl->it_busno = phb->bus->number; + + /* Units of tce entries */ + tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT; + + /* Test if we are going over 2GB of DMA space */ + if (phb->dma_window_base_cur + phb->dma_window_size > (1L << 31)) + panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + + phb->dma_window_base_cur += phb->dma_window_size; + + /* Set the tce table size - measured in entries */ + tbl->it_size = phb->dma_window_size >> PAGE_SHIFT; + + tbl->it_index = 0; + tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; +} + +/* + * iommu_table_setparms_lpar + * + * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. + * + * ToDo: properly interpret the ibm,dma-window property. The definition is: + * logical-bus-number (1 word) + * phys-address (#address-cells words) + * size (#cell-size words) + * + * Currently we hard code these sizes (more or less). + */ +static void iommu_table_setparms_lpar(struct pci_controller *phb, + struct device_node *dn, + struct iommu_table *tbl, + unsigned int *dma_window) +{ + tbl->it_busno = dn->bussubno; + + /* TODO: Parse field size properties properly. */ + tbl->it_size = (((unsigned long)dma_window[4] << 32) | + (unsigned long)dma_window[5]) >> PAGE_SHIFT; + tbl->it_offset = (((unsigned long)dma_window[2] << 32) | + (unsigned long)dma_window[3]) >> PAGE_SHIFT; + tbl->it_base = 0; + tbl->it_index = dma_window[0]; + tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; +} + +static void iommu_bus_setup_pSeries(struct pci_bus *bus) +{ + struct device_node *dn, *pdn; + struct iommu_table *tbl; + + DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self); + + /* For each (root) bus, we carve up the available DMA space in 256MB + * pieces. Since each piece is used by one (sub) bus/device, that would + * give a maximum of 7 devices per PHB. In most cases, this is plenty. + * + * The exception is on Python PHBs (pre-POWER4). Here we don't have EADS + * bridges below the PHB to allocate the sectioned tables to, so instead + * we allocate a 1GB table at the PHB level. + */ + + dn = pci_bus_to_OF_node(bus); + + if (!bus->self) { + /* Root bus */ + if (is_python(dn)) { + unsigned int *iohole; + + DBG("Python root bus %s\n", bus->name); + + iohole = (unsigned int *)get_property(dn, "io-hole", 0); + + if (iohole) { + /* On first bus we need to leave room for the + * ISA address space. Just skip the first 256MB + * alltogether. This leaves 768MB for the window. + */ + DBG("PHB has io-hole, reserving 256MB\n"); + dn->phb->dma_window_size = 3 << 28; + dn->phb->dma_window_base_cur = 1 << 28; + } else { + /* 1GB window by default */ + dn->phb->dma_window_size = 1 << 30; + dn->phb->dma_window_base_cur = 0; + } + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_setparms(dn->phb, dn, tbl); + dn->iommu_table = iommu_init_table(tbl); + } else { + /* Do a 128MB table at root. This is used for the IDE + * controller on some SMP-mode POWER4 machines. It + * doesn't hurt to allocate it on other machines + * -- it'll just be unused since new tables are + * allocated on the EADS level. + * + * Allocate at offset 128MB to avoid having to deal + * with ISA holes; 128MB table for IDE is plenty. + */ + dn->phb->dma_window_size = 1 << 27; + dn->phb->dma_window_base_cur = 1 << 27; + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_setparms(dn->phb, dn, tbl); + dn->iommu_table = iommu_init_table(tbl); + + /* All child buses have 256MB tables */ + dn->phb->dma_window_size = 1 << 28; + } + } else { + pdn = pci_bus_to_OF_node(bus->parent); + + if (!bus->parent->self && !is_python(pdn)) { + struct iommu_table *tbl; + /* First child and not python means this is the EADS + * level. Allocate new table for this slot with 256MB + * window. + */ + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_setparms(dn->phb, dn, tbl); + + dn->iommu_table = iommu_init_table(tbl); + } else { + /* Lower than first child or under python, use parent table */ + dn->iommu_table = pdn->iommu_table; + } + } +} + + +static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus) +{ + struct iommu_table *tbl; + struct device_node *dn, *pdn; + unsigned int *dma_window = NULL; + + DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self); + + dn = pci_bus_to_OF_node(bus); + + /* Find nearest ibm,dma-window, walking up the device tree */ + for (pdn = dn; pdn != NULL; pdn = pdn->parent) { + dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL); + if (dma_window != NULL) + break; + } + + if (dma_window == NULL) { + DBG("iommu_bus_setup_pSeriesLP: bus %s seems to have no ibm,dma-window property\n", dn->full_name); + return; + } + + if (!pdn->iommu_table) { + /* Bussubno hasn't been copied yet. + * Do it now because iommu_table_setparms_lpar needs it. + */ + pdn->bussubno = bus->number; + + tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), + GFP_KERNEL); + + iommu_table_setparms_lpar(pdn->phb, pdn, tbl, dma_window); + + pdn->iommu_table = iommu_init_table(tbl); + } + + if (pdn != dn) + dn->iommu_table = pdn->iommu_table; +} + + +static void iommu_dev_setup_pSeries(struct pci_dev *dev) +{ + struct device_node *dn, *mydn; + + DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, dev->pretty_name); + /* Now copy the iommu_table ptr from the bus device down to the + * pci device_node. This means get_iommu_table() won't need to search + * up the device tree to find it. + */ + mydn = dn = pci_device_to_OF_node(dev); + + while (dn && dn->iommu_table == NULL) + dn = dn->parent; + + if (dn) { + mydn->iommu_table = dn->iommu_table; + } else { + DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, dev->pretty_name); + } +} + +static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) +{ + int err = NOTIFY_OK; + struct device_node *np = node; + + switch (action) { + case PSERIES_RECONFIG_REMOVE: + if (np->iommu_table && + get_property(np, "ibm,dma-window", NULL)) + iommu_free_table(np); + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block iommu_reconfig_nb = { + .notifier_call = iommu_reconfig_notifier, +}; + +static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) +{ + struct device_node *pdn, *dn; + struct iommu_table *tbl; + int *dma_window = NULL; + + DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, dev->pretty_name); + + /* dev setup for LPAR is a little tricky, since the device tree might + * contain the dma-window properties per-device and not neccesarily + * for the bus. So we need to search upwards in the tree until we + * either hit a dma-window property, OR find a parent with a table + * already allocated. + */ + dn = pci_device_to_OF_node(dev); + + for (pdn = dn; pdn && !pdn->iommu_table; pdn = pdn->parent) { + dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL); + if (dma_window) + break; + } + + /* Check for parent == NULL so we don't try to setup the empty EADS + * slots on POWER4 machines. + */ + if (dma_window == NULL || pdn->parent == NULL) { + /* Fall back to regular (non-LPAR) dev setup */ + DBG("No dma window for device, falling back to regular setup\n"); + iommu_dev_setup_pSeries(dev); + return; + } else { + DBG("Found DMA window, allocating table\n"); + } + + if (!pdn->iommu_table) { + /* iommu_table_setparms_lpar needs bussubno. */ + pdn->bussubno = pdn->phb->bus->number; + + tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), + GFP_KERNEL); + + iommu_table_setparms_lpar(pdn->phb, pdn, tbl, dma_window); + + pdn->iommu_table = iommu_init_table(tbl); + } + + if (pdn != dn) + dn->iommu_table = pdn->iommu_table; +} + +static void iommu_bus_setup_null(struct pci_bus *b) { } +static void iommu_dev_setup_null(struct pci_dev *d) { } + +/* These are called very early. */ +void iommu_init_early_pSeries(void) +{ + if (of_chosen && get_property(of_chosen, "linux,iommu-off", NULL)) { + /* Direct I/O, IOMMU off */ + ppc_md.iommu_dev_setup = iommu_dev_setup_null; + ppc_md.iommu_bus_setup = iommu_bus_setup_null; + pci_direct_iommu_init(); + + return; + } + + if (systemcfg->platform & PLATFORM_LPAR) { + if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) { + ppc_md.tce_build = tce_buildmulti_pSeriesLP; + ppc_md.tce_free = tce_freemulti_pSeriesLP; + } else { + ppc_md.tce_build = tce_build_pSeriesLP; + ppc_md.tce_free = tce_free_pSeriesLP; + } + ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP; + ppc_md.iommu_dev_setup = iommu_dev_setup_pSeriesLP; + } else { + ppc_md.tce_build = tce_build_pSeries; + ppc_md.tce_free = tce_free_pSeries; + ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries; + ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries; + } + + + pSeries_reconfig_notifier_register(&iommu_reconfig_nb); + + pci_iommu_init(); +} + diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c new file mode 100644 index 00000000000..6534812db43 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_lpar.c @@ -0,0 +1,531 @@ +/* + * pSeries_lpar.c + * Copyright (C) 2001 Todd Inglett, IBM Corporation + * + * pSeries LPAR support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* in pSeries_hvCall.S */ +EXPORT_SYMBOL(plpar_hcall); +EXPORT_SYMBOL(plpar_hcall_4out); +EXPORT_SYMBOL(plpar_hcall_norets); +EXPORT_SYMBOL(plpar_hcall_8arg_2ret); + +extern void fw_feature_init(void); +extern void pSeries_find_serial_port(void); + + +int vtermno; /* virtual terminal# for udbg */ + +#define __ALIGNED__ __attribute__((__aligned__(sizeof(long)))) +static void udbg_hvsi_putc(unsigned char c) +{ + /* packet's seqno isn't used anyways */ + uint8_t packet[] __ALIGNED__ = { 0xff, 5, 0, 0, c }; + int rc; + + if (c == '\n') + udbg_hvsi_putc('\r'); + + do { + rc = plpar_put_term_char(vtermno, sizeof(packet), packet); + } while (rc == H_Busy); +} + +static long hvsi_udbg_buf_len; +static uint8_t hvsi_udbg_buf[256]; + +static int udbg_hvsi_getc_poll(void) +{ + unsigned char ch; + int rc, i; + + if (hvsi_udbg_buf_len == 0) { + rc = plpar_get_term_char(vtermno, &hvsi_udbg_buf_len, hvsi_udbg_buf); + if (rc != H_Success || hvsi_udbg_buf[0] != 0xff) { + /* bad read or non-data packet */ + hvsi_udbg_buf_len = 0; + } else { + /* remove the packet header */ + for (i = 4; i < hvsi_udbg_buf_len; i++) + hvsi_udbg_buf[i-4] = hvsi_udbg_buf[i]; + hvsi_udbg_buf_len -= 4; + } + } + + if (hvsi_udbg_buf_len <= 0 || hvsi_udbg_buf_len > 256) { + /* no data ready */ + hvsi_udbg_buf_len = 0; + return -1; + } + + ch = hvsi_udbg_buf[0]; + /* shift remaining data down */ + for (i = 1; i < hvsi_udbg_buf_len; i++) { + hvsi_udbg_buf[i-1] = hvsi_udbg_buf[i]; + } + hvsi_udbg_buf_len--; + + return ch; +} + +static unsigned char udbg_hvsi_getc(void) +{ + int ch; + for (;;) { + ch = udbg_hvsi_getc_poll(); + if (ch == -1) { + /* This shouldn't be needed...but... */ + volatile unsigned long delay; + for (delay=0; delay < 2000000; delay++) + ; + } else { + return ch; + } + } +} + +static void udbg_putcLP(unsigned char c) +{ + char buf[16]; + unsigned long rc; + + if (c == '\n') + udbg_putcLP('\r'); + + buf[0] = c; + do { + rc = plpar_put_term_char(vtermno, 1, buf); + } while(rc == H_Busy); +} + +/* Buffered chars getc */ +static long inbuflen; +static long inbuf[2]; /* must be 2 longs */ + +static int udbg_getc_pollLP(void) +{ + /* The interface is tricky because it may return up to 16 chars. + * We save them statically for future calls to udbg_getc(). + */ + char ch, *buf = (char *)inbuf; + int i; + long rc; + if (inbuflen == 0) { + /* get some more chars. */ + inbuflen = 0; + rc = plpar_get_term_char(vtermno, &inbuflen, buf); + if (rc != H_Success) + inbuflen = 0; /* otherwise inbuflen is garbage */ + } + if (inbuflen <= 0 || inbuflen > 16) { + /* Catch error case as well as other oddities (corruption) */ + inbuflen = 0; + return -1; + } + ch = buf[0]; + for (i = 1; i < inbuflen; i++) /* shuffle them down. */ + buf[i-1] = buf[i]; + inbuflen--; + return ch; +} + +static unsigned char udbg_getcLP(void) +{ + int ch; + for (;;) { + ch = udbg_getc_pollLP(); + if (ch == -1) { + /* This shouldn't be needed...but... */ + volatile unsigned long delay; + for (delay=0; delay < 2000000; delay++) + ; + } else { + return ch; + } + } +} + +/* call this from early_init() for a working debug console on + * vterm capable LPAR machines + */ +void udbg_init_debug_lpar(void) +{ + vtermno = 0; + ppc_md.udbg_putc = udbg_putcLP; + ppc_md.udbg_getc = udbg_getcLP; + ppc_md.udbg_getc_poll = udbg_getc_pollLP; +} + +/* returns 0 if couldn't find or use /chosen/stdout as console */ +int find_udbg_vterm(void) +{ + struct device_node *stdout_node; + u32 *termno; + char *name; + int found = 0; + + /* find the boot console from /chosen/stdout */ + if (!of_chosen) + return 0; + name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + if (name == NULL) + return 0; + stdout_node = of_find_node_by_path(name); + if (!stdout_node) + return 0; + + /* now we have the stdout node; figure out what type of device it is. */ + name = (char *)get_property(stdout_node, "name", NULL); + if (!name) { + printk(KERN_WARNING "stdout node missing 'name' property!\n"); + goto out; + } + + if (strncmp(name, "vty", 3) == 0) { + if (device_is_compatible(stdout_node, "hvterm1")) { + termno = (u32 *)get_property(stdout_node, "reg", NULL); + if (termno) { + vtermno = termno[0]; + ppc_md.udbg_putc = udbg_putcLP; + ppc_md.udbg_getc = udbg_getcLP; + ppc_md.udbg_getc_poll = udbg_getc_pollLP; + found = 1; + } + } else if (device_is_compatible(stdout_node, "hvterm-protocol")) { + termno = (u32 *)get_property(stdout_node, "reg", NULL); + if (termno) { + vtermno = termno[0]; + ppc_md.udbg_putc = udbg_hvsi_putc; + ppc_md.udbg_getc = udbg_hvsi_getc; + ppc_md.udbg_getc_poll = udbg_hvsi_getc_poll; + found = 1; + } + } + } else if (strncmp(name, "serial", 6)) { + /* XXX fix ISA serial console */ + printk(KERN_WARNING "serial stdout on LPAR ('%s')! " + "can't print udbg messages\n", + stdout_node->full_name); + } else { + printk(KERN_WARNING "don't know how to print to stdout '%s'\n", + stdout_node->full_name); + } + +out: + of_node_put(stdout_node); + return found; +} + +void vpa_init(int cpu) +{ + int hwcpu = get_hard_smp_processor_id(cpu); + unsigned long vpa = (unsigned long)&(paca[cpu].lppaca); + long ret; + unsigned long flags; + + /* Register the Virtual Processor Area (VPA) */ + flags = 1UL << (63 - 18); + ret = register_vpa(flags, hwcpu, __pa(vpa)); + + if (ret) + printk(KERN_ERR "WARNING: vpa_init: VPA registration for " + "cpu %d (hw %d) of area %lx returns %ld\n", + cpu, hwcpu, __pa(vpa), ret); +} + +long pSeries_lpar_hpte_insert(unsigned long hpte_group, + unsigned long va, unsigned long prpn, + int secondary, unsigned long hpteflags, + int bolted, int large) +{ + unsigned long arpn = physRpn_to_absRpn(prpn); + unsigned long lpar_rc; + unsigned long flags; + unsigned long slot; + HPTE lhpte; + unsigned long dummy0, dummy1; + + /* Fill in the local HPTE with absolute rpn, avpn and flags */ + lhpte.dw1.dword1 = 0; + lhpte.dw1.dw1.rpn = arpn; + lhpte.dw1.flags.flags = hpteflags; + + lhpte.dw0.dword0 = 0; + lhpte.dw0.dw0.avpn = va >> 23; + lhpte.dw0.dw0.h = secondary; + lhpte.dw0.dw0.bolted = bolted; + lhpte.dw0.dw0.v = 1; + + if (large) { + lhpte.dw0.dw0.l = 1; + lhpte.dw0.dw0.avpn &= ~0x1UL; + } + + /* Now fill in the actual HPTE */ + /* Set CEC cookie to 0 */ + /* Zero page = 0 */ + /* I-cache Invalidate = 0 */ + /* I-cache synchronize = 0 */ + /* Exact = 0 */ + flags = 0; + + /* XXX why is this here? - Anton */ + if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) + lhpte.dw1.flags.flags &= ~_PAGE_COHERENT; + + lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, lhpte.dw0.dword0, + lhpte.dw1.dword1, &slot, &dummy0, &dummy1); + + if (unlikely(lpar_rc == H_PTEG_Full)) + return -1; + + /* + * Since we try and ioremap PHBs we don't own, the pte insert + * will fail. However we must catch the failure in hash_page + * or we will loop forever, so return -2 in this case. + */ + if (unlikely(lpar_rc != H_Success)) + return -2; + + /* Because of iSeries, we have to pass down the secondary + * bucket bit here as well + */ + return (slot & 7) | (secondary << 3); +} + +static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); + +static long pSeries_lpar_hpte_remove(unsigned long hpte_group) +{ + unsigned long slot_offset; + unsigned long lpar_rc; + int i; + unsigned long dummy1, dummy2; + + /* pick a random slot to start at */ + slot_offset = mftb() & 0x7; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + + /* don't remove a bolted entry */ + lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, + (0x1UL << 4), &dummy1, &dummy2); + + if (lpar_rc == H_Success) + return i; + + BUG_ON(lpar_rc != H_Not_Found); + + slot_offset++; + slot_offset &= 0x7; + } + + return -1; +} + +static void pSeries_lpar_hptab_clear(void) +{ + unsigned long size_bytes = 1UL << ppc64_pft_size; + unsigned long hpte_count = size_bytes >> 4; + unsigned long dummy1, dummy2; + int i; + + /* TODO: Use bulk call */ + for (i = 0; i < hpte_count; i++) + plpar_pte_remove(0, i, 0, &dummy1, &dummy2); +} + +/* + * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and + * the low 3 bits of flags happen to line up. So no transform is needed. + * We can probably optimize here and assume the high bits of newpp are + * already zero. For now I am paranoid. + */ +static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int large, int local) +{ + unsigned long lpar_rc; + unsigned long flags = (newpp & 7) | H_AVPN; + unsigned long avpn = va >> 23; + + if (large) + avpn &= ~0x1UL; + + lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7)); + + if (lpar_rc == H_Not_Found) + return -1; + + BUG_ON(lpar_rc != H_Success); + + return 0; +} + +static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) +{ + unsigned long dword0; + unsigned long lpar_rc; + unsigned long dummy_word1; + unsigned long flags; + + /* Read 1 pte at a time */ + /* Do not need RPN to logical page translation */ + /* No cross CEC PFT access */ + flags = 0; + + lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1); + + BUG_ON(lpar_rc != H_Success); + + return dword0; +} + +static long pSeries_lpar_hpte_find(unsigned long vpn) +{ + unsigned long hash; + unsigned long i, j; + long slot; + union { + unsigned long dword0; + Hpte_dword0 dw0; + } hpte_dw0; + Hpte_dword0 dw0; + + hash = hpt_hash(vpn, 0); + + for (j = 0; j < 2; j++) { + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + for (i = 0; i < HPTES_PER_GROUP; i++) { + hpte_dw0.dword0 = pSeries_lpar_hpte_getword0(slot); + dw0 = hpte_dw0.dw0; + + if ((dw0.avpn == (vpn >> 11)) && dw0.v && + (dw0.h == j)) { + /* HPTE matches */ + if (j) + slot = -slot; + return slot; + } + ++slot; + } + hash = ~hash; + } + + return -1; +} + +static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, + unsigned long ea) +{ + unsigned long lpar_rc; + unsigned long vsid, va, vpn, flags; + long slot; + + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> PAGE_SHIFT; + + slot = pSeries_lpar_hpte_find(vpn); + BUG_ON(slot == -1); + + flags = newpp & 7; + lpar_rc = plpar_pte_protect(flags, slot, 0); + + BUG_ON(lpar_rc != H_Success); +} + +static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, + int large, int local) +{ + unsigned long avpn = va >> 23; + unsigned long lpar_rc; + unsigned long dummy1, dummy2; + + if (large) + avpn &= ~0x1UL; + + lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1, + &dummy2); + + if (lpar_rc == H_Not_Found) + return; + + BUG_ON(lpar_rc != H_Success); +} + +/* + * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie + * lock. + */ +void pSeries_lpar_flush_hash_range(unsigned long context, unsigned long number, + int local) +{ + int i; + unsigned long flags = 0; + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); + + for (i = 0; i < number; i++) + flush_hash_page(context, batch->addr[i], batch->pte[i], local); + + if (lock_tlbie) + spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); +} + +void hpte_init_lpar(void) +{ + ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate; + ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp; + ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; + ppc_md.hpte_insert = pSeries_lpar_hpte_insert; + ppc_md.hpte_remove = pSeries_lpar_hpte_remove; + ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; + ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; + + htab_finish_init(); +} diff --git a/arch/ppc64/kernel/pSeries_nvram.c b/arch/ppc64/kernel/pSeries_nvram.c new file mode 100644 index 00000000000..18abfb1f4e2 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_nvram.c @@ -0,0 +1,148 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /dev/nvram driver for PPC64 + * + * This perhaps should live in drivers/char + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nvram_size; +static int nvram_fetch, nvram_store; +static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ +static DEFINE_SPINLOCK(nvram_lock); + + +static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + int done; + unsigned long flags; + char *p = buf; + + + if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + memcpy(p, nvram_buf, len); + + p += len; + i += len; + } + + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + int done; + unsigned long flags; + const char *p = buf; + + if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + memcpy(nvram_buf, p, len); + + if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + p += len; + i += len; + } + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_get_size(void) +{ + return nvram_size ? nvram_size : -ENODEV; +} + +int __init pSeries_nvram_init(void) +{ + struct device_node *nvram; + unsigned int *nbytes_p, proplen; + + nvram = of_find_node_by_type(NULL, "nvram"); + if (nvram == NULL) + return -ENODEV; + + nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen); + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) + return -EIO; + + nvram_size = *nbytes_p; + + nvram_fetch = rtas_token("nvram-fetch"); + nvram_store = rtas_token("nvram-store"); + printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); + of_node_put(nvram); + + ppc_md.nvram_read = pSeries_nvram_read; + ppc_md.nvram_write = pSeries_nvram_write; + ppc_md.nvram_size = pSeries_nvram_get_size; + + return 0; +} diff --git a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c new file mode 100644 index 00000000000..0b1cca28140 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_pci.c @@ -0,0 +1,602 @@ +/* + * pSeries_pci.c + * + * Copyright (C) 2001 Dave Engebretsen, IBM Corporation + * Copyright (C) 2003 Anton Blanchard , IBM + * + * pSeries specific routines for PCI. + * + * Based on code from pci.c and chrp_pci.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpic.h" +#include "pci.h" + +/* RTAS tokens */ +static int read_pci_config; +static int write_pci_config; +static int ibm_read_pci_config; +static int ibm_write_pci_config; + +static int s7a_workaround; + +extern struct mpic *pSeries_mpic; + +static int config_access_valid(struct device_node *dn, int where) +{ + if (where < 256) + return 1; + if (where < 4096 && dn->pci_ext_config_space) + return 1; + + return 0; +} + +static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) +{ + int returnval = -1; + unsigned long buid, addr; + int ret; + + if (!dn) + return PCIBIOS_DEVICE_NOT_FOUND; + if (!config_access_valid(dn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = ((where & 0xf00) << 20) | (dn->busno << 16) | + (dn->devfn << 8) | (where & 0xff); + buid = dn->phb->buid; + if (buid) { + ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval, + addr, buid >> 32, buid & 0xffffffff, size); + } else { + ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size); + } + *val = returnval; + + if (ret) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (returnval == EEH_IO_ERROR_VALUE(size) + && eeh_dn_check_failure (dn, NULL)) + return PCIBIOS_DEVICE_NOT_FOUND; + + return PCIBIOS_SUCCESSFUL; +} + +static int rtas_pci_read_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 *val) +{ + struct device_node *busdn, *dn; + + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = bus->sysdata; /* must be a phb */ + + /* Search only direct children of the bus */ + for (dn = busdn->child; dn; dn = dn->sibling) + if (dn->devfn == devfn) + return rtas_read_config(dn, where, size, val); + return PCIBIOS_DEVICE_NOT_FOUND; +} + +static int rtas_write_config(struct device_node *dn, int where, int size, u32 val) +{ + unsigned long buid, addr; + int ret; + + if (!dn) + return PCIBIOS_DEVICE_NOT_FOUND; + if (!config_access_valid(dn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = ((where & 0xf00) << 20) | (dn->busno << 16) | + (dn->devfn << 8) | (where & 0xff); + buid = dn->phb->buid; + if (buid) { + ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, buid >> 32, buid & 0xffffffff, size, (ulong) val); + } else { + ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val); + } + + if (ret) + return PCIBIOS_DEVICE_NOT_FOUND; + + return PCIBIOS_SUCCESSFUL; +} + +static int rtas_pci_write_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 val) +{ + struct device_node *busdn, *dn; + + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = bus->sysdata; /* must be a phb */ + + /* Search only direct children of the bus */ + for (dn = busdn->child; dn; dn = dn->sibling) + if (dn->devfn == devfn) + return rtas_write_config(dn, where, size, val); + return PCIBIOS_DEVICE_NOT_FOUND; +} + +struct pci_ops rtas_pci_ops = { + rtas_pci_read_config, + rtas_pci_write_config +}; + +int is_python(struct device_node *dev) +{ + char *model = (char *)get_property(dev, "model", NULL); + + if (model && strstr(model, "Python")) + return 1; + + return 0; +} + +static int get_phb_reg_prop(struct device_node *dev, + unsigned int addr_size_words, + struct reg_property64 *reg) +{ + unsigned int *ui_ptr = NULL, len; + + /* Found a PHB, now figure out where his registers are mapped. */ + ui_ptr = (unsigned int *)get_property(dev, "reg", &len); + if (ui_ptr == NULL) + return 1; + + if (addr_size_words == 1) { + reg->address = ((struct reg_property32 *)ui_ptr)->address; + reg->size = ((struct reg_property32 *)ui_ptr)->size; + } else { + *reg = *((struct reg_property64 *)ui_ptr); + } + + return 0; +} + +static void python_countermeasures(struct device_node *dev, + unsigned int addr_size_words) +{ + struct reg_property64 reg_struct; + void __iomem *chip_regs; + volatile u32 val; + + if (get_phb_reg_prop(dev, addr_size_words, ®_struct)) + return; + + /* Python's register file is 1 MB in size. */ + chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000); + + /* + * Firmware doesn't always clear this bit which is critical + * for good performance - Anton + */ + +#define PRG_CL_RESET_VALID 0x00010000 + + val = in_be32(chip_regs + 0xf6030); + if (val & PRG_CL_RESET_VALID) { + printk(KERN_INFO "Python workaround: "); + val &= ~PRG_CL_RESET_VALID; + out_be32(chip_regs + 0xf6030, val); + /* + * We must read it back for changes to + * take effect + */ + val = in_be32(chip_regs + 0xf6030); + printk("reg0: %x\n", val); + } + + iounmap(chip_regs); +} + +void __init init_pci_config_tokens (void) +{ + read_pci_config = rtas_token("read-pci-config"); + write_pci_config = rtas_token("write-pci-config"); + ibm_read_pci_config = rtas_token("ibm,read-pci-config"); + ibm_write_pci_config = rtas_token("ibm,write-pci-config"); +} + +unsigned long __devinit get_phb_buid (struct device_node *phb) +{ + int addr_cells; + unsigned int *buid_vals; + unsigned int len; + unsigned long buid; + + if (ibm_read_pci_config == -1) return 0; + + /* PHB's will always be children of the root node, + * or so it is promised by the current firmware. */ + if (phb->parent == NULL) + return 0; + if (phb->parent->parent) + return 0; + + buid_vals = (unsigned int *) get_property(phb, "reg", &len); + if (buid_vals == NULL) + return 0; + + addr_cells = prom_n_addr_cells(phb); + if (addr_cells == 1) { + buid = (unsigned long) buid_vals[0]; + } else { + buid = (((unsigned long)buid_vals[0]) << 32UL) | + (((unsigned long)buid_vals[1]) & 0xffffffff); + } + return buid; +} + +static int phb_set_bus_ranges(struct device_node *dev, + struct pci_controller *phb) +{ + int *bus_range; + unsigned int len; + + bus_range = (int *) get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + return 1; + } + + phb->first_busno = bus_range[0]; + phb->last_busno = bus_range[1]; + + return 0; +} + +static int __devinit setup_phb(struct device_node *dev, + struct pci_controller *phb, + unsigned int addr_size_words) +{ + pci_setup_pci_controller(phb); + + if (is_python(dev)) + python_countermeasures(dev, addr_size_words); + + if (phb_set_bus_ranges(dev, phb)) + return 1; + + phb->arch_data = dev; + phb->ops = &rtas_pci_ops; + phb->buid = get_phb_buid(dev); + + return 0; +} + +static void __devinit add_linux_pci_domain(struct device_node *dev, + struct pci_controller *phb, + struct property *of_prop) +{ + memset(of_prop, 0, sizeof(struct property)); + of_prop->name = "linux,pci-domain"; + of_prop->length = sizeof(phb->global_number); + of_prop->value = (unsigned char *)&of_prop[1]; + memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number)); + prom_add_property(dev, of_prop); +} + +static struct pci_controller * __init alloc_phb(struct device_node *dev, + unsigned int addr_size_words) +{ + struct pci_controller *phb; + struct property *of_prop; + + phb = alloc_bootmem(sizeof(struct pci_controller)); + if (phb == NULL) + return NULL; + + of_prop = alloc_bootmem(sizeof(struct property) + + sizeof(phb->global_number)); + if (!of_prop) + return NULL; + + if (setup_phb(dev, phb, addr_size_words)) + return NULL; + + add_linux_pci_domain(dev, phb, of_prop); + + return phb; +} + +static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words) +{ + struct pci_controller *phb; + + phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), + GFP_KERNEL); + if (phb == NULL) + return NULL; + + if (setup_phb(dev, phb, addr_size_words)) + return NULL; + + phb->is_dynamic = 1; + + /* TODO: linux,pci-domain? */ + + return phb; +} + +unsigned long __init find_and_init_phbs(void) +{ + struct device_node *node; + struct pci_controller *phb; + unsigned int root_size_cells = 0; + unsigned int index; + unsigned int *opprop = NULL; + struct device_node *root = of_find_node_by_path("/"); + + if (ppc64_interrupt_controller == IC_OPEN_PIC) { + opprop = (unsigned int *)get_property(root, + "platform-open-pic", NULL); + } + + root_size_cells = prom_n_size_cells(root); + + index = 0; + + for (node = of_get_next_child(root, NULL); + node != NULL; + node = of_get_next_child(root, node)) { + if (node->type == NULL || strcmp(node->type, "pci") != 0) + continue; + + phb = alloc_phb(node, root_size_cells); + if (!phb) + continue; + + pci_process_bridge_OF_ranges(phb, node); + pci_setup_phb_io(phb, index == 0); + + if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) { + int addr = root_size_cells * (index + 2) - 1; + mpic_assign_isu(pSeries_mpic, index, opprop[addr]); + } + + index++; + } + + of_node_put(root); + pci_devs_phb_init(); + + /* + * pci_probe_only and pci_assign_all_buses can be set via properties + * in chosen. + */ + if (of_chosen) { + int *prop; + + prop = (int *)get_property(of_chosen, "linux,pci-probe-only", + NULL); + if (prop) + pci_probe_only = *prop; + + prop = (int *)get_property(of_chosen, + "linux,pci-assign-all-buses", NULL); + if (prop) + pci_assign_all_buses = *prop; + } + + return 0; +} + +struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) +{ + struct device_node *root = of_find_node_by_path("/"); + unsigned int root_size_cells = 0; + struct pci_controller *phb; + struct pci_bus *bus; + int primary; + + root_size_cells = prom_n_size_cells(root); + + primary = list_empty(&hose_list); + phb = alloc_phb_dynamic(dn, root_size_cells); + if (!phb) + return NULL; + + pci_process_bridge_OF_ranges(phb, dn); + + pci_setup_phb_io_dynamic(phb, primary); + of_node_put(root); + + pci_devs_phb_init_dynamic(phb); + phb->last_busno = 0xff; + bus = pci_scan_bus(phb->first_busno, phb->ops, phb->arch_data); + phb->bus = bus; + phb->last_busno = bus->subordinate; + + return phb; +} +EXPORT_SYMBOL(init_phb_dynamic); + +#if 0 +void pcibios_name_device(struct pci_dev *dev) +{ + struct device_node *dn; + + /* + * Add IBM loc code (slot) as a prefix to the device names for service + */ + dn = pci_device_to_OF_node(dev); + if (dn) { + char *loc_code = get_property(dn, "ibm,loc-code", 0); + if (loc_code) { + int loc_len = strlen(loc_code); + if (loc_len < sizeof(dev->dev.name)) { + memmove(dev->dev.name+loc_len+1, dev->dev.name, + sizeof(dev->dev.name)-loc_len-1); + memcpy(dev->dev.name, loc_code, loc_len); + dev->dev.name[loc_len] = ' '; + dev->dev.name[sizeof(dev->dev.name)-1] = '\0'; + } + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); +#endif + +static void check_s7a(void) +{ + struct device_node *root; + char *model; + + root = of_find_node_by_path("/"); + if (root) { + model = get_property(root, "model", NULL); + if (model && !strcmp(model, "IBM,7013-S7A")) + s7a_workaround = 1; + of_node_put(root); + } +} + +/* RPA-specific bits for removing PHBs */ +int pcibios_remove_root_bus(struct pci_controller *phb) +{ + struct pci_bus *b = phb->bus; + struct resource *res; + int rc, i; + + res = b->resource[0]; + if (!res->flags) { + printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__, + b->name); + return 1; + } + + rc = unmap_bus_range(b); + if (rc) { + printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", + __FUNCTION__, b->name); + return 1; + } + + if (release_resource(res)) { + printk(KERN_ERR "%s: failed to release IO on bus %s\n", + __FUNCTION__, b->name); + return 1; + } + + for (i = 1; i < 3; ++i) { + res = b->resource[i]; + if (!res->flags && i == 0) { + printk(KERN_ERR "%s: no MEM resource for PHB %s\n", + __FUNCTION__, b->name); + return 1; + } + if (res->flags && release_resource(res)) { + printk(KERN_ERR + "%s: failed to release IO %d on bus %s\n", + __FUNCTION__, i, b->name); + return 1; + } + } + + list_del(&phb->list_node); + if (phb->is_dynamic) + kfree(phb); + + return 0; +} +EXPORT_SYMBOL(pcibios_remove_root_bus); + +static void __init pSeries_request_regions(void) +{ + if (!isa_io_base) + return; + + request_region(0x20,0x20,"pic1"); + request_region(0xa0,0x20,"pic2"); + request_region(0x00,0x20,"dma1"); + request_region(0x40,0x20,"timer"); + request_region(0x80,0x10,"dma page reg"); + request_region(0xc0,0x20,"dma2"); +} + +void __init pSeries_final_fixup(void) +{ + struct pci_dev *dev = NULL; + + check_s7a(); + + for_each_pci_dev(dev) { + pci_read_irq_line(dev); + if (s7a_workaround) { + if (dev->irq > 16) { + dev->irq -= 3; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + } + } + } + + phbs_remap_io(); + pSeries_request_regions(); + + pci_addr_cache_build(); +} + +/* + * Assume the winbond 82c105 is the IDE controller on a + * p610. We should probably be more careful in case + * someone tries to plug in a similar adapter. + */ +static void fixup_winbond_82c105(struct pci_dev* dev) +{ + int i; + unsigned int reg; + + if (!(systemcfg->platform & PLATFORM_PSERIES)) + return; + + printk("Using INTC for W82c105 IDE controller.\n"); + pci_read_config_dword(dev, 0x40, ®); + /* Enable LEGIRQ to use INTC instead of ISA interrupts */ + pci_write_config_dword(dev, 0x40, reg | (1<<11)); + + for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) { + /* zap the 2nd function of the winbond chip */ + if (dev->resource[i].flags & IORESOURCE_IO + && dev->bus->number == 0 && dev->devfn == 0x81) + dev->resource[i].flags &= ~IORESOURCE_IO; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, + fixup_winbond_82c105); diff --git a/arch/ppc64/kernel/pSeries_reconfig.c b/arch/ppc64/kernel/pSeries_reconfig.c new file mode 100644 index 00000000000..cb5443f2e49 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_reconfig.c @@ -0,0 +1,434 @@ +/* + * pSeries_reconfig.c - support for dynamic reconfiguration (including PCI + * Hotplug and Dynamic Logical Partitioning on RPA platforms). + * + * Copyright (C) 2005 Nathan Lynch + * Copyright (C) 2005 IBM Corporation + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include + + + +/* + * Routines for "runtime" addition and removal of device tree nodes. + */ +#ifdef CONFIG_PROC_DEVICETREE +/* + * Add a node to /proc/device-tree. + */ +static void add_node_proc_entries(struct device_node *np) +{ + struct proc_dir_entry *ent; + + ent = proc_mkdir(strrchr(np->full_name, '/') + 1, np->parent->pde); + if (ent) + proc_device_tree_add_node(np, ent); +} + +static void remove_node_proc_entries(struct device_node *np) +{ + struct property *pp = np->properties; + struct device_node *parent = np->parent; + + while (pp) { + remove_proc_entry(pp->name, np->pde); + pp = pp->next; + } + + /* Assuming that symlinks have the same parent directory as + * np->pde. + */ + if (np->name_link) + remove_proc_entry(np->name_link->name, parent->pde); + if (np->addr_link) + remove_proc_entry(np->addr_link->name, parent->pde); + if (np->pde) + remove_proc_entry(np->pde->name, parent->pde); +} +#else /* !CONFIG_PROC_DEVICETREE */ +static void add_node_proc_entries(struct device_node *np) +{ + return; +} + +static void remove_node_proc_entries(struct device_node *np) +{ + return; +} +#endif /* CONFIG_PROC_DEVICETREE */ + +/** + * derive_parent - basically like dirname(1) + * @path: the full_name of a node to be added to the tree + * + * Returns the node which should be the parent of the node + * described by path. E.g., for path = "/foo/bar", returns + * the node with full_name = "/foo". + */ +static struct device_node *derive_parent(const char *path) +{ + struct device_node *parent = NULL; + char *parent_path = "/"; + size_t parent_path_len = strrchr(path, '/') - path + 1; + + /* reject if path is "/" */ + if (!strcmp(path, "/")) + return ERR_PTR(-EINVAL); + + if (strrchr(path, '/') != path) { + parent_path = kmalloc(parent_path_len, GFP_KERNEL); + if (!parent_path) + return ERR_PTR(-ENOMEM); + strlcpy(parent_path, path, parent_path_len); + } + parent = of_find_node_by_path(parent_path); + if (!parent) + return ERR_PTR(-EINVAL); + if (strcmp(parent_path, "/")) + kfree(parent_path); + return parent; +} + +static struct notifier_block *pSeries_reconfig_chain; + +int pSeries_reconfig_notifier_register(struct notifier_block *nb) +{ + return notifier_chain_register(&pSeries_reconfig_chain, nb); +} + +void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) +{ + notifier_chain_unregister(&pSeries_reconfig_chain, nb); +} + +static int pSeries_reconfig_add_node(const char *path, struct property *proplist) +{ + struct device_node *np; + int err = -ENOMEM; + + np = kcalloc(1, sizeof(*np), GFP_KERNEL); + if (!np) + goto out_err; + + np->full_name = kmalloc(strlen(path) + 1, GFP_KERNEL); + if (!np->full_name) + goto out_err; + + strcpy(np->full_name, path); + + np->properties = proplist; + OF_MARK_DYNAMIC(np); + kref_init(&np->kref); + + np->parent = derive_parent(path); + if (IS_ERR(np->parent)) { + err = PTR_ERR(np->parent); + goto out_err; + } + + err = notifier_call_chain(&pSeries_reconfig_chain, + PSERIES_RECONFIG_ADD, np); + if (err == NOTIFY_BAD) { + printk(KERN_ERR "Failed to add device node %s\n", path); + err = -ENOMEM; /* For now, safe to assume kmalloc failure */ + goto out_err; + } + + of_attach_node(np); + + add_node_proc_entries(np); + + of_node_put(np->parent); + + return 0; + +out_err: + if (np) { + of_node_put(np->parent); + kfree(np->full_name); + kfree(np); + } + return err; +} + +static int pSeries_reconfig_remove_node(struct device_node *np) +{ + struct device_node *parent, *child; + + parent = of_get_parent(np); + if (!parent) + return -EINVAL; + + if ((child = of_get_next_child(np, NULL))) { + of_node_put(child); + return -EBUSY; + } + + remove_node_proc_entries(np); + + notifier_call_chain(&pSeries_reconfig_chain, + PSERIES_RECONFIG_REMOVE, np); + of_detach_node(np); + + of_node_put(parent); + of_node_put(np); /* Must decrement the refcount */ + return 0; +} + +/* + * /proc/ppc64/ofdt - yucky binary interface for adding and removing + * OF device nodes. Should be deprecated as soon as we get an + * in-kernel wrapper for the RTAS ibm,configure-connector call. + */ + +static void release_prop_list(const struct property *prop) +{ + struct property *next; + for (; prop; prop = next) { + next = prop->next; + kfree(prop->name); + kfree(prop->value); + kfree(prop); + } + +} + +/** + * parse_next_property - process the next property from raw input buffer + * @buf: input buffer, must be nul-terminated + * @end: end of the input buffer + 1, for validation + * @name: return value; set to property name in buf + * @length: return value; set to length of value + * @value: return value; set to the property value in buf + * + * Note that the caller must make copies of the name and value returned, + * this function does no allocation or copying of the data. Return value + * is set to the next name in buf, or NULL on error. + */ +static char * parse_next_property(char *buf, char *end, char **name, int *length, + unsigned char **value) +{ + char *tmp; + + *name = buf; + + tmp = strchr(buf, ' '); + if (!tmp) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + *tmp = '\0'; + + if (++tmp >= end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + + /* now we're on the length */ + *length = -1; + *length = simple_strtoul(tmp, &tmp, 10); + if (*length == -1) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + if (*tmp != ' ' || ++tmp >= end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + + /* now we're on the value */ + *value = tmp; + tmp += *length; + if (tmp > end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + else if (tmp < end && *tmp != ' ' && *tmp != '\0') { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + tmp++; + + /* and now we should be on the next name, or the end */ + return tmp; +} + +static struct property *new_property(const char *name, const int length, + const unsigned char *value, struct property *last) +{ + struct property *new = kmalloc(sizeof(*new), GFP_KERNEL); + + if (!new) + return NULL; + memset(new, 0, sizeof(*new)); + + if (!(new->name = kmalloc(strlen(name) + 1, GFP_KERNEL))) + goto cleanup; + if (!(new->value = kmalloc(length + 1, GFP_KERNEL))) + goto cleanup; + + strcpy(new->name, name); + memcpy(new->value, value, length); + *(((char *)new->value) + length) = 0; + new->length = length; + new->next = last; + return new; + +cleanup: + if (new->name) + kfree(new->name); + if (new->value) + kfree(new->value); + kfree(new); + return NULL; +} + +static int do_add_node(char *buf, size_t bufsize) +{ + char *path, *end, *name; + struct device_node *np; + struct property *prop = NULL; + unsigned char* value; + int length, rv = 0; + + end = buf + bufsize; + path = buf; + buf = strchr(buf, ' '); + if (!buf) + return -EINVAL; + *buf = '\0'; + buf++; + + if ((np = of_find_node_by_path(path))) { + of_node_put(np); + return -EINVAL; + } + + /* rv = build_prop_list(tmp, bufsize - (tmp - buf), &proplist); */ + while (buf < end && + (buf = parse_next_property(buf, end, &name, &length, &value))) { + struct property *last = prop; + + prop = new_property(name, length, value, last); + if (!prop) { + rv = -ENOMEM; + prop = last; + goto out; + } + } + if (!buf) { + rv = -EINVAL; + goto out; + } + + rv = pSeries_reconfig_add_node(path, prop); + +out: + if (rv) + release_prop_list(prop); + return rv; +} + +static int do_remove_node(char *buf) +{ + struct device_node *node; + int rv = -ENODEV; + + if ((node = of_find_node_by_path(buf))) + rv = pSeries_reconfig_remove_node(node); + + of_node_put(node); + return rv; +} + +/** + * ofdt_write - perform operations on the Open Firmware device tree + * + * @file: not used + * @buf: command and arguments + * @count: size of the command buffer + * @off: not used + * + * Operations supported at this time are addition and removal of + * whole nodes along with their properties. Operations on individual + * properties are not implemented (yet). + */ +static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count, + loff_t *off) +{ + int rv = 0; + char *kbuf; + char *tmp; + + if (!(kbuf = kmalloc(count + 1, GFP_KERNEL))) { + rv = -ENOMEM; + goto out; + } + if (copy_from_user(kbuf, buf, count)) { + rv = -EFAULT; + goto out; + } + + kbuf[count] = '\0'; + + tmp = strchr(kbuf, ' '); + if (!tmp) { + rv = -EINVAL; + goto out; + } + *tmp = '\0'; + tmp++; + + if (!strcmp(kbuf, "add_node")) + rv = do_add_node(tmp, count - (tmp - kbuf)); + else if (!strcmp(kbuf, "remove_node")) + rv = do_remove_node(tmp); + else + rv = -EINVAL; +out: + kfree(kbuf); + return rv ? rv : count; +} + +static struct file_operations ofdt_fops = { + .write = ofdt_write +}; + +/* create /proc/ppc64/ofdt write-only by root */ +static int proc_ppc64_create_ofdt(void) +{ + struct proc_dir_entry *ent; + + if (!(systemcfg->platform & PLATFORM_PSERIES)) + return 0; + + ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL); + if (ent) { + ent->nlink = 1; + ent->data = NULL; + ent->size = 0; + ent->proc_fops = &ofdt_fops; + } + + return 0; +} +__initcall(proc_ppc64_create_ofdt); diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c new file mode 100644 index 00000000000..06536de5125 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -0,0 +1,612 @@ +/* + * linux/arch/ppc/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * bootup setup stuff.. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "i8259.h" +#include "mpic.h" +#include "pci.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +extern void pSeries_final_fixup(void); + +extern void pSeries_get_boot_time(struct rtc_time *rtc_time); +extern void pSeries_get_rtc_time(struct rtc_time *rtc_time); +extern int pSeries_set_rtc_time(struct rtc_time *rtc_time); +extern void find_udbg_vterm(void); +extern void system_reset_fwnmi(void); /* from head.S */ +extern void machine_check_fwnmi(void); /* from head.S */ +extern void generic_find_legacy_serial_ports(u64 *physport, + unsigned int *default_speed); + +int fwnmi_active; /* TRUE if an FWNMI handler is present */ + +extern unsigned long ppc_proc_freq; +extern unsigned long ppc_tb_freq; + +extern void pSeries_system_reset_exception(struct pt_regs *regs); +extern int pSeries_machine_check_exception(struct pt_regs *regs); + +static volatile void __iomem * chrp_int_ack_special; +struct mpic *pSeries_mpic; + +void pSeries_get_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + of_node_put(root); +} + +/* Initialize firmware assisted non-maskable interrupts if + * the firmware supports this feature. + * + */ +static void __init fwnmi_init(void) +{ + int ret; + int ibm_nmi_register = rtas_token("ibm,nmi-register"); + if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) + return; + ret = rtas_call(ibm_nmi_register, 2, 1, NULL, + __pa((unsigned long)system_reset_fwnmi), + __pa((unsigned long)machine_check_fwnmi)); + if (ret == 0) + fwnmi_active = 1; +} + +static int pSeries_irq_cascade(struct pt_regs *regs, void *data) +{ + if (chrp_int_ack_special) + return readb(chrp_int_ack_special); + else + return i8259_irq(smp_processor_id()); +} + +static void __init pSeries_init_mpic(void) +{ + unsigned int *addrp; + struct device_node *np; + int i; + + /* All ISUs are setup, complete initialization */ + mpic_init(pSeries_mpic); + + /* Check what kind of cascade ACK we have */ + if (!(np = of_find_node_by_name(NULL, "pci")) + || !(addrp = (unsigned int *) + get_property(np, "8259-interrupt-acknowledge", NULL))) + printk(KERN_ERR "Cannot find pci to get ack address\n"); + else + chrp_int_ack_special = ioremap(addrp[prom_n_addr_cells(np)-1], 1); + of_node_put(np); + + /* Setup the legacy interrupts & controller */ + for (i = 0; i < NUM_ISA_INTERRUPTS; i++) + irq_desc[i].handler = &i8259_pic; + i8259_init(0); + + /* Hook cascade to mpic */ + mpic_setup_cascade(NUM_ISA_INTERRUPTS, pSeries_irq_cascade, NULL); +} + +static void __init pSeries_setup_mpic(void) +{ + unsigned int *opprop; + unsigned long openpic_addr = 0; + unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS]; + struct device_node *root; + int irq_count; + + /* Find the Open PIC if present */ + root = of_find_node_by_path("/"); + opprop = (unsigned int *) get_property(root, "platform-open-pic", NULL); + if (opprop != 0) { + int n = prom_n_addr_cells(root); + + for (openpic_addr = 0; n > 0; --n) + openpic_addr = (openpic_addr << 32) + *opprop++; + printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); + } + of_node_put(root); + + BUG_ON(openpic_addr == 0); + + /* Get the sense values from OF */ + prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS); + + /* Setup the openpic driver */ + irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */ + pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY, + 16, 16, irq_count, /* isu size, irq offset, irq count */ + NR_IRQS - 4, /* ipi offset */ + senses, irq_count, /* sense & sense size */ + " MPIC "); +} + +static void __init pSeries_setup_arch(void) +{ + /* Fixup ppc_md depending on the type of interrupt controller */ + if (ppc64_interrupt_controller == IC_OPEN_PIC) { + ppc_md.init_IRQ = pSeries_init_mpic; + ppc_md.get_irq = mpic_get_irq; + /* Allocate the mpic now, so that find_and_init_phbs() can + * fill the ISUs */ + pSeries_setup_mpic(); + } else { + ppc_md.init_IRQ = xics_init_IRQ; + ppc_md.get_irq = xics_get_irq; + } + +#ifdef CONFIG_SMP + smp_init_pSeries(); +#endif + /* openpic global configuration register (64-bit format). */ + /* openpic Interrupt Source Unit pointer (64-bit format). */ + /* python0 facility area (mmio) (64-bit format) REAL address. */ + + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + if (ROOT_DEV == 0) { + printk("No ramdisk, default root is /dev/sda2\n"); + ROOT_DEV = Root_SDA2; + } + + fwnmi_init(); + + /* Find and initialize PCI host bridges */ + init_pci_config_tokens(); + eeh_init(); + find_and_init_phbs(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + pSeries_nvram_init(); + + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) + vpa_init(boot_cpuid); +} + +static int __init pSeries_init_panel(void) +{ + /* Manually leave the kernel version on the panel. */ + ppc_md.progress("Linux ppc64\n", 0); + ppc_md.progress(UTS_RELEASE, 0); + + return 0; +} +arch_initcall(pSeries_init_panel); + + +/* Build up the firmware_features bitmask field + * using contents of device-tree/ibm,hypertas-functions. + * Ultimately this functionality may be moved into prom.c prom_init(). + */ +void __init fw_feature_init(void) +{ + struct device_node * dn; + char * hypertas; + unsigned int len; + + DBG(" -> fw_feature_init()\n"); + + cur_cpu_spec->firmware_features = 0; + dn = of_find_node_by_path("/rtas"); + if (dn == NULL) { + printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n"); + goto no_rtas; + } + + hypertas = get_property(dn, "ibm,hypertas-functions", &len); + if (hypertas) { + while (len > 0){ + int i, hypertas_len; + /* check value against table of strings */ + for(i=0; i < FIRMWARE_MAX_FEATURES ;i++) { + if ((firmware_features_table[i].name) && + (strcmp(firmware_features_table[i].name,hypertas))==0) { + /* we have a match */ + cur_cpu_spec->firmware_features |= + (firmware_features_table[i].val); + break; + } + } + hypertas_len = strlen(hypertas); + len -= hypertas_len +1; + hypertas+= hypertas_len +1; + } + } + + of_node_put(dn); + no_rtas: + printk(KERN_INFO "firmware_features = 0x%lx\n", + cur_cpu_spec->firmware_features); + + DBG(" <- fw_feature_init()\n"); +} + + +static void __init pSeries_discover_pic(void) +{ + struct device_node *np; + char *typep; + + /* + * Setup interrupt mapping options that are needed for finish_device_tree + * to properly parse the OF interrupt tree & do the virtual irq mapping + */ + __irq_offset_value = NUM_ISA_INTERRUPTS; + ppc64_interrupt_controller = IC_INVALID; + for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) { + typep = (char *)get_property(np, "compatible", NULL); + if (strstr(typep, "open-pic")) + ppc64_interrupt_controller = IC_OPEN_PIC; + else if (strstr(typep, "ppc-xicp")) + ppc64_interrupt_controller = IC_PPC_XIC; + else + printk("pSeries_discover_pic: failed to recognize" + " interrupt-controller\n"); + break; + } +} + +static void pSeries_mach_cpu_die(void) +{ + local_irq_disable(); + idle_task_exit(); + /* Some hardware requires clearing the CPPR, while other hardware does not + * it is safe either way + */ + pSeriesLP_cppr_info(0, 0); + rtas_stop_self(); + /* Should never get here... */ + BUG(); + for(;;); +} + + +/* + * Early initialization. Relocation is on but do not reference unbolted pages + */ +static void __init pSeries_init_early(void) +{ + void *comport; + int iommu_off = 0; + unsigned int default_speed; + u64 physport; + + DBG(" -> pSeries_init_early()\n"); + + fw_feature_init(); + + if (systemcfg->platform & PLATFORM_LPAR) + hpte_init_lpar(); + else { + hpte_init_native(); + iommu_off = (of_chosen && + get_property(of_chosen, "linux,iommu-off", NULL)); + } + + generic_find_legacy_serial_ports(&physport, &default_speed); + + if (systemcfg->platform & PLATFORM_LPAR) + find_udbg_vterm(); + else if (physport) { + /* Map the uart for udbg. */ + comport = (void *)__ioremap(physport, 16, _PAGE_NO_CACHE); + udbg_init_uart(comport, default_speed); + + ppc_md.udbg_putc = udbg_putc; + ppc_md.udbg_getc = udbg_getc; + ppc_md.udbg_getc_poll = udbg_getc_poll; + DBG("Hello World !\n"); + } + + + iommu_init_early_pSeries(); + + pSeries_discover_pic(); + + DBG(" <- pSeries_init_early()\n"); +} + + +static void pSeries_progress(char *s, unsigned short hex) +{ + struct device_node *root; + int width, *p; + char *os; + static int display_character, set_indicator; + static int max_width; + static DEFINE_SPINLOCK(progress_lock); + static int pending_newline = 0; /* did last write end with unprinted newline? */ + + if (!rtas.base) + return; + + if (max_width == 0) { + if ((root = find_path_device("/rtas")) && + (p = (unsigned int *)get_property(root, + "ibm,display-line-length", + NULL))) + max_width = *p; + else + max_width = 0x10; + display_character = rtas_token("display-character"); + set_indicator = rtas_token("set-indicator"); + } + + if (display_character == RTAS_UNKNOWN_SERVICE) { + /* use hex display if available */ + if (set_indicator != RTAS_UNKNOWN_SERVICE) + rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex); + return; + } + + spin_lock(&progress_lock); + + /* + * Last write ended with newline, but we didn't print it since + * it would just clear the bottom line of output. Print it now + * instead. + * + * If no newline is pending, print a CR to start output at the + * beginning of the line. + */ + if (pending_newline) { + rtas_call(display_character, 1, 1, NULL, '\r'); + rtas_call(display_character, 1, 1, NULL, '\n'); + pending_newline = 0; + } else { + rtas_call(display_character, 1, 1, NULL, '\r'); + } + + width = max_width; + os = s; + while (*os) { + if (*os == '\n' || *os == '\r') { + /* Blank to end of line. */ + while (width-- > 0) + rtas_call(display_character, 1, 1, NULL, ' '); + + /* If newline is the last character, save it + * until next call to avoid bumping up the + * display output. + */ + if (*os == '\n' && !os[1]) { + pending_newline = 1; + spin_unlock(&progress_lock); + return; + } + + /* RTAS wants CR-LF, not just LF */ + + if (*os == '\n') { + rtas_call(display_character, 1, 1, NULL, '\r'); + rtas_call(display_character, 1, 1, NULL, '\n'); + } else { + /* CR might be used to re-draw a line, so we'll + * leave it alone and not add LF. + */ + rtas_call(display_character, 1, 1, NULL, *os); + } + + width = max_width; + } else { + width--; + rtas_call(display_character, 1, 1, NULL, *os); + } + + os++; + + /* if we overwrite the screen length */ + if (width <= 0) + while ((*os != 0) && (*os != '\n') && (*os != '\r')) + os++; + } + + /* Blank to end of line. */ + while (width-- > 0) + rtas_call(display_character, 1, 1, NULL, ' '); + + spin_unlock(&progress_lock); +} + +extern void setup_default_decr(void); + +/* Some sane defaults: 125 MHz timebase, 1GHz processor */ +#define DEFAULT_TB_FREQ 125000000UL +#define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8) + +static void __init pSeries_calibrate_decr(void) +{ + struct device_node *cpu; + struct div_result divres; + unsigned int *fp; + int node_found; + + /* + * The cpu node should have a timebase-frequency property + * to tell us the rate at which the decrementer counts. + */ + cpu = of_find_node_by_type(NULL, "cpu"); + + ppc_tb_freq = DEFAULT_TB_FREQ; /* hardcoded default */ + node_found = 0; + if (cpu != 0) { + fp = (unsigned int *)get_property(cpu, "timebase-frequency", + NULL); + if (fp != 0) { + node_found = 1; + ppc_tb_freq = *fp; + } + } + if (!node_found) + printk(KERN_ERR "WARNING: Estimating decrementer frequency " + "(not found)\n"); + + ppc_proc_freq = DEFAULT_PROC_FREQ; + node_found = 0; + if (cpu != 0) { + fp = (unsigned int *)get_property(cpu, "clock-frequency", + NULL); + if (fp != 0) { + node_found = 1; + ppc_proc_freq = *fp; + } + } + if (!node_found) + printk(KERN_ERR "WARNING: Estimating processor frequency " + "(not found)\n"); + + of_node_put(cpu); + + printk(KERN_INFO "time_init: decrementer frequency = %lu.%.6lu MHz\n", + ppc_tb_freq/1000000, ppc_tb_freq%1000000); + printk(KERN_INFO "time_init: processor frequency = %lu.%.6lu MHz\n", + ppc_proc_freq/1000000, ppc_proc_freq%1000000); + + tb_ticks_per_jiffy = ppc_tb_freq / HZ; + tb_ticks_per_sec = tb_ticks_per_jiffy * HZ; + tb_ticks_per_usec = ppc_tb_freq / 1000000; + tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); + div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres); + tb_to_xs = divres.result_low; + + setup_default_decr(); +} + +static int pSeries_check_legacy_ioport(unsigned int baseport) +{ + struct device_node *np; + +#define I8042_DATA_REG 0x60 +#define FDC_BASE 0x3f0 + + + switch(baseport) { + case I8042_DATA_REG: + np = of_find_node_by_type(NULL, "8042"); + if (np == NULL) + return -ENODEV; + of_node_put(np); + break; + case FDC_BASE: + np = of_find_node_by_type(NULL, "fdc"); + if (np == NULL) + return -ENODEV; + of_node_put(np); + break; + } + return 0; +} + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +extern struct machdep_calls pSeries_md; + +static int __init pSeries_probe(int platform) +{ + if (platform != PLATFORM_PSERIES && + platform != PLATFORM_PSERIES_LPAR) + return 0; + + /* if we have some ppc_md fixups for LPAR to do, do + * it here ... + */ + + return 1; +} + +struct machdep_calls __initdata pSeries_md = { + .probe = pSeries_probe, + .setup_arch = pSeries_setup_arch, + .init_early = pSeries_init_early, + .get_cpuinfo = pSeries_get_cpuinfo, + .log_error = pSeries_log_error, + .pcibios_fixup = pSeries_final_fixup, + .restart = rtas_restart, + .power_off = rtas_power_off, + .halt = rtas_halt, + .panic = rtas_os_term, + .cpu_die = pSeries_mach_cpu_die, + .get_boot_time = pSeries_get_boot_time, + .get_rtc_time = pSeries_get_rtc_time, + .set_rtc_time = pSeries_set_rtc_time, + .calibrate_decr = pSeries_calibrate_decr, + .progress = pSeries_progress, + .check_legacy_ioport = pSeries_check_legacy_ioport, + .system_reset_exception = pSeries_system_reset_exception, + .machine_check_exception = pSeries_machine_check_exception, +}; diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c new file mode 100644 index 00000000000..c60d8cb2b84 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -0,0 +1,451 @@ +/* + * SMP support for pSeries machines. + * + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * Plus various changes from other IBM teams... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpic.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * The primary thread of each non-boot processor is recorded here before + * smp init. + */ +static cpumask_t of_spin_map; + +extern void pSeries_secondary_smp_init(unsigned long); + +#ifdef CONFIG_HOTPLUG_CPU + +/* Get state of physical CPU. + * Return codes: + * 0 - The processor is in the RTAS stopped state + * 1 - stop-self is in progress + * 2 - The processor is not in the RTAS stopped state + * -1 - Hardware Error + * -2 - Hardware Busy, Try again later. + */ +static int query_cpu_stopped(unsigned int pcpu) +{ + int cpu_status; + int status, qcss_tok; + + qcss_tok = rtas_token("query-cpu-stopped-state"); + if (qcss_tok == RTAS_UNKNOWN_SERVICE) + return -1; + status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu); + if (status != 0) { + printk(KERN_ERR + "RTAS query-cpu-stopped-state failed: %i\n", status); + return status; + } + + return cpu_status; +} + +int pSeries_cpu_disable(void) +{ + systemcfg->processorCount--; + + /*fix boot_cpuid here*/ + if (smp_processor_id() == boot_cpuid) + boot_cpuid = any_online_cpu(cpu_online_map); + + /* FIXME: abstract this to not be platform specific later on */ + xics_migrate_irqs_away(); + return 0; +} + +void pSeries_cpu_die(unsigned int cpu) +{ + int tries; + int cpu_status; + unsigned int pcpu = get_hard_smp_processor_id(cpu); + + for (tries = 0; tries < 25; tries++) { + cpu_status = query_cpu_stopped(pcpu); + if (cpu_status == 0 || cpu_status == -1) + break; + msleep(200); + } + if (cpu_status != 0) { + printk("Querying DEAD? cpu %i (%i) shows %i\n", + cpu, pcpu, cpu_status); + } + + /* Isolation and deallocation are definatly done by + * drslot_chrp_cpu. If they were not they would be + * done here. Change isolate state to Isolate and + * change allocation-state to Unusable. + */ + paca[cpu].cpu_start = 0; +} + +/* + * Update cpu_present_map and paca(s) for a new cpu node. The wrinkle + * here is that a cpu device node may represent up to two logical cpus + * in the SMT case. We must honor the assumption in other code that + * the logical ids for sibling SMT threads x and y are adjacent, such + * that x^1 == y and y^1 == x. + */ +static int pSeries_add_processor(struct device_node *np) +{ + unsigned int cpu; + cpumask_t candidate_map, tmp = CPU_MASK_NONE; + int err = -ENOSPC, len, nthreads, i; + u32 *intserv; + + intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return 0; + + nthreads = len / sizeof(u32); + for (i = 0; i < nthreads; i++) + cpu_set(i, tmp); + + lock_cpu_hotplug(); + + BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map)); + + /* Get a bitmap of unoccupied slots. */ + cpus_xor(candidate_map, cpu_possible_map, cpu_present_map); + if (cpus_empty(candidate_map)) { + /* If we get here, it most likely means that NR_CPUS is + * less than the partition's max processors setting. + */ + printk(KERN_ERR "Cannot add cpu %s; this system configuration" + " supports %d logical cpus.\n", np->full_name, + cpus_weight(cpu_possible_map)); + goto out_unlock; + } + + while (!cpus_empty(tmp)) + if (cpus_subset(tmp, candidate_map)) + /* Found a range where we can insert the new cpu(s) */ + break; + else + cpus_shift_left(tmp, tmp, nthreads); + + if (cpus_empty(tmp)) { + printk(KERN_ERR "Unable to find space in cpu_present_map for" + " processor %s with %d thread(s)\n", np->name, + nthreads); + goto out_unlock; + } + + for_each_cpu_mask(cpu, tmp) { + BUG_ON(cpu_isset(cpu, cpu_present_map)); + cpu_set(cpu, cpu_present_map); + set_hard_smp_processor_id(cpu, *intserv++); + } + err = 0; +out_unlock: + unlock_cpu_hotplug(); + return err; +} + +/* + * Update the present map for a cpu node which is going away, and set + * the hard id in the paca(s) to -1 to be consistent with boot time + * convention for non-present cpus. + */ +static void pSeries_remove_processor(struct device_node *np) +{ + unsigned int cpu; + int len, nthreads, i; + u32 *intserv; + + intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return; + + nthreads = len / sizeof(u32); + + lock_cpu_hotplug(); + for (i = 0; i < nthreads; i++) { + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != intserv[i]) + continue; + BUG_ON(cpu_online(cpu)); + cpu_clear(cpu, cpu_present_map); + set_hard_smp_processor_id(cpu, -1); + break; + } + if (cpu == NR_CPUS) + printk(KERN_WARNING "Could not find cpu to remove " + "with physical id 0x%x\n", intserv[i]); + } + unlock_cpu_hotplug(); +} + +static int pSeries_smp_notifier(struct notifier_block *nb, unsigned long action, void *node) +{ + int err = NOTIFY_OK; + + switch (action) { + case PSERIES_RECONFIG_ADD: + if (pSeries_add_processor(node)) + err = NOTIFY_BAD; + break; + case PSERIES_RECONFIG_REMOVE: + pSeries_remove_processor(node); + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block pSeries_smp_nb = { + .notifier_call = pSeries_smp_notifier, +}; + +#endif /* CONFIG_HOTPLUG_CPU */ + +/** + * smp_startup_cpu() - start the given cpu + * + * At boot time, there is nothing to do for primary threads which were + * started from Open Firmware. For anything else, call RTAS with the + * appropriate start location. + * + * Returns: + * 0 - failure + * 1 - success + */ +static inline int __devinit smp_startup_cpu(unsigned int lcpu) +{ + int status; + unsigned long start_here = __pa((u32)*((unsigned long *) + pSeries_secondary_smp_init)); + unsigned int pcpu; + + if (cpu_isset(lcpu, of_spin_map)) + /* Already started by OF and sitting in spin loop */ + return 1; + + pcpu = get_hard_smp_processor_id(lcpu); + + /* Fixup atomic count: it exited inside IRQ handler. */ + paca[lcpu].__current->thread_info->preempt_count = 0; + + status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL, + pcpu, start_here, lcpu); + if (status != 0) { + printk(KERN_ERR "start-cpu failed: %i\n", status); + return 0; + } + return 1; +} + +static inline void smp_xics_do_message(int cpu, int msg) +{ + set_bit(msg, &xics_ipi_message[cpu].value); + mb(); + xics_cause_IPI(cpu); +} + +static void smp_xics_message_pass(int target, int msg) +{ + unsigned int i; + + if (target < NR_CPUS) { + smp_xics_do_message(target, msg); + } else { + for_each_online_cpu(i) { + if (target == MSG_ALL_BUT_SELF + && i == smp_processor_id()) + continue; + smp_xics_do_message(i, msg); + } + } +} + +static int __init smp_xics_probe(void) +{ + xics_request_IPIs(); + + return cpus_weight(cpu_possible_map); +} + +static void __devinit smp_xics_setup_cpu(int cpu) +{ + if (cpu != boot_cpuid) + xics_setup_cpu(); + + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) + vpa_init(cpu); + + cpu_clear(cpu, of_spin_map); + + /* + * Put the calling processor into the GIQ. This is really only + * necessary from a secondary thread as the OF start-cpu interface + * performs this function for us on primary threads. + */ + rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, + (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); +} + +static DEFINE_SPINLOCK(timebase_lock); +static unsigned long timebase = 0; + +static void __devinit pSeries_give_timebase(void) +{ + spin_lock(&timebase_lock); + rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); + timebase = get_tb(); + spin_unlock(&timebase_lock); + + while (timebase) + barrier(); + rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); +} + +static void __devinit pSeries_take_timebase(void) +{ + while (!timebase) + barrier(); + spin_lock(&timebase_lock); + set_tb(timebase >> 32, timebase & 0xffffffff); + timebase = 0; + spin_unlock(&timebase_lock); +} + +static void __devinit smp_pSeries_kick_cpu(int nr) +{ + BUG_ON(nr < 0 || nr >= NR_CPUS); + + if (!smp_startup_cpu(nr)) + return; + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + paca[nr].cpu_start = 1; +} + +static int smp_pSeries_cpu_bootable(unsigned int nr) +{ + /* Special case - we inhibit secondary thread startup + * during boot if the user requests it. Odd-numbered + * cpus are assumed to be secondary threads. + */ + if (system_state < SYSTEM_RUNNING && + cur_cpu_spec->cpu_features & CPU_FTR_SMT && + !smt_enabled_at_boot && nr % 2 != 0) + return 0; + + return 1; +} + +static struct smp_ops_t pSeries_mpic_smp_ops = { + .message_pass = smp_mpic_message_pass, + .probe = smp_mpic_probe, + .kick_cpu = smp_pSeries_kick_cpu, + .setup_cpu = smp_mpic_setup_cpu, +}; + +static struct smp_ops_t pSeries_xics_smp_ops = { + .message_pass = smp_xics_message_pass, + .probe = smp_xics_probe, + .kick_cpu = smp_pSeries_kick_cpu, + .setup_cpu = smp_xics_setup_cpu, + .cpu_bootable = smp_pSeries_cpu_bootable, +}; + +/* This is called very early */ +void __init smp_init_pSeries(void) +{ + int i; + + DBG(" -> smp_init_pSeries()\n"); + + if (ppc64_interrupt_controller == IC_OPEN_PIC) + smp_ops = &pSeries_mpic_smp_ops; + else + smp_ops = &pSeries_xics_smp_ops; + +#ifdef CONFIG_HOTPLUG_CPU + smp_ops->cpu_disable = pSeries_cpu_disable; + smp_ops->cpu_die = pSeries_cpu_die; + + /* Processors can be added/removed only on LPAR */ + if (systemcfg->platform == PLATFORM_PSERIES_LPAR) + pSeries_reconfig_notifier_register(&pSeries_smp_nb); +#endif + + /* Mark threads which are still spinning in hold loops. */ + if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) + for_each_present_cpu(i) { + if (i % 2 == 0) + /* + * Even-numbered logical cpus correspond to + * primary threads. + */ + cpu_set(i, of_spin_map); + } + else + of_spin_map = cpu_present_map; + + cpu_clear(boot_cpuid, of_spin_map); + + /* Non-lpar has additional take/give timebase */ + if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { + smp_ops->give_timebase = pSeries_give_timebase; + smp_ops->take_timebase = pSeries_take_timebase; + } + + DBG(" <- smp_init_pSeries()\n"); +} + diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c new file mode 100644 index 00000000000..a3e0975c26c --- /dev/null +++ b/arch/ppc64/kernel/pacaData.c @@ -0,0 +1,224 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +static union { + struct systemcfg data; + u8 page[PAGE_SIZE]; +} systemcfg_store __page_aligned; +struct systemcfg *systemcfg = &systemcfg_store.data; +EXPORT_SYMBOL(systemcfg); + + +/* This symbol is provided by the linker - let it fill in the paca + * field correctly */ +extern unsigned long __toc_start; + +/* The Paca is an array with one entry per processor. Each contains an + * lppaca, which contains the information shared between the + * hypervisor and Linux. Each also contains an ItLpRegSave area which + * is used by the hypervisor to save registers. + * On systems with hardware multi-threading, there are two threads + * per processor. The Paca array must contain an entry for each thread. + * The VPD Areas will give a max logical processors = 2 * max physical + * processors. The processor VPD array needs one entry per physical + * processor (not thread). + */ +#ifdef CONFIG_PPC_ISERIES +#define EXTRA_INITS(number, lpq) \ + .lppaca_ptr = &paca[number].lppaca, \ + .lpqueue_ptr = (lpq), /* &xItLpQueue, */ \ + .reg_save_ptr = &paca[number].reg_save, \ + .reg_save = { \ + .xDesc = 0xd397d9e2, /* "LpRS" */ \ + .xSize = sizeof(struct ItLpRegSave) \ + }, +#else +#define EXTRA_INITS(number, lpq) +#endif + +#define PACAINITDATA(number,start,lpq,asrr,asrv) \ +{ \ + .lock_token = 0x8000, \ + .paca_index = (number), /* Paca Index */ \ + .default_decr = 0x00ff0000, /* Initial Decr */ \ + .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \ + .stab_real = (asrr), /* Real pointer to segment table */ \ + .stab_addr = (asrv), /* Virt pointer to segment table */ \ + .cpu_start = (start), /* Processor start */ \ + .hw_cpu_id = 0xffff, \ + .lppaca = { \ + .desc = 0xd397d781, /* "LpPa" */ \ + .size = sizeof(struct lppaca), \ + .dyn_proc_status = 2, \ + .decr_val = 0x00ff0000, \ + .fpregs_in_use = 1, \ + .end_of_quantum = 0xfffffffffffffffful, \ + .slb_count = 64, \ + }, \ + EXTRA_INITS((number), (lpq)) \ +} + +struct paca_struct paca[] = { +#ifdef CONFIG_PPC_ISERIES + PACAINITDATA( 0, 1, &xItLpQueue, 0, STAB0_VIRT_ADDR), +#else + PACAINITDATA( 0, 1, NULL, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR), +#endif +#if NR_CPUS > 1 + PACAINITDATA( 1, 0, NULL, 0, 0), + PACAINITDATA( 2, 0, NULL, 0, 0), + PACAINITDATA( 3, 0, NULL, 0, 0), +#if NR_CPUS > 4 + PACAINITDATA( 4, 0, NULL, 0, 0), + PACAINITDATA( 5, 0, NULL, 0, 0), + PACAINITDATA( 6, 0, NULL, 0, 0), + PACAINITDATA( 7, 0, NULL, 0, 0), +#if NR_CPUS > 8 + PACAINITDATA( 8, 0, NULL, 0, 0), + PACAINITDATA( 9, 0, NULL, 0, 0), + PACAINITDATA(10, 0, NULL, 0, 0), + PACAINITDATA(11, 0, NULL, 0, 0), + PACAINITDATA(12, 0, NULL, 0, 0), + PACAINITDATA(13, 0, NULL, 0, 0), + PACAINITDATA(14, 0, NULL, 0, 0), + PACAINITDATA(15, 0, NULL, 0, 0), + PACAINITDATA(16, 0, NULL, 0, 0), + PACAINITDATA(17, 0, NULL, 0, 0), + PACAINITDATA(18, 0, NULL, 0, 0), + PACAINITDATA(19, 0, NULL, 0, 0), + PACAINITDATA(20, 0, NULL, 0, 0), + PACAINITDATA(21, 0, NULL, 0, 0), + PACAINITDATA(22, 0, NULL, 0, 0), + PACAINITDATA(23, 0, NULL, 0, 0), + PACAINITDATA(24, 0, NULL, 0, 0), + PACAINITDATA(25, 0, NULL, 0, 0), + PACAINITDATA(26, 0, NULL, 0, 0), + PACAINITDATA(27, 0, NULL, 0, 0), + PACAINITDATA(28, 0, NULL, 0, 0), + PACAINITDATA(29, 0, NULL, 0, 0), + PACAINITDATA(30, 0, NULL, 0, 0), + PACAINITDATA(31, 0, NULL, 0, 0), +#if NR_CPUS > 32 + PACAINITDATA(32, 0, NULL, 0, 0), + PACAINITDATA(33, 0, NULL, 0, 0), + PACAINITDATA(34, 0, NULL, 0, 0), + PACAINITDATA(35, 0, NULL, 0, 0), + PACAINITDATA(36, 0, NULL, 0, 0), + PACAINITDATA(37, 0, NULL, 0, 0), + PACAINITDATA(38, 0, NULL, 0, 0), + PACAINITDATA(39, 0, NULL, 0, 0), + PACAINITDATA(40, 0, NULL, 0, 0), + PACAINITDATA(41, 0, NULL, 0, 0), + PACAINITDATA(42, 0, NULL, 0, 0), + PACAINITDATA(43, 0, NULL, 0, 0), + PACAINITDATA(44, 0, NULL, 0, 0), + PACAINITDATA(45, 0, NULL, 0, 0), + PACAINITDATA(46, 0, NULL, 0, 0), + PACAINITDATA(47, 0, NULL, 0, 0), + PACAINITDATA(48, 0, NULL, 0, 0), + PACAINITDATA(49, 0, NULL, 0, 0), + PACAINITDATA(50, 0, NULL, 0, 0), + PACAINITDATA(51, 0, NULL, 0, 0), + PACAINITDATA(52, 0, NULL, 0, 0), + PACAINITDATA(53, 0, NULL, 0, 0), + PACAINITDATA(54, 0, NULL, 0, 0), + PACAINITDATA(55, 0, NULL, 0, 0), + PACAINITDATA(56, 0, NULL, 0, 0), + PACAINITDATA(57, 0, NULL, 0, 0), + PACAINITDATA(58, 0, NULL, 0, 0), + PACAINITDATA(59, 0, NULL, 0, 0), + PACAINITDATA(60, 0, NULL, 0, 0), + PACAINITDATA(61, 0, NULL, 0, 0), + PACAINITDATA(62, 0, NULL, 0, 0), + PACAINITDATA(63, 0, NULL, 0, 0), +#if NR_CPUS > 64 + PACAINITDATA(64, 0, NULL, 0, 0), + PACAINITDATA(65, 0, NULL, 0, 0), + PACAINITDATA(66, 0, NULL, 0, 0), + PACAINITDATA(67, 0, NULL, 0, 0), + PACAINITDATA(68, 0, NULL, 0, 0), + PACAINITDATA(69, 0, NULL, 0, 0), + PACAINITDATA(70, 0, NULL, 0, 0), + PACAINITDATA(71, 0, NULL, 0, 0), + PACAINITDATA(72, 0, NULL, 0, 0), + PACAINITDATA(73, 0, NULL, 0, 0), + PACAINITDATA(74, 0, NULL, 0, 0), + PACAINITDATA(75, 0, NULL, 0, 0), + PACAINITDATA(76, 0, NULL, 0, 0), + PACAINITDATA(77, 0, NULL, 0, 0), + PACAINITDATA(78, 0, NULL, 0, 0), + PACAINITDATA(79, 0, NULL, 0, 0), + PACAINITDATA(80, 0, NULL, 0, 0), + PACAINITDATA(81, 0, NULL, 0, 0), + PACAINITDATA(82, 0, NULL, 0, 0), + PACAINITDATA(83, 0, NULL, 0, 0), + PACAINITDATA(84, 0, NULL, 0, 0), + PACAINITDATA(85, 0, NULL, 0, 0), + PACAINITDATA(86, 0, NULL, 0, 0), + PACAINITDATA(87, 0, NULL, 0, 0), + PACAINITDATA(88, 0, NULL, 0, 0), + PACAINITDATA(89, 0, NULL, 0, 0), + PACAINITDATA(90, 0, NULL, 0, 0), + PACAINITDATA(91, 0, NULL, 0, 0), + PACAINITDATA(92, 0, NULL, 0, 0), + PACAINITDATA(93, 0, NULL, 0, 0), + PACAINITDATA(94, 0, NULL, 0, 0), + PACAINITDATA(95, 0, NULL, 0, 0), + PACAINITDATA(96, 0, NULL, 0, 0), + PACAINITDATA(97, 0, NULL, 0, 0), + PACAINITDATA(98, 0, NULL, 0, 0), + PACAINITDATA(99, 0, NULL, 0, 0), + PACAINITDATA(100, 0, NULL, 0, 0), + PACAINITDATA(101, 0, NULL, 0, 0), + PACAINITDATA(102, 0, NULL, 0, 0), + PACAINITDATA(103, 0, NULL, 0, 0), + PACAINITDATA(104, 0, NULL, 0, 0), + PACAINITDATA(105, 0, NULL, 0, 0), + PACAINITDATA(106, 0, NULL, 0, 0), + PACAINITDATA(107, 0, NULL, 0, 0), + PACAINITDATA(108, 0, NULL, 0, 0), + PACAINITDATA(109, 0, NULL, 0, 0), + PACAINITDATA(110, 0, NULL, 0, 0), + PACAINITDATA(111, 0, NULL, 0, 0), + PACAINITDATA(112, 0, NULL, 0, 0), + PACAINITDATA(113, 0, NULL, 0, 0), + PACAINITDATA(114, 0, NULL, 0, 0), + PACAINITDATA(115, 0, NULL, 0, 0), + PACAINITDATA(116, 0, NULL, 0, 0), + PACAINITDATA(117, 0, NULL, 0, 0), + PACAINITDATA(118, 0, NULL, 0, 0), + PACAINITDATA(119, 0, NULL, 0, 0), + PACAINITDATA(120, 0, NULL, 0, 0), + PACAINITDATA(121, 0, NULL, 0, 0), + PACAINITDATA(122, 0, NULL, 0, 0), + PACAINITDATA(123, 0, NULL, 0, 0), + PACAINITDATA(124, 0, NULL, 0, 0), + PACAINITDATA(125, 0, NULL, 0, 0), + PACAINITDATA(126, 0, NULL, 0, 0), + PACAINITDATA(127, 0, NULL, 0, 0), +#endif +#endif +#endif +#endif +#endif +}; +EXPORT_SYMBOL(paca); diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c new file mode 100644 index 00000000000..fdd8f7869a6 --- /dev/null +++ b/arch/ppc64/kernel/pci.c @@ -0,0 +1,942 @@ +/* + * Port for PPC64 David Engebretsen, IBM Corp. + * Contains common pci routines for ppc64 platform, pSeries and iSeries brands. + * + * Copyright (C) 2003 Anton Blanchard , IBM + * Rework, based on alpha PCI code. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +unsigned long pci_probe_only = 1; +unsigned long pci_assign_all_buses = 0; + +/* + * legal IO pages under MAX_ISA_PORT. This is to ensure we don't touch + * devices we don't have access to. + */ +unsigned long io_page_mask; + +EXPORT_SYMBOL(io_page_mask); + + +unsigned int pcibios_assign_all_busses(void) +{ + return pci_assign_all_buses; +} + +/* pci_io_base -- the base address from which io bars are offsets. + * This is the lowest I/O base address (so bar values are always positive), + * and it *must* be the start of ISA space if an ISA bus exists because + * ISA drivers use hard coded offsets. If no ISA bus exists a dummy + * page is mapped and isa_io_limit prevents access to it. + */ +unsigned long isa_io_base; /* NULL if no ISA bus */ +EXPORT_SYMBOL(isa_io_base); +unsigned long pci_io_base; +EXPORT_SYMBOL(pci_io_base); + +void iSeries_pcibios_init(void); + +LIST_HEAD(hose_list); + +struct dma_mapping_ops pci_dma_ops; +EXPORT_SYMBOL(pci_dma_ops); + +int global_phb_number; /* Global phb counter */ + +/* Cached ISA bridge dev. */ +struct pci_dev *ppc64_isabridge_dev = NULL; + +static void fixup_broken_pcnet32(struct pci_dev* dev) +{ + if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) { + dev->vendor = PCI_VENDOR_ID_AMD; + pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD); + pci_name_device(dev); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32); + +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + unsigned long offset = 0; + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + if (!hose) + return; + + if (res->flags & IORESOURCE_IO) + offset = (unsigned long)hose->io_base_virt - pci_io_base; + + if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + + region->start = res->start - offset; + region->end = res->end - offset; +} + +#ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL(pcibios_resource_to_bus); +#endif + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + * + * Why? Because some silly external IO cards only decode + * the low 10 bits of the IO address. The 0x00-0xff region + * is reserved for motherboard devices that decode all 16 + * bits, so it's ok to allocate at, say, 0x2800-0x28ff, + * but we want to try to avoid allocating at 0x2900-0x2bff + * which might have be mirrored at 0x0100-0x03ff.. + */ +void pcibios_align_resource(void *data, struct resource *res, + unsigned long size, unsigned long align) +{ + struct pci_dev *dev = data; + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned long start = res->start; + unsigned long alignto; + + if (res->flags & IORESOURCE_IO) { + unsigned long offset = (unsigned long)hose->io_base_virt - + pci_io_base; + /* Make sure we start at our min on all hoses */ + if (start - offset < PCIBIOS_MIN_IO) + start = PCIBIOS_MIN_IO + offset; + + /* + * Put everything into 0x00-0xff region modulo 0x400 + */ + if (start & 0x300) + start = (start + 0x3ff) & ~0x3ff; + + } else if (res->flags & IORESOURCE_MEM) { + /* Make sure we start at our min on all hoses */ + if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM) + start = PCIBIOS_MIN_MEM + hose->pci_mem_offset; + + /* Align to multiple of size of minimum base. */ + alignto = max(0x1000UL, align); + start = ALIGN(start, alignto); + } + + res->start = start; +} + +static DEFINE_SPINLOCK(hose_spinlock); + +/* + * pci_controller(phb) initialized common variables. + */ +void __devinit pci_setup_pci_controller(struct pci_controller *hose) +{ + memset(hose, 0, sizeof(struct pci_controller)); + + spin_lock(&hose_spinlock); + hose->global_number = global_phb_number++; + list_add_tail(&hose->list_node, &hose_list); + spin_unlock(&hose_spinlock); +} + +static void __init pcibios_claim_one_bus(struct pci_bus *b) +{ + struct pci_dev *dev; + struct pci_bus *child_bus; + + list_for_each_entry(dev, &b->devices, bus_list) { + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r = &dev->resource[i]; + + if (r->parent || !r->start || !r->flags) + continue; + pci_claim_resource(dev, i); + } + } + + list_for_each_entry(child_bus, &b->children, node) + pcibios_claim_one_bus(child_bus); +} + +#ifndef CONFIG_PPC_ISERIES +static void __init pcibios_claim_of_setup(void) +{ + struct pci_bus *b; + + list_for_each_entry(b, &pci_root_buses, node) + pcibios_claim_one_bus(b); +} +#endif + +static int __init pcibios_init(void) +{ + struct pci_controller *hose, *tmp; + struct pci_bus *bus; + + /* For now, override phys_mem_access_prot. If we need it, + * later, we may move that initialization to each ppc_md + */ + ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot; + +#ifdef CONFIG_PPC_ISERIES + iSeries_pcibios_init(); +#endif + + printk("PCI: Probing PCI hardware\n"); + + /* Scan all of the recorded PCI controllers. */ + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + hose->last_busno = 0xff; + bus = pci_scan_bus(hose->first_busno, hose->ops, + hose->arch_data); + hose->bus = bus; + hose->last_busno = bus->subordinate; + } + +#ifndef CONFIG_PPC_ISERIES + if (pci_probe_only) + pcibios_claim_of_setup(); + else + /* FIXME: `else' will be removed when + pci_assign_unassigned_resources() is able to work + correctly with [partially] allocated PCI tree. */ + pci_assign_unassigned_resources(); +#endif /* !CONFIG_PPC_ISERIES */ + + /* Call machine dependent final fixup */ + if (ppc_md.pcibios_fixup) + ppc_md.pcibios_fixup(); + + /* Cache the location of the ISA bridge (if we have one) */ + ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); + if (ppc64_isabridge_dev != NULL) + printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev)); + + printk("PCI: Probing PCI hardware done\n"); + + return 0; +} + +subsys_initcall(pcibios_init); + +char __init *pcibios_setup(char *str) +{ + return str; +} + +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, oldcmd; + int i; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + oldcmd = cmd; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + /* Only set up the requested stuff */ + if (!(mask & (1<flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (res->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + + if (cmd != oldcmd) { + printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n", + pci_name(dev), cmd); + /* Enable the appropriate bits in the PCI command register. */ + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +/* + * Return the domain number for this bus. + */ +int pci_domain_nr(struct pci_bus *bus) +{ +#ifdef CONFIG_PPC_ISERIES + return 0; +#else + struct pci_controller *hose = pci_bus_to_host(bus); + + return hose->global_number; +#endif +} + +EXPORT_SYMBOL(pci_domain_nr); + +/* Decide whether to display the domain number in /proc */ +int pci_proc_domain(struct pci_bus *bus) +{ +#ifdef CONFIG_PPC_ISERIES + return 0; +#else + struct pci_controller *hose = pci_bus_to_host(bus); + return hose->buid; +#endif +} + +/* + * Platform support for /proc/bus/pci/X/Y mmap()s, + * modelled on the sparc64 implementation by Dave Miller. + * -- paulus. + */ + +/* + * Adjust vm_pgoff of VMA such that it is the physical page offset + * corresponding to the 32-bit pci bus offset for DEV requested by the user. + * + * Basically, the user finds the base address for his device which he wishes + * to mmap. They read the 32-bit value from the config space base register, + * add whatever PAGE_SIZE multiple offset they wish, and feed this into the + * offset parameter of mmap on /proc/bus/pci/XXX for that device. + * + * Returns negative error code on failure, zero on success. + */ +static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, + unsigned long *offset, + enum pci_mmap_state mmap_state) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned long io_offset = 0; + int i, res_bit; + + if (hose == 0) + return NULL; /* should never happen */ + + /* If memory, add on the PCI bridge address offset */ + if (mmap_state == pci_mmap_mem) { + *offset += hose->pci_mem_offset; + res_bit = IORESOURCE_MEM; + } else { + io_offset = (unsigned long)hose->io_base_virt; + *offset += io_offset; + res_bit = IORESOURCE_IO; + } + + /* + * Check that the offset requested corresponds to one of the + * resources of the device. + */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &dev->resource[i]; + int flags = rp->flags; + + /* treat ROM as memory (should be already) */ + if (i == PCI_ROM_RESOURCE) + flags |= IORESOURCE_MEM; + + /* Active and same type? */ + if ((flags & res_bit) == 0) + continue; + + /* In the range of this resource? */ + if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end) + continue; + + /* found it! construct the final physical address */ + if (mmap_state == pci_mmap_io) + *offset += hose->io_base_phys - io_offset; + return rp; + } + + return NULL; +} + +/* + * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci + * device mapping. + */ +static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, + pgprot_t protection, + enum pci_mmap_state mmap_state, + int write_combine) +{ + unsigned long prot = pgprot_val(protection); + + /* Write combine is always 0 on non-memory space mappings. On + * memory space, if the user didn't pass 1, we check for a + * "prefetchable" resource. This is a bit hackish, but we use + * this to workaround the inability of /sysfs to provide a write + * combine bit + */ + if (mmap_state != pci_mmap_mem) + write_combine = 0; + else if (write_combine == 0) { + if (rp->flags & IORESOURCE_PREFETCH) + write_combine = 1; + } + + /* XXX would be nice to have a way to ask for write-through */ + prot |= _PAGE_NO_CACHE; + if (write_combine) + prot &= ~_PAGE_GUARDED; + else + prot |= _PAGE_GUARDED; + + printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start, + prot); + + return __pgprot(prot); +} + +/* + * This one is used by /dev/mem and fbdev who have no clue about the + * PCI device, it tries to find the PCI device first and calls the + * above routine + */ +pgprot_t pci_phys_mem_access_prot(struct file *file, + unsigned long offset, + unsigned long size, + pgprot_t protection) +{ + struct pci_dev *pdev = NULL; + struct resource *found = NULL; + unsigned long prot = pgprot_val(protection); + int i; + + if (page_is_ram(offset >> PAGE_SHIFT)) + return prot; + + prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; + + for_each_pci_dev(pdev) { + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &pdev->resource[i]; + int flags = rp->flags; + + /* Active and same type? */ + if ((flags & IORESOURCE_MEM) == 0) + continue; + /* In the range of this resource? */ + if (offset < (rp->start & PAGE_MASK) || + offset > rp->end) + continue; + found = rp; + break; + } + if (found) + break; + } + if (found) { + if (found->flags & IORESOURCE_PREFETCH) + prot &= ~_PAGE_GUARDED; + pci_dev_put(pdev); + } + + DBG("non-PCI map for %lx, prot: %lx\n", offset, prot); + + return __pgprot(prot); +} + + +/* + * Perform the actual remap of the pages for a PCI device mapping, as + * appropriate for this architecture. The region in the process to map + * is described by vm_start and vm_end members of VMA, the base physical + * address is found in vm_pgoff. + * The pci device structure is provided so that architectures may make mapping + * decisions on a per-device or per-bus basis. + * + * Returns a negative error code on failure, zero on success. + */ +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, + int write_combine) +{ + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + struct resource *rp; + int ret; + + rp = __pci_mmap_make_offset(dev, &offset, mmap_state); + if (rp == NULL) + return -EINVAL; + + vma->vm_pgoff = offset >> PAGE_SHIFT; + vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO; + vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp, + vma->vm_page_prot, + mmap_state, write_combine); + + ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot); + + return ret; +} + +#ifdef CONFIG_PPC_MULTIPLATFORM +static ssize_t pci_show_devspec(struct device *dev, char *buf) +{ + struct pci_dev *pdev; + struct device_node *np; + + pdev = to_pci_dev (dev); + np = pci_device_to_OF_node(pdev); + if (np == NULL || np->full_name == NULL) + return 0; + return sprintf(buf, "%s", np->full_name); +} +static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +void pcibios_add_platform_entries(struct pci_dev *pdev) +{ +#ifdef CONFIG_PPC_MULTIPLATFORM + device_create_file(&pdev->dev, &dev_attr_devspec); +#endif /* CONFIG_PPC_MULTIPLATFORM */ +} + +#ifdef CONFIG_PPC_MULTIPLATFORM + +#define ISA_SPACE_MASK 0x1 +#define ISA_SPACE_IO 0x1 + +static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, + unsigned long phb_io_base_phys, + void __iomem * phb_io_base_virt) +{ + struct isa_range *range; + unsigned long pci_addr; + unsigned int isa_addr; + unsigned int size; + int rlen = 0; + + range = (struct isa_range *) get_property(isa_node, "ranges", &rlen); + if (range == NULL || (rlen < sizeof(struct isa_range))) { + printk(KERN_ERR "no ISA ranges or unexpected isa range size," + "mapping 64k\n"); + __ioremap_explicit(phb_io_base_phys, (unsigned long)phb_io_base_virt, + 0x10000, _PAGE_NO_CACHE); + return; + } + + /* From "ISA Binding to 1275" + * The ranges property is laid out as an array of elements, + * each of which comprises: + * cells 0 - 1: an ISA address + * cells 2 - 4: a PCI address + * (size depending on dev->n_addr_cells) + * cell 5: the size of the range + */ + if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) { + isa_addr = range->isa_addr.a_lo; + pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | + range->pci_addr.a_lo; + + /* Assume these are both zero */ + if ((pci_addr != 0) || (isa_addr != 0)) { + printk(KERN_ERR "unexpected isa to pci mapping: %s\n", + __FUNCTION__); + return; + } + + size = PAGE_ALIGN(range->size); + + __ioremap_explicit(phb_io_base_phys, + (unsigned long) phb_io_base_virt, + size, _PAGE_NO_CACHE); + } +} + +void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, + struct device_node *dev) +{ + unsigned int *ranges; + unsigned long size; + int rlen = 0; + int memno = 0; + struct resource *res; + int np, na = prom_n_addr_cells(dev); + unsigned long pci_addr, cpu_phys_addr; + + np = na + 5; + + /* From "PCI Binding to 1275" + * The ranges property is laid out as an array of elements, + * each of which comprises: + * cells 0 - 2: a PCI address + * cells 3 or 3+4: a CPU physical address + * (size depending on dev->n_addr_cells) + * cells 4+5 or 5+6: the size of the range + */ + rlen = 0; + hose->io_base_phys = 0; + ranges = (unsigned int *) get_property(dev, "ranges", &rlen); + while ((rlen -= np * sizeof(unsigned int)) >= 0) { + res = NULL; + pci_addr = (unsigned long)ranges[1] << 32 | ranges[2]; + + cpu_phys_addr = ranges[3]; + if (na == 2) + cpu_phys_addr = cpu_phys_addr << 32 | ranges[4]; + + size = (unsigned long)ranges[na+3] << 32 | ranges[na+4]; + if (size == 0) + continue; + switch ((ranges[0] >> 24) & 0x3) { + case 1: /* I/O space */ + hose->io_base_phys = cpu_phys_addr; + hose->pci_io_size = size; + + res = &hose->io_resource; + res->flags = IORESOURCE_IO; + res->start = pci_addr; + DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number, + res->start, res->start + size - 1); + break; + case 2: /* memory space */ + memno = 0; + while (memno < 3 && hose->mem_resources[memno].flags) + ++memno; + + if (memno == 0) + hose->pci_mem_offset = cpu_phys_addr - pci_addr; + if (memno < 3) { + res = &hose->mem_resources[memno]; + res->flags = IORESOURCE_MEM; + res->start = cpu_phys_addr; + DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number, + res->start, res->start + size - 1); + } + break; + } + if (res != NULL) { + res->name = dev->full_name; + res->end = res->start + size - 1; + res->parent = NULL; + res->sibling = NULL; + res->child = NULL; + } + ranges += np; + } +} + +void __init pci_setup_phb_io(struct pci_controller *hose, int primary) +{ + unsigned long size = hose->pci_io_size; + unsigned long io_virt_offset; + struct resource *res; + struct device_node *isa_dn; + + hose->io_base_virt = reserve_phb_iospace(size); + DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", + hose->global_number, hose->io_base_phys, + (unsigned long) hose->io_base_virt); + + if (primary) { + pci_io_base = (unsigned long)hose->io_base_virt; + isa_dn = of_find_node_by_type(NULL, "isa"); + if (isa_dn) { + isa_io_base = pci_io_base; + pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys, + hose->io_base_virt); + of_node_put(isa_dn); + /* Allow all IO */ + io_page_mask = -1; + } + } + + io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; + res = &hose->io_resource; + res->start += io_virt_offset; + res->end += io_virt_offset; +} + +void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose, + int primary) +{ + unsigned long size = hose->pci_io_size; + unsigned long io_virt_offset; + struct resource *res; + + hose->io_base_virt = __ioremap(hose->io_base_phys, size, + _PAGE_NO_CACHE); + DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", + hose->global_number, hose->io_base_phys, + (unsigned long) hose->io_base_virt); + + if (primary) + pci_io_base = (unsigned long)hose->io_base_virt; + + io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; + res = &hose->io_resource; + res->start += io_virt_offset; + res->end += io_virt_offset; +} + + +static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys, + unsigned long *start_virt, unsigned long *size) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + struct pci_bus_region region; + struct resource *res; + + if (bus->self) { + res = bus->resource[0]; + pcibios_resource_to_bus(bus->self, ®ion, res); + *start_phys = hose->io_base_phys + region.start; + *start_virt = (unsigned long) hose->io_base_virt + + region.start; + if (region.end > region.start) + *size = region.end - region.start + 1; + else { + printk("%s(): unexpected region 0x%lx->0x%lx\n", + __FUNCTION__, region.start, region.end); + return 1; + } + + } else { + /* Root Bus */ + res = &hose->io_resource; + *start_phys = hose->io_base_phys; + *start_virt = (unsigned long) hose->io_base_virt; + if (res->end > res->start) + *size = res->end - res->start + 1; + else { + printk("%s(): unexpected region 0x%lx->0x%lx\n", + __FUNCTION__, res->start, res->end); + return 1; + } + } + + return 0; +} + +int unmap_bus_range(struct pci_bus *bus) +{ + unsigned long start_phys; + unsigned long start_virt; + unsigned long size; + + if (!bus) { + printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); + return 1; + } + + if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) + return 1; + if (iounmap_explicit((void __iomem *) start_virt, size)) + return 1; + + return 0; +} +EXPORT_SYMBOL(unmap_bus_range); + +int remap_bus_range(struct pci_bus *bus) +{ + unsigned long start_phys; + unsigned long start_virt; + unsigned long size; + + if (!bus) { + printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); + return 1; + } + + + if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) + return 1; + printk("mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size); + if (__ioremap_explicit(start_phys, start_virt, size, _PAGE_NO_CACHE)) + return 1; + + return 0; +} +EXPORT_SYMBOL(remap_bus_range); + +void phbs_remap_io(void) +{ + struct pci_controller *hose, *tmp; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) + remap_bus_range(hose->bus); +} + +/* + * ppc64 can have multifunction devices that do not respond to function 0. + * In this case we must scan all functions. + */ +int pcibios_scan_all_fns(struct pci_bus *bus, int devfn) +{ + struct device_node *busdn, *dn; + + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = bus->sysdata; /* must be a phb */ + + if (busdn == NULL) + return 0; + + /* + * Check to see if there is any of the 8 functions are in the + * device tree. If they are then we need to scan all the + * functions of this slot. + */ + for (dn = busdn->child; dn; dn = dn->sibling) + if ((dn->devfn >> 3) == (devfn >> 3)) + return 1; + + return 0; +} + + +void __devinit pcibios_fixup_device_resources(struct pci_dev *dev, + struct pci_bus *bus) +{ + /* Update device resources. */ + struct pci_controller *hose = pci_bus_to_host(bus); + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + if (dev->resource[i].flags & IORESOURCE_IO) { + unsigned long offset = (unsigned long)hose->io_base_virt + - pci_io_base; + unsigned long start, end, mask; + + start = dev->resource[i].start += offset; + end = dev->resource[i].end += offset; + + /* Need to allow IO access to pages that are in the + ISA range */ + if (start < MAX_ISA_PORT) { + if (end > MAX_ISA_PORT) + end = MAX_ISA_PORT; + + start >>= PAGE_SHIFT; + end >>= PAGE_SHIFT; + + /* get the range of pages for the map */ + mask = ((1 << (end+1))-1) ^ ((1 << start)-1); + io_page_mask |= mask; + } + } + else if (dev->resource[i].flags & IORESOURCE_MEM) { + dev->resource[i].start += hose->pci_mem_offset; + dev->resource[i].end += hose->pci_mem_offset; + } + } +} +EXPORT_SYMBOL(pcibios_fixup_device_resources); + +void __devinit pcibios_fixup_bus(struct pci_bus *bus) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + struct pci_dev *dev = bus->self; + struct resource *res; + int i; + + if (!dev) { + /* Root bus. */ + + hose->bus = bus; + bus->resource[0] = res = &hose->io_resource; + + if (res->flags && request_resource(&ioport_resource, res)) + printk(KERN_ERR "Failed to request IO on " + "PCI domain %d\n", pci_domain_nr(bus)); + + for (i = 0; i < 3; ++i) { + res = &hose->mem_resources[i]; + bus->resource[i+1] = res; + if (res->flags && request_resource(&iomem_resource, res)) + printk(KERN_ERR "Failed to request MEM on " + "PCI domain %d\n", + pci_domain_nr(bus)); + } + } else if (pci_probe_only && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + /* This is a subordinate bridge */ + + pci_read_bridge_bases(bus); + pcibios_fixup_device_resources(dev, bus); + } + + ppc_md.iommu_bus_setup(bus); + + list_for_each_entry(dev, &bus->devices, bus_list) + ppc_md.iommu_dev_setup(dev); + + if (!pci_probe_only) + return; + + list_for_each_entry(dev, &bus->devices, bus_list) { + if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) + pcibios_fixup_device_resources(dev, bus); + } +} +EXPORT_SYMBOL(pcibios_fixup_bus); + +/* + * Reads the interrupt pin to determine if interrupt is use by card. + * If the interrupt is used, then gets the interrupt line from the + * openfirmware and sets it in the pci_dev and pci_config line. + */ +int pci_read_irq_line(struct pci_dev *pci_dev) +{ + u8 intpin; + struct device_node *node; + + pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin); + if (intpin == 0) + return 0; + + node = pci_device_to_OF_node(pci_dev); + if (node == NULL) + return -1; + + if (node->n_intrs == 0) + return -1; + + pci_dev->irq = node->intrs[0].line; + + pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq); + + return 0; +} +EXPORT_SYMBOL(pci_read_irq_line); + +#endif /* CONFIG_PPC_MULTIPLATFORM */ diff --git a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h new file mode 100644 index 00000000000..0fd7d849aa7 --- /dev/null +++ b/arch/ppc64/kernel/pci.h @@ -0,0 +1,51 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef __PPC_KERNEL_PCI_H__ +#define __PPC_KERNEL_PCI_H__ + +#include +#include + +extern unsigned long isa_io_base; + +extern void pci_setup_pci_controller(struct pci_controller *hose); +extern void pci_setup_phb_io(struct pci_controller *hose, int primary); +extern void pci_setup_phb_io_dynamic(struct pci_controller *hose, int primary); + + +extern struct list_head hose_list; +extern int global_phb_number; + +extern unsigned long find_and_init_phbs(void); + +extern struct pci_dev *ppc64_isabridge_dev; /* may be NULL if no ISA bus */ + +/* PCI device_node operations */ +struct device_node; +typedef void *(*traverse_func)(struct device_node *me, void *data); +void *traverse_pci_devices(struct device_node *start, traverse_func pre, + void *data); + +void pci_devs_phb_init(void); +void pci_devs_phb_init_dynamic(struct pci_controller *phb); +struct device_node *fetch_dev_dn(struct pci_dev *dev); + +/* PCI address cache management routines */ +void pci_addr_cache_insert_device(struct pci_dev *dev); +void pci_addr_cache_remove_device(struct pci_dev *dev); + +/* From pSeries_pci.h */ +void init_pci_config_tokens (void); +unsigned long get_phb_buid (struct device_node *); + +extern unsigned long pci_probe_only; +extern unsigned long pci_assign_all_buses; +extern int pci_read_irq_line(struct pci_dev *pci_dev); + +#endif /* __PPC_KERNEL_PCI_H__ */ diff --git a/arch/ppc64/kernel/pci_direct_iommu.c b/arch/ppc64/kernel/pci_direct_iommu.c new file mode 100644 index 00000000000..b8f7f58824f --- /dev/null +++ b/arch/ppc64/kernel/pci_direct_iommu.c @@ -0,0 +1,95 @@ +/* + * Support for DMA from PCI devices to main memory on + * machines without an iommu or with directly addressable + * RAM (typically a pmac with 2Gb of RAM or less) + * + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, unsigned int __nocast flag) +{ + void *ret; + + ret = (void *)__get_free_pages(flag, get_order(size)); + if (ret != NULL) { + memset(ret, 0, size); + *dma_handle = virt_to_abs(ret); + } + return ret; +} + +static void pci_direct_free_coherent(struct device *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + free_pages((unsigned long)vaddr, get_order(size)); +} + +static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr, + size_t size, enum dma_data_direction direction) +{ + return virt_to_abs(ptr); +} + +static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction) +{ +} + +static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++, sg++) { + sg->dma_address = page_to_phys(sg->page) + sg->offset; + sg->dma_length = sg->length; + } + + return nents; +} + +static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ +} + +static int pci_direct_dma_supported(struct device *dev, u64 mask) +{ + return mask < 0x100000000ull; +} + +void __init pci_direct_iommu_init(void) +{ + pci_dma_ops.alloc_coherent = pci_direct_alloc_coherent; + pci_dma_ops.free_coherent = pci_direct_free_coherent; + pci_dma_ops.map_single = pci_direct_map_single; + pci_dma_ops.unmap_single = pci_direct_unmap_single; + pci_dma_ops.map_sg = pci_direct_map_sg; + pci_dma_ops.unmap_sg = pci_direct_unmap_sg; + pci_dma_ops.dma_supported = pci_direct_dma_supported; +} diff --git a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c new file mode 100644 index 00000000000..ec345462afc --- /dev/null +++ b/arch/ppc64/kernel/pci_dn.c @@ -0,0 +1,198 @@ +/* + * pci_dn.c + * + * Copyright (C) 2001 Todd Inglett, IBM Corporation + * + * PCI manipulation via device_nodes. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "pci.h" + +/* + * Traverse_func that inits the PCI fields of the device node. + * NOTE: this *must* be done before read/write config to the device. + */ +static void * __devinit update_dn_pci_info(struct device_node *dn, void *data) +{ + struct pci_controller *phb = data; + int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL); + u32 *regs; + + dn->phb = phb; + regs = (u32 *)get_property(dn, "reg", NULL); + if (regs) { + /* First register entry is addr (00BBSS00) */ + dn->busno = (regs[0] >> 16) & 0xff; + dn->devfn = (regs[0] >> 8) & 0xff; + } + + dn->pci_ext_config_space = (type && *type == 1); + return NULL; +} + +/* + * Traverse a device tree stopping each PCI device in the tree. + * This is done depth first. As each node is processed, a "pre" + * function is called and the children are processed recursively. + * + * The "pre" func returns a value. If non-zero is returned from + * the "pre" func, the traversal stops and this value is returned. + * This return value is useful when using traverse as a method of + * finding a device. + * + * NOTE: we do not run the func for devices that do not appear to + * be PCI except for the start node which we assume (this is good + * because the start node is often a phb which may be missing PCI + * properties). + * We use the class-code as an indicator. If we run into + * one of these nodes we also assume its siblings are non-pci for + * performance. + */ +void *traverse_pci_devices(struct device_node *start, traverse_func pre, + void *data) +{ + struct device_node *dn, *nextdn; + void *ret; + + /* We started with a phb, iterate all childs */ + for (dn = start->child; dn; dn = nextdn) { + u32 *classp, class; + + nextdn = NULL; + classp = (u32 *)get_property(dn, "class-code", NULL); + class = classp ? *classp : 0; + + if (pre && ((ret = pre(dn, data)) != NULL)) + return ret; + + /* If we are a PCI bridge, go down */ + if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI || + (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)) + /* Depth first...do children */ + nextdn = dn->child; + else if (dn->sibling) + /* ok, try next sibling instead. */ + nextdn = dn->sibling; + if (!nextdn) { + /* Walk up to next valid sibling. */ + do { + dn = dn->parent; + if (dn == start) + return NULL; + } while (dn->sibling == NULL); + nextdn = dn->sibling; + } + } + return NULL; +} + +void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) +{ + struct device_node * dn = (struct device_node *) phb->arch_data; + + /* PHB nodes themselves must not match */ + dn->devfn = dn->busno = -1; + dn->phb = phb; + + /* Update dn->phb ptrs for new phb and children devices */ + traverse_pci_devices(dn, update_dn_pci_info, phb); +} + +/* + * Traversal func that looks for a value. + * If found, the device_node is returned (thus terminating the traversal). + */ +static void *is_devfn_node(struct device_node *dn, void *data) +{ + int busno = ((unsigned long)data >> 8) & 0xff; + int devfn = ((unsigned long)data) & 0xff; + + return ((devfn == dn->devfn) && (busno == dn->busno)) ? dn : NULL; +} + +/* + * This is the "slow" path for looking up a device_node from a + * pci_dev. It will hunt for the device under its parent's + * phb and then update sysdata for a future fastpath. + * + * It may also do fixups on the actual device since this happens + * on the first read/write. + * + * Note that it also must deal with devices that don't exist. + * In this case it may probe for real hardware ("just in case") + * and add a device_node to the device tree if necessary. + * + */ +struct device_node *fetch_dev_dn(struct pci_dev *dev) +{ + struct device_node *orig_dn = dev->sysdata; + struct pci_controller *phb = orig_dn->phb; /* assume same phb as orig_dn */ + struct device_node *phb_dn; + struct device_node *dn; + unsigned long searchval = (dev->bus->number << 8) | dev->devfn; + + phb_dn = phb->arch_data; + dn = traverse_pci_devices(phb_dn, is_devfn_node, (void *)searchval); + if (dn) + dev->sysdata = dn; + return dn; +} +EXPORT_SYMBOL(fetch_dev_dn); + +static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) +{ + struct device_node *np = node; + int err = NOTIFY_OK; + + switch (action) { + case PSERIES_RECONFIG_ADD: + update_dn_pci_info(np, np->parent->phb); + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block pci_dn_reconfig_nb = { + .notifier_call = pci_dn_reconfig_notifier, +}; + +/* + * Actually initialize the phbs. + * The buswalk on this phb has not happened yet. + */ +void __init pci_devs_phb_init(void) +{ + struct pci_controller *phb, *tmp; + + /* This must be done first so the device nodes have valid pci info! */ + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + pci_devs_phb_init_dynamic(phb); + + pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); +} diff --git a/arch/ppc64/kernel/pci_iommu.c b/arch/ppc64/kernel/pci_iommu.c new file mode 100644 index 00000000000..ef0a62b916b --- /dev/null +++ b/arch/ppc64/kernel/pci_iommu.c @@ -0,0 +1,139 @@ +/* + * arch/ppc64/kernel/pci_iommu.c + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup, new allocation schemes: + * Copyright (C) 2004 Olof Johansson, IBM Corporation + * + * Dynamic DMA mapping support, platform-independent parts. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pci.h" + +#ifdef CONFIG_PPC_ISERIES +#include +#endif /* CONFIG_PPC_ISERIES */ + +/* + * We can use ->sysdata directly and avoid the extra work in + * pci_device_to_OF_node since ->sysdata will have been initialised + * in the iommu init code for all devices. + */ +#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata)) + +static inline struct iommu_table *devnode_table(struct device *dev) +{ + struct pci_dev *pdev; + + if (!dev) { + pdev = ppc64_isabridge_dev; + if (!pdev) + return NULL; + } else + pdev = to_pci_dev(dev); + +#ifdef CONFIG_PPC_ISERIES + return ISERIES_DEVNODE(pdev)->iommu_table; +#endif /* CONFIG_PPC_ISERIES */ + +#ifdef CONFIG_PPC_MULTIPLATFORM + return PCI_GET_DN(pdev)->iommu_table; +#endif /* CONFIG_PPC_MULTIPLATFORM */ +} + + +/* Allocates a contiguous real buffer and creates mappings over it. + * Returns the virtual address of the buffer and sets dma_handle + * to the dma address (mapping) of the first page. + */ +static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, unsigned int __nocast flag) +{ + return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle, + flag); +} + +static void pci_iommu_free_coherent(struct device *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + iommu_free_coherent(devnode_table(hwdev), size, vaddr, dma_handle); +} + +/* Creates TCEs for a user provided buffer. The user buffer must be + * contiguous real kernel storage (not vmalloc). The address of the buffer + * passed here is the kernel (virtual) address of the buffer. The buffer + * need not be page aligned, the dma_addr_t returned will point to the same + * byte within the page as vaddr. + */ +static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr, + size_t size, enum dma_data_direction direction) +{ + return iommu_map_single(devnode_table(hwdev), vaddr, size, direction); +} + + +static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + iommu_unmap_single(devnode_table(hwdev), dma_handle, size, direction); +} + + +static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + return iommu_map_sg(pdev, devnode_table(pdev), sglist, + nelems, direction); +} + +static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + iommu_unmap_sg(devnode_table(pdev), sglist, nelems, direction); +} + +/* We support DMA to/from any memory page via the iommu */ +static int pci_iommu_dma_supported(struct device *dev, u64 mask) +{ + return 1; +} + +void pci_iommu_init(void) +{ + pci_dma_ops.alloc_coherent = pci_iommu_alloc_coherent; + pci_dma_ops.free_coherent = pci_iommu_free_coherent; + pci_dma_ops.map_single = pci_iommu_map_single; + pci_dma_ops.unmap_single = pci_iommu_unmap_single; + pci_dma_ops.map_sg = pci_iommu_map_sg; + pci_dma_ops.unmap_sg = pci_iommu_unmap_sg; + pci_dma_ops.dma_supported = pci_iommu_dma_supported; +} diff --git a/arch/ppc64/kernel/pmac.h b/arch/ppc64/kernel/pmac.h new file mode 100644 index 00000000000..40e1c5030f7 --- /dev/null +++ b/arch/ppc64/kernel/pmac.h @@ -0,0 +1,31 @@ +#ifndef __PMAC_H__ +#define __PMAC_H__ + +#include +#include + +/* + * Declaration for the various functions exported by the + * pmac_* files. Mostly for use by pmac_setup + */ + +extern void pmac_get_boot_time(struct rtc_time *tm); +extern void pmac_get_rtc_time(struct rtc_time *tm); +extern int pmac_set_rtc_time(struct rtc_time *tm); +extern void pmac_read_rtc_time(void); +extern void pmac_calibrate_decr(void); + +extern void pmac_pcibios_fixup(void); +extern void pmac_pci_init(void); +extern void pmac_setup_pci_dma(void); +extern void pmac_check_ht_link(void); + +extern void pmac_setup_smp(void); + +extern unsigned long pmac_ide_get_base(int index); +extern void pmac_ide_init_hwif_ports(hw_regs_t *hw, + unsigned long data_port, unsigned long ctrl_port, int *irq); + +extern void pmac_nvram_init(void); + +#endif /* __PMAC_H__ */ diff --git a/arch/ppc64/kernel/pmac_feature.c b/arch/ppc64/kernel/pmac_feature.c new file mode 100644 index 00000000000..7f1062d222c --- /dev/null +++ b/arch/ppc64/kernel/pmac_feature.c @@ -0,0 +1,676 @@ +/* + * arch/ppc/platforms/pmac_feature.c + * + * Copyright (C) 1996-2001 Paul Mackerras (paulus@cs.anu.edu.au) + * Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TODO: + * + * - Replace mdelay with some schedule loop if possible + * - Shorten some obfuscated delays on some routines (like modem + * power) + * - Refcount some clocks (see darwin) + * - Split split split... + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG_FEATURE + +#ifdef DEBUG_FEATURE +#define DBG(fmt...) printk(KERN_DEBUG fmt) +#else +#define DBG(fmt...) +#endif + +/* + * We use a single global lock to protect accesses. Each driver has + * to take care of its own locking + */ +static DEFINE_SPINLOCK(feature_lock __pmacdata); + +#define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); +#define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); + + +/* + * Instance of some macio stuffs + */ +struct macio_chip macio_chips[MAX_MACIO_CHIPS] __pmacdata; + +struct macio_chip* __pmac +macio_find(struct device_node* child, int type) +{ + while(child) { + int i; + + for (i=0; i < MAX_MACIO_CHIPS && macio_chips[i].of_node; i++) + if (child == macio_chips[i].of_node && + (!type || macio_chips[i].type == type)) + return &macio_chips[i]; + child = child->parent; + } + return NULL; +} + +static const char* macio_names[] __pmacdata = +{ + "Unknown", + "Grand Central", + "OHare", + "OHareII", + "Heathrow", + "Gatwick", + "Paddington", + "Keylargo", + "Pangea", + "Intrepid", + "K2" +}; + + + +/* + * Uninorth reg. access. Note that Uni-N regs are big endian + */ + +#define UN_REG(r) (uninorth_base + ((r) >> 2)) +#define UN_IN(r) (in_be32(UN_REG(r))) +#define UN_OUT(r,v) (out_be32(UN_REG(r), (v))) +#define UN_BIS(r,v) (UN_OUT((r), UN_IN(r) | (v))) +#define UN_BIC(r,v) (UN_OUT((r), UN_IN(r) & ~(v))) + +static struct device_node* uninorth_node __pmacdata; +static u32* uninorth_base __pmacdata; +static u32 uninorth_rev __pmacdata; +static void *u3_ht; + +extern struct device_node *k2_skiplist[2]; + +/* + * For each motherboard family, we have a table of functions pointers + * that handle the various features. + */ + +typedef long (*feature_call)(struct device_node* node, long param, long value); + +struct feature_table_entry { + unsigned int selector; + feature_call function; +}; + +struct pmac_mb_def +{ + const char* model_string; + const char* model_name; + int model_id; + struct feature_table_entry* features; + unsigned long board_flags; +}; +static struct pmac_mb_def pmac_mb __pmacdata; + +/* + * Here are the chip specific feature functions + */ + + +static long __pmac g5_read_gpio(struct device_node* node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + + return MACIO_IN8(param); +} + + +static long __pmac g5_write_gpio(struct device_node* node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + + MACIO_OUT8(param, (u8)(value & 0xff)); + return 0; +} + +static long __pmac g5_gmac_enable(struct device_node* node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + unsigned long flags; + + if (node == NULL) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + mb(); + k2_skiplist[0] = NULL; + } else { + k2_skiplist[0] = node; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + } + + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long __pmac g5_fw_enable(struct device_node* node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + unsigned long flags; + + if (node == NULL) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + mb(); + k2_skiplist[1] = NULL; + } else { + k2_skiplist[1] = node; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + } + + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long __pmac g5_mpic_enable(struct device_node* node, long param, long value) +{ + unsigned long flags; + + if (node->parent == NULL || strcmp(node->parent->name, "u3")) + return 0; + + LOCK(flags); + UN_BIS(U3_TOGGLE_REG, U3_MPIC_RESET | U3_MPIC_OUTPUT_ENABLE); + UNLOCK(flags); + + return 0; +} + +static long __pmac g5_eth_phy_reset(struct device_node* node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + struct device_node *phy; + int need_reset; + + /* + * We must not reset the combo PHYs, only the BCM5221 found in + * the iMac G5. + */ + phy = of_get_next_child(node, NULL); + if (!phy) + return -ENODEV; + need_reset = device_is_compatible(phy, "B5221"); + of_node_put(phy); + if (!need_reset) + return 0; + + /* PHY reset is GPIO 29, not in device-tree unfortunately */ + MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, + KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA); + /* Thankfully, this is now always called at a time when we can + * schedule by sungem. + */ + msleep(10); + MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, 0); + + return 0; +} + +#ifdef CONFIG_SMP +static long __pmac g5_reset_cpu(struct device_node* node, long param, long value) +{ + unsigned int reset_io = 0; + unsigned long flags; + struct macio_chip* macio; + struct device_node* np; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo2) + return -ENODEV; + + np = find_path_device("/cpus"); + if (np == NULL) + return -ENODEV; + for (np = np->child; np != NULL; np = np->sibling) { + u32* num = (u32 *)get_property(np, "reg", NULL); + u32* rst = (u32 *)get_property(np, "soft-reset", NULL); + if (num == NULL || rst == NULL) + continue; + if (param == *num) { + reset_io = *rst; + break; + } + } + if (np == NULL || reset_io == 0) + return -ENODEV; + + LOCK(flags); + MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(reset_io); + udelay(1); + MACIO_OUT8(reset_io, 0); + (void)MACIO_IN8(reset_io); + UNLOCK(flags); + + return 0; +} +#endif /* CONFIG_SMP */ + +/* + * This can be called from pmac_smp so isn't static + * + * This takes the second CPU off the bus on dual CPU machines + * running UP + */ +void __pmac g5_phy_disable_cpu1(void) +{ + UN_OUT(U3_API_PHY_CONFIG_1, 0); +} + +static long __pmac generic_get_mb_info(struct device_node* node, long param, long value) +{ + switch(param) { + case PMAC_MB_INFO_MODEL: + return pmac_mb.model_id; + case PMAC_MB_INFO_FLAGS: + return pmac_mb.board_flags; + case PMAC_MB_INFO_NAME: + /* hack hack hack... but should work */ + *((const char **)value) = pmac_mb.model_name; + return 0; + } + return -EINVAL; +} + + +/* + * Table definitions + */ + +/* Used on any machine + */ +static struct feature_table_entry any_features[] __pmacdata = { + { PMAC_FTR_GET_MB_INFO, generic_get_mb_info }, + { 0, NULL } +}; + +/* G5 features + */ +static struct feature_table_entry g5_features[] __pmacdata = { + { PMAC_FTR_GMAC_ENABLE, g5_gmac_enable }, + { PMAC_FTR_1394_ENABLE, g5_fw_enable }, + { PMAC_FTR_ENABLE_MPIC, g5_mpic_enable }, + { PMAC_FTR_READ_GPIO, g5_read_gpio }, + { PMAC_FTR_WRITE_GPIO, g5_write_gpio }, + { PMAC_FTR_GMAC_PHY_RESET, g5_eth_phy_reset }, +#ifdef CONFIG_SMP + { PMAC_FTR_RESET_CPU, g5_reset_cpu }, +#endif /* CONFIG_SMP */ + { 0, NULL } +}; + +static struct pmac_mb_def pmac_mb_defs[] __pmacdata = { + { "PowerMac7,2", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5, g5_features, + 0, + }, + { "PowerMac7,3", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5, g5_features, + 0, + }, + { "PowerMac8,1", "iMac G5", + PMAC_TYPE_IMAC_G5, g5_features, + 0, + }, + { "PowerMac9,1", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5_U3L, g5_features, + 0, + }, + { "RackMac3,1", "XServe G5", + PMAC_TYPE_XSERVE_G5, g5_features, + 0, + }, +}; + +/* + * The toplevel feature_call callback + */ +long __pmac pmac_do_feature_call(unsigned int selector, ...) +{ + struct device_node* node; + long param, value; + int i; + feature_call func = NULL; + va_list args; + + if (pmac_mb.features) + for (i=0; pmac_mb.features[i].function; i++) + if (pmac_mb.features[i].selector == selector) { + func = pmac_mb.features[i].function; + break; + } + if (!func) + for (i=0; any_features[i].function; i++) + if (any_features[i].selector == selector) { + func = any_features[i].function; + break; + } + if (!func) + return -ENODEV; + + va_start(args, selector); + node = (struct device_node*)va_arg(args, void*); + param = va_arg(args, long); + value = va_arg(args, long); + va_end(args); + + return func(node, param, value); +} + +static int __init probe_motherboard(void) +{ + int i; + struct macio_chip* macio = &macio_chips[0]; + const char* model = NULL; + struct device_node *dt; + + /* Lookup known motherboard type in device-tree. First try an + * exact match on the "model" property, then try a "compatible" + * match is none is found. + */ + dt = find_devices("device-tree"); + if (dt != NULL) + model = (const char *) get_property(dt, "model", NULL); + for(i=0; model && i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) { + if (strcmp(model, pmac_mb_defs[i].model_string) == 0) { + pmac_mb = pmac_mb_defs[i]; + goto found; + } + } + for(i=0; i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) { + if (machine_is_compatible(pmac_mb_defs[i].model_string)) { + pmac_mb = pmac_mb_defs[i]; + goto found; + } + } + + /* Fallback to selection depending on mac-io chip type */ + switch(macio->type) { + case macio_keylargo2: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2; + pmac_mb.model_name = "Unknown K2-based"; + pmac_mb.features = g5_features; + + default: + return -ENODEV; + } +found: + /* Check for "mobile" machine */ + if (model && (strncmp(model, "PowerBook", 9) == 0 + || strncmp(model, "iBook", 5) == 0)) + pmac_mb.board_flags |= PMAC_MB_MOBILE; + + + printk(KERN_INFO "PowerMac motherboard: %s\n", pmac_mb.model_name); + return 0; +} + +/* Initialize the Core99 UniNorth host bridge and memory controller + */ +static void __init probe_uninorth(void) +{ + uninorth_node = of_find_node_by_name(NULL, "u3"); + if (uninorth_node && uninorth_node->n_addrs > 0) { + /* Small hack until I figure out if parsing in prom.c is correct. I should + * get rid of those pre-parsed junk anyway + */ + unsigned long address = uninorth_node->addrs[0].address; + uninorth_base = ioremap(address, 0x40000); + uninorth_rev = in_be32(UN_REG(UNI_N_VERSION)); + u3_ht = ioremap(address + U3_HT_CONFIG_BASE, 0x1000); + } else + uninorth_node = NULL; + + if (!uninorth_node) + return; + + printk(KERN_INFO "Found U3 memory controller & host bridge, revision: %d\n", + uninorth_rev); + printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base); + +} + +static void __init probe_one_macio(const char* name, const char* compat, int type) +{ + struct device_node* node; + int i; + volatile u32* base; + u32* revp; + + node = find_devices(name); + if (!node || !node->n_addrs) + return; + if (compat) + do { + if (device_is_compatible(node, compat)) + break; + node = node->next; + } while (node); + if (!node) + return; + for(i=0; i= MAX_MACIO_CHIPS) { + printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n"); + printk(KERN_ERR "pmac_feature: %s skipped\n", node->full_name); + return; + } + base = (volatile u32*)ioremap(node->addrs[0].address, node->addrs[0].size); + if (!base) { + printk(KERN_ERR "pmac_feature: Can't map mac-io chip !\n"); + return; + } + if (type == macio_keylargo) { + u32* did = (u32 *)get_property(node, "device-id", NULL); + if (*did == 0x00000025) + type = macio_pangea; + if (*did == 0x0000003e) + type = macio_intrepid; + } + macio_chips[i].of_node = node; + macio_chips[i].type = type; + macio_chips[i].base = base; + macio_chips[i].flags = MACIO_FLAG_SCCB_ON | MACIO_FLAG_SCCB_ON; + macio_chips[i].name = macio_names[type]; + revp = (u32 *)get_property(node, "revision-id", NULL); + if (revp) + macio_chips[i].rev = *revp; + printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n", + macio_names[type], macio_chips[i].rev, macio_chips[i].base); +} + +static int __init +probe_macios(void) +{ + probe_one_macio("mac-io", "K2-Keylargo", macio_keylargo2); + + macio_chips[0].lbus.index = 0; + macio_chips[1].lbus.index = 1; + + return (macio_chips[0].of_node == NULL) ? -ENODEV : 0; +} + +static void __init +set_initial_features(void) +{ + struct device_node *np; + + if (macio_chips[0].type == macio_keylargo2) { +#ifndef CONFIG_SMP + /* On SMP machines running UP, we have the second CPU eating + * bus cycles. We need to take it off the bus. This is done + * from pmac_smp for SMP kernels running on one CPU + */ + np = of_find_node_by_type(NULL, "cpu"); + if (np != NULL) + np = of_find_node_by_type(np, "cpu"); + if (np != NULL) { + g5_phy_disable_cpu1(); + of_node_put(np); + } +#endif /* CONFIG_SMP */ + /* Enable GMAC for now for PCI probing. It will be disabled + * later on after PCI probe + */ + np = of_find_node_by_name(NULL, "ethernet"); + while(np) { + if (device_is_compatible(np, "K2-GMAC")) + g5_gmac_enable(np, 0, 1); + np = of_find_node_by_name(np, "ethernet"); + } + + /* Enable FW before PCI probe. Will be disabled later on + * Note: We should have a batter way to check that we are + * dealing with uninorth internal cell and not a PCI cell + * on the external PCI. The code below works though. + */ + np = of_find_node_by_name(NULL, "firewire"); + while(np) { + if (device_is_compatible(np, "pci106b,5811")) { + macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; + g5_fw_enable(np, 0, 1); + } + np = of_find_node_by_name(np, "firewire"); + } + } +} + +void __init +pmac_feature_init(void) +{ + /* Detect the UniNorth memory controller */ + probe_uninorth(); + + /* Probe mac-io controllers */ + if (probe_macios()) { + printk(KERN_WARNING "No mac-io chip found\n"); + return; + } + + /* Setup low-level i2c stuffs */ + pmac_init_low_i2c(); + + /* Probe machine type */ + if (probe_motherboard()) + printk(KERN_WARNING "Unknown PowerMac !\n"); + + /* Set some initial features (turn off some chips that will + * be later turned on) + */ + set_initial_features(); +} + +int __init pmac_feature_late_init(void) +{ +#if 0 + struct device_node* np; + + /* Request some resources late */ + if (uninorth_node) + request_OF_resource(uninorth_node, 0, NULL); + np = find_devices("hammerhead"); + if (np) + request_OF_resource(np, 0, NULL); + np = find_devices("interrupt-controller"); + if (np) + request_OF_resource(np, 0, NULL); +#endif + return 0; +} + +device_initcall(pmac_feature_late_init); + +#if 0 +static void dump_HT_speeds(char *name, u32 cfg, u32 frq) +{ + int freqs[16] = { 200,300,400,500,600,800,1000,0,0,0,0,0,0,0,0,0 }; + int bits[8] = { 8,16,0,32,2,4,0,0 }; + int freq = (frq >> 8) & 0xf; + + if (freqs[freq] == 0) + printk("%s: Unknown HT link frequency %x\n", name, freq); + else + printk("%s: %d MHz on main link, (%d in / %d out) bits width\n", + name, freqs[freq], + bits[(cfg >> 28) & 0x7], bits[(cfg >> 24) & 0x7]); +} +#endif + +void __init pmac_check_ht_link(void) +{ +#if 0 /* Disabled for now */ + u32 ufreq, freq, ucfg, cfg; + struct device_node *pcix_node; + u8 px_bus, px_devfn; + struct pci_controller *px_hose; + + (void)in_be32(u3_ht + U3_HT_LINK_COMMAND); + ucfg = cfg = in_be32(u3_ht + U3_HT_LINK_CONFIG); + ufreq = freq = in_be32(u3_ht + U3_HT_LINK_FREQ); + dump_HT_speeds("U3 HyperTransport", cfg, freq); + + pcix_node = of_find_compatible_node(NULL, "pci", "pci-x"); + if (pcix_node == NULL) { + printk("No PCI-X bridge found\n"); + return; + } + px_hose = pcix_node->phb; + px_bus = pcix_node->busno; + px_devfn = pcix_node->devfn; + + early_read_config_dword(px_hose, px_bus, px_devfn, 0xc4, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xcc, &freq); + dump_HT_speeds("PCI-X HT Uplink", cfg, freq); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xc8, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xd0, &freq); + dump_HT_speeds("PCI-X HT Downlink", cfg, freq); +#endif +} diff --git a/arch/ppc64/kernel/pmac_low_i2c.c b/arch/ppc64/kernel/pmac_low_i2c.c new file mode 100644 index 00000000000..f3f39e8e337 --- /dev/null +++ b/arch/ppc64/kernel/pmac_low_i2c.c @@ -0,0 +1,523 @@ +/* + * arch/ppc/platforms/pmac_low_i2c.c + * + * Copyright (C) 2003 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This file contains some low-level i2c access routines that + * need to be used by various bits of the PowerMac platform code + * at times where the real asynchronous & interrupt driven driver + * cannot be used. The API borrows some semantics from the darwin + * driver in order to ease the implementation of the platform + * properties parser + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_LOW_I2C_HOST 4 + +#ifdef DEBUG +#define DBG(x...) do {\ + printk(KERN_DEBUG "KW:" x); \ + } while(0) +#else +#define DBG(x...) +#endif + +struct low_i2c_host; + +typedef int (*low_i2c_func_t)(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len); + +struct low_i2c_host +{ + struct device_node *np; /* OF device node */ + struct semaphore mutex; /* Access mutex for use by i2c-keywest */ + low_i2c_func_t func; /* Access function */ + unsigned int is_open : 1; /* Poor man's access control */ + int mode; /* Current mode */ + int channel; /* Current channel */ + int num_channels; /* Number of channels */ + void __iomem *base; /* For keywest-i2c, base address */ + int bsteps; /* And register stepping */ + int speed; /* And speed */ +}; + +static struct low_i2c_host low_i2c_hosts[MAX_LOW_I2C_HOST]; + +/* No locking is necessary on allocation, we are running way before + * anything can race with us + */ +static struct low_i2c_host *find_low_i2c_host(struct device_node *np) +{ + int i; + + for (i = 0; i < MAX_LOW_I2C_HOST; i++) + if (low_i2c_hosts[i].np == np) + return &low_i2c_hosts[i]; + return NULL; +} + +/* + * + * i2c-keywest implementation (UniNorth, U2, U3, Keylargo's) + * + */ + +/* + * Keywest i2c definitions borrowed from drivers/i2c/i2c-keywest.h, + * should be moved somewhere in include/asm-ppc/ + */ +/* Register indices */ +typedef enum { + reg_mode = 0, + reg_control, + reg_status, + reg_isr, + reg_ier, + reg_addr, + reg_subaddr, + reg_data +} reg_t; + + +/* Mode register */ +#define KW_I2C_MODE_100KHZ 0x00 +#define KW_I2C_MODE_50KHZ 0x01 +#define KW_I2C_MODE_25KHZ 0x02 +#define KW_I2C_MODE_DUMB 0x00 +#define KW_I2C_MODE_STANDARD 0x04 +#define KW_I2C_MODE_STANDARDSUB 0x08 +#define KW_I2C_MODE_COMBINED 0x0C +#define KW_I2C_MODE_MODE_MASK 0x0C +#define KW_I2C_MODE_CHAN_MASK 0xF0 + +/* Control register */ +#define KW_I2C_CTL_AAK 0x01 +#define KW_I2C_CTL_XADDR 0x02 +#define KW_I2C_CTL_STOP 0x04 +#define KW_I2C_CTL_START 0x08 + +/* Status register */ +#define KW_I2C_STAT_BUSY 0x01 +#define KW_I2C_STAT_LAST_AAK 0x02 +#define KW_I2C_STAT_LAST_RW 0x04 +#define KW_I2C_STAT_SDA 0x08 +#define KW_I2C_STAT_SCL 0x10 + +/* IER & ISR registers */ +#define KW_I2C_IRQ_DATA 0x01 +#define KW_I2C_IRQ_ADDR 0x02 +#define KW_I2C_IRQ_STOP 0x04 +#define KW_I2C_IRQ_START 0x08 +#define KW_I2C_IRQ_MASK 0x0F + +/* State machine states */ +enum { + state_idle, + state_addr, + state_read, + state_write, + state_stop, + state_dead +}; + +#define WRONG_STATE(name) do {\ + printk(KERN_DEBUG "KW: wrong state. Got %s, state: %s (isr: %02x)\n", \ + name, __kw_state_names[state], isr); \ + } while(0) + +static const char *__kw_state_names[] = { + "state_idle", + "state_addr", + "state_read", + "state_write", + "state_stop", + "state_dead" +}; + +static inline u8 __kw_read_reg(struct low_i2c_host *host, reg_t reg) +{ + return readb(host->base + (((unsigned int)reg) << host->bsteps)); +} + +static inline void __kw_write_reg(struct low_i2c_host *host, reg_t reg, u8 val) +{ + writeb(val, host->base + (((unsigned)reg) << host->bsteps)); + (void)__kw_read_reg(host, reg_subaddr); +} + +#define kw_write_reg(reg, val) __kw_write_reg(host, reg, val) +#define kw_read_reg(reg) __kw_read_reg(host, reg) + + +/* Don't schedule, the g5 fan controller is too + * timing sensitive + */ +static u8 kw_wait_interrupt(struct low_i2c_host* host) +{ + int i, j; + u8 isr; + + for (i = 0; i < 100000; i++) { + isr = kw_read_reg(reg_isr) & KW_I2C_IRQ_MASK; + if (isr != 0) + return isr; + + /* This code is used with the timebase frozen, we cannot rely + * on udelay ! For now, just use a bogus loop + */ + for (j = 1; j < 10000; j++) + mb(); + } + return isr; +} + +static int kw_handle_interrupt(struct low_i2c_host *host, int state, int rw, int *rc, u8 **data, int *len, u8 isr) +{ + u8 ack; + + DBG("kw_handle_interrupt(%s, isr: %x)\n", __kw_state_names[state], isr); + + if (isr == 0) { + if (state != state_stop) { + DBG("KW: Timeout !\n"); + *rc = -EIO; + goto stop; + } + if (state == state_stop) { + ack = kw_read_reg(reg_status); + if (!(ack & KW_I2C_STAT_BUSY)) { + state = state_idle; + kw_write_reg(reg_ier, 0x00); + } + } + return state; + } + + if (isr & KW_I2C_IRQ_ADDR) { + ack = kw_read_reg(reg_status); + if (state != state_addr) { + kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR); + WRONG_STATE("KW_I2C_IRQ_ADDR"); + *rc = -EIO; + goto stop; + } + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + *rc = -ENODEV; + DBG("KW: NAK on address\n"); + return state_stop; + } else { + if (rw) { + state = state_read; + if (*len > 1) + kw_write_reg(reg_control, KW_I2C_CTL_AAK); + } else { + state = state_write; + kw_write_reg(reg_data, **data); + (*data)++; (*len)--; + } + } + kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR); + } + + if (isr & KW_I2C_IRQ_DATA) { + if (state == state_read) { + **data = kw_read_reg(reg_data); + (*data)++; (*len)--; + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + if ((*len) == 0) + state = state_stop; + else if ((*len) == 1) + kw_write_reg(reg_control, 0); + } else if (state == state_write) { + ack = kw_read_reg(reg_status); + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + DBG("KW: nack on data write\n"); + *rc = -EIO; + goto stop; + } else if (*len) { + kw_write_reg(reg_data, **data); + (*data)++; (*len)--; + } else { + kw_write_reg(reg_control, KW_I2C_CTL_STOP); + state = state_stop; + *rc = 0; + } + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + } else { + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + WRONG_STATE("KW_I2C_IRQ_DATA"); + if (state != state_stop) { + *rc = -EIO; + goto stop; + } + } + } + + if (isr & KW_I2C_IRQ_STOP) { + kw_write_reg(reg_isr, KW_I2C_IRQ_STOP); + if (state != state_stop) { + WRONG_STATE("KW_I2C_IRQ_STOP"); + *rc = -EIO; + } + return state_idle; + } + + if (isr & KW_I2C_IRQ_START) + kw_write_reg(reg_isr, KW_I2C_IRQ_START); + + return state; + + stop: + kw_write_reg(reg_control, KW_I2C_CTL_STOP); + return state_stop; +} + +static int keywest_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 subaddr, u8 *data, int len) +{ + u8 mode_reg = host->speed; + int state = state_addr; + int rc = 0; + + /* Setup mode & subaddress if any */ + switch(host->mode) { + case pmac_low_i2c_mode_dumb: + printk(KERN_ERR "low_i2c: Dumb mode not supported !\n"); + return -EINVAL; + case pmac_low_i2c_mode_std: + mode_reg |= KW_I2C_MODE_STANDARD; + break; + case pmac_low_i2c_mode_stdsub: + mode_reg |= KW_I2C_MODE_STANDARDSUB; + break; + case pmac_low_i2c_mode_combined: + mode_reg |= KW_I2C_MODE_COMBINED; + break; + } + + /* Setup channel & clear pending irqs */ + kw_write_reg(reg_isr, kw_read_reg(reg_isr)); + kw_write_reg(reg_mode, mode_reg | (host->channel << 4)); + kw_write_reg(reg_status, 0); + + /* Set up address and r/w bit */ + kw_write_reg(reg_addr, addr); + + /* Set up the sub address */ + if ((mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_STANDARDSUB + || (mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_COMBINED) + kw_write_reg(reg_subaddr, subaddr); + + /* Start sending address & disable interrupt*/ + kw_write_reg(reg_ier, 0 /*KW_I2C_IRQ_MASK*/); + kw_write_reg(reg_control, KW_I2C_CTL_XADDR); + + /* State machine, to turn into an interrupt handler */ + while(state != state_idle) { + u8 isr = kw_wait_interrupt(host); + state = kw_handle_interrupt(host, state, addr & 1, &rc, &data, &len, isr); + } + + return rc; +} + +static void keywest_low_i2c_add(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(NULL); + u32 *psteps, *prate, steps, aoffset = 0; + struct device_node *parent; + + if (host == NULL) { + printk(KERN_ERR "low_i2c: Can't allocate host for %s\n", + np->full_name); + return; + } + memset(host, 0, sizeof(*host)); + + init_MUTEX(&host->mutex); + host->np = of_node_get(np); + psteps = (u32 *)get_property(np, "AAPL,address-step", NULL); + steps = psteps ? (*psteps) : 0x10; + for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++) + steps >>= 1; + parent = of_get_parent(np); + host->num_channels = 1; + if (parent && parent->name[0] == 'u') { + host->num_channels = 2; + aoffset = 3; + } + /* Select interface rate */ + host->speed = KW_I2C_MODE_100KHZ; + prate = (u32 *)get_property(np, "AAPL,i2c-rate", NULL); + if (prate) switch(*prate) { + case 100: + host->speed = KW_I2C_MODE_100KHZ; + break; + case 50: + host->speed = KW_I2C_MODE_50KHZ; + break; + case 25: + host->speed = KW_I2C_MODE_25KHZ; + break; + } + + host->mode = pmac_low_i2c_mode_std; + host->base = ioremap(np->addrs[0].address + aoffset, + np->addrs[0].size); + host->func = keywest_low_i2c_func; +} + +/* + * + * PMU implementation + * + */ + + +#ifdef CONFIG_ADB_PMU + +static int pmu_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len) +{ + // TODO + return -ENODEV; +} + +static void pmu_low_i2c_add(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(NULL); + + if (host == NULL) { + printk(KERN_ERR "low_i2c: Can't allocate host for %s\n", + np->full_name); + return; + } + memset(host, 0, sizeof(*host)); + + init_MUTEX(&host->mutex); + host->np = of_node_get(np); + host->num_channels = 3; + host->mode = pmac_low_i2c_mode_std; + host->func = pmu_low_i2c_func; +} + +#endif /* CONFIG_ADB_PMU */ + +void __init pmac_init_low_i2c(void) +{ + struct device_node *np; + + /* Probe keywest-i2c busses */ + np = of_find_compatible_node(NULL, "i2c", "keywest-i2c"); + while(np) { + keywest_low_i2c_add(np); + np = of_find_compatible_node(np, "i2c", "keywest-i2c"); + } + +#ifdef CONFIG_ADB_PMU + /* Probe PMU busses */ + np = of_find_node_by_name(NULL, "via-pmu"); + if (np) + pmu_low_i2c_add(np); +#endif /* CONFIG_ADB_PMU */ + + /* TODO: Add CUDA support as well */ +} + +int pmac_low_i2c_lock(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + down(&host->mutex); + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_lock); + +int pmac_low_i2c_unlock(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + up(&host->mutex); + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_unlock); + + +int pmac_low_i2c_open(struct device_node *np, int channel) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + + if (channel >= host->num_channels) + return -EINVAL; + + down(&host->mutex); + host->is_open = 1; + host->channel = channel; + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_open); + +int pmac_low_i2c_close(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + + host->is_open = 0; + up(&host->mutex); + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_close); + +int pmac_low_i2c_setmode(struct device_node *np, int mode) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + WARN_ON(!host->is_open); + host->mode = mode; + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_setmode); + +int pmac_low_i2c_xfer(struct device_node *np, u8 addrdir, u8 subaddr, u8 *data, int len) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + WARN_ON(!host->is_open); + + return host->func(host, addrdir, subaddr, data, len); +} +EXPORT_SYMBOL(pmac_low_i2c_xfer); + diff --git a/arch/ppc64/kernel/pmac_nvram.c b/arch/ppc64/kernel/pmac_nvram.c new file mode 100644 index 00000000000..e32a902236e --- /dev/null +++ b/arch/ppc64/kernel/pmac_nvram.c @@ -0,0 +1,495 @@ +/* + * arch/ppc/platforms/pmac_nvram.c + * + * Copyright (C) 2002 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Todo: - add support for the OF persistent properties + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +#define NVRAM_SIZE 0x2000 /* 8kB of non-volatile RAM */ + +#define CORE99_SIGNATURE 0x5a +#define CORE99_ADLER_START 0x14 + +/* On Core99, nvram is either a sharp, a micron or an AMD flash */ +#define SM_FLASH_STATUS_DONE 0x80 +#define SM_FLASH_STATUS_ERR 0x38 + +#define SM_FLASH_CMD_ERASE_CONFIRM 0xd0 +#define SM_FLASH_CMD_ERASE_SETUP 0x20 +#define SM_FLASH_CMD_RESET 0xff +#define SM_FLASH_CMD_WRITE_SETUP 0x40 +#define SM_FLASH_CMD_CLEAR_STATUS 0x50 +#define SM_FLASH_CMD_READ_STATUS 0x70 + +/* CHRP NVRAM header */ +struct chrp_header { + u8 signature; + u8 cksum; + u16 len; + char name[12]; + u8 data[0]; +}; + +struct core99_header { + struct chrp_header hdr; + u32 adler; + u32 generation; + u32 reserved[2]; +}; + +/* + * Read and write the non-volatile RAM on PowerMacs and CHRP machines. + */ +static volatile unsigned char *nvram_data; +static int core99_bank = 0; +// XXX Turn that into a sem +static DEFINE_SPINLOCK(nv_lock); + +extern int system_running; + +static int (*core99_write_bank)(int bank, u8* datas); +static int (*core99_erase_bank)(int bank); + +static char *nvram_image __pmacdata; + + +static ssize_t __pmac core99_nvram_read(char *buf, size_t count, loff_t *index) +{ + int i; + + if (nvram_image == NULL) + return -ENODEV; + if (*index > NVRAM_SIZE) + return 0; + + i = *index; + if (i + count > NVRAM_SIZE) + count = NVRAM_SIZE - i; + + memcpy(buf, &nvram_image[i], count); + *index = i + count; + return count; +} + +static ssize_t __pmac core99_nvram_write(char *buf, size_t count, loff_t *index) +{ + int i; + + if (nvram_image == NULL) + return -ENODEV; + if (*index > NVRAM_SIZE) + return 0; + + i = *index; + if (i + count > NVRAM_SIZE) + count = NVRAM_SIZE - i; + + memcpy(&nvram_image[i], buf, count); + *index = i + count; + return count; +} + +static ssize_t __pmac core99_nvram_size(void) +{ + if (nvram_image == NULL) + return -ENODEV; + return NVRAM_SIZE; +} + +static u8 __pmac chrp_checksum(struct chrp_header* hdr) +{ + u8 *ptr; + u16 sum = hdr->signature; + for (ptr = (u8 *)&hdr->len; ptr < hdr->data; ptr++) + sum += *ptr; + while (sum > 0xFF) + sum = (sum & 0xFF) + (sum>>8); + return sum; +} + +static u32 __pmac core99_calc_adler(u8 *buffer) +{ + int cnt; + u32 low, high; + + buffer += CORE99_ADLER_START; + low = 1; + high = 0; + for (cnt=0; cnt<(NVRAM_SIZE-CORE99_ADLER_START); cnt++) { + if ((cnt % 5000) == 0) { + high %= 65521UL; + high %= 65521UL; + } + low += buffer[cnt]; + high += low; + } + low %= 65521UL; + high %= 65521UL; + + return (high << 16) | low; +} + +static u32 __pmac core99_check(u8* datas) +{ + struct core99_header* hdr99 = (struct core99_header*)datas; + + if (hdr99->hdr.signature != CORE99_SIGNATURE) { + DBG("Invalid signature\n"); + return 0; + } + if (hdr99->hdr.cksum != chrp_checksum(&hdr99->hdr)) { + DBG("Invalid checksum\n"); + return 0; + } + if (hdr99->adler != core99_calc_adler(datas)) { + DBG("Invalid adler\n"); + return 0; + } + return hdr99->generation; +} + +static int __pmac sm_erase_bank(int bank) +{ + int stat, i; + unsigned long timeout; + + u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank); + + out_8(base, SM_FLASH_CMD_ERASE_SETUP); + out_8(base, SM_FLASH_CMD_ERASE_CONFIRM); + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: Sharp/Miron flash erase timeout !\n"); + break; + } + out_8(base, SM_FLASH_CMD_READ_STATUS); + stat = in_8(base); + } while (!(stat & SM_FLASH_STATUS_DONE)); + + out_8(base, SM_FLASH_CMD_CLEAR_STATUS); + out_8(base, SM_FLASH_CMD_RESET); + + for (i=0; i 1000000) { + printk(KERN_ERR "nvram: Sharp/Micron flash write timeout !\n"); + break; + } + out_8(base, SM_FLASH_CMD_READ_STATUS); + stat = in_8(base); + } while (!(stat & SM_FLASH_STATUS_DONE)); + if (!(stat & SM_FLASH_STATUS_DONE)) + break; + } + out_8(base, SM_FLASH_CMD_CLEAR_STATUS); + out_8(base, SM_FLASH_CMD_RESET); + for (i=0; i 1000000) { + printk(KERN_ERR "nvram: AMD flash erase timeout !\n"); + break; + } + stat = in_8(base) ^ in_8(base); + } while (stat != 0); + + /* Reset */ + out_8(base, 0xf0); + udelay(1); + + for (i=0; i 1000000) { + printk(KERN_ERR "nvram: AMD flash write timeout !\n"); + break; + } + stat = in_8(base) ^ in_8(base); + } while (stat != 0); + if (stat != 0) + break; + } + + /* Reset */ + out_8(base, 0xf0); + udelay(1); + + for (i=0; igeneration++; + hdr99->hdr.signature = CORE99_SIGNATURE; + hdr99->hdr.cksum = chrp_checksum(&hdr99->hdr); + hdr99->adler = core99_calc_adler(nvram_image); + core99_bank = core99_bank ? 0 : 1; + if (core99_erase_bank) + if (core99_erase_bank(core99_bank)) { + printk("nvram: Error erasing bank %d\n", core99_bank); + goto bail; + } + if (core99_write_bank) + if (core99_write_bank(core99_bank, nvram_image)) + printk("nvram: Error writing bank %d\n", core99_bank); + bail: + spin_unlock_irqrestore(&nv_lock, flags); + + return 0; +} + +int __init pmac_nvram_init(void) +{ + struct device_node *dp; + u32 gen_bank0, gen_bank1; + int i; + + dp = find_devices("nvram"); + if (dp == NULL) { + printk(KERN_ERR "Can't find NVRAM device\n"); + return -ENODEV; + } + if (!device_is_compatible(dp, "nvram,flash")) { + printk(KERN_ERR "Incompatible type of NVRAM\n"); + return -ENXIO; + } + + nvram_image = alloc_bootmem(NVRAM_SIZE); + if (nvram_image == NULL) { + printk(KERN_ERR "nvram: can't allocate ram image\n"); + return -ENOMEM; + } + nvram_data = ioremap(dp->addrs[0].address, NVRAM_SIZE*2); + + DBG("nvram: Checking bank 0...\n"); + + gen_bank0 = core99_check((u8 *)nvram_data); + gen_bank1 = core99_check((u8 *)nvram_data + NVRAM_SIZE); + core99_bank = (gen_bank0 < gen_bank1) ? 1 : 0; + + DBG("nvram: gen0=%d, gen1=%d\n", gen_bank0, gen_bank1); + DBG("nvram: Active bank is: %d\n", core99_bank); + + for (i=0; iindex; +} + +u8 __pmac pmac_xpram_read(int xpaddr) +{ + int offset = pmac_get_partition(pmac_nvram_XPRAM); + loff_t index; + u8 buf; + ssize_t count; + + if (offset < 0 || xpaddr < 0 || xpaddr > 0x100) + return 0xff; + index = offset + xpaddr; + + count = ppc_md.nvram_read(&buf, 1, &index); + if (count != 1) + return 0xff; + return buf; +} + +void __pmac pmac_xpram_write(int xpaddr, u8 data) +{ + int offset = pmac_get_partition(pmac_nvram_XPRAM); + loff_t index; + u8 buf; + + if (offset < 0 || xpaddr < 0 || xpaddr > 0x100) + return; + index = offset + xpaddr; + buf = data; + + ppc_md.nvram_write(&buf, 1, &index); +} + +EXPORT_SYMBOL(pmac_get_partition); +EXPORT_SYMBOL(pmac_xpram_read); +EXPORT_SYMBOL(pmac_xpram_write); diff --git a/arch/ppc64/kernel/pmac_pci.c b/arch/ppc64/kernel/pmac_pci.c new file mode 100644 index 00000000000..71fe911ad18 --- /dev/null +++ b/arch/ppc64/kernel/pmac_pci.c @@ -0,0 +1,793 @@ +/* + * Support for PCI bridges found on Power Macintoshes. + * At present the "bandit" and "chaos" bridges are supported. + * Fortunately you access configuration space in the same + * way with either bridge. + * + * Copyright (C) 2003 Benjamin Herrenschmuidt (benh@kernel.crashing.org) + * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "pci.h" +#include "pmac.h" + +#define DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +/* XXX Could be per-controller, but I don't think we risk anything by + * assuming we won't have both UniNorth and Bandit */ +static int has_uninorth; +static struct pci_controller *u3_agp; +struct device_node *k2_skiplist[2]; + +static int __init fixup_one_level_bus_range(struct device_node *node, int higher) +{ + for (; node != 0;node = node->sibling) { + int * bus_range; + unsigned int *class_code; + int len; + + /* For PCI<->PCI bridges or CardBus bridges, we go down */ + class_code = (unsigned int *) get_property(node, "class-code", NULL); + if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && + (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) + continue; + bus_range = (int *) get_property(node, "bus-range", &len); + if (bus_range != NULL && len > 2 * sizeof(int)) { + if (bus_range[1] > higher) + higher = bus_range[1]; + } + higher = fixup_one_level_bus_range(node->child, higher); + } + return higher; +} + +/* This routine fixes the "bus-range" property of all bridges in the + * system since they tend to have their "last" member wrong on macs + * + * Note that the bus numbers manipulated here are OF bus numbers, they + * are not Linux bus numbers. + */ +static void __init fixup_bus_range(struct device_node *bridge) +{ + int * bus_range; + int len; + + /* Lookup the "bus-range" property for the hose */ + bus_range = (int *) get_property(bridge, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s\n", + bridge->full_name); + return; + } + bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); +} + +/* + * Apple MacRISC (U3, UniNorth, Bandit, Chaos) PCI controllers. + * + * The "Bandit" version is present in all early PCI PowerMacs, + * and up to the first ones using Grackle. Some machines may + * have 2 bandit controllers (2 PCI busses). + * + * "Chaos" is used in some "Bandit"-type machines as a bridge + * for the separate display bus. It is accessed the same + * way as bandit, but cannot be probed for devices. It therefore + * has its own config access functions. + * + * The "UniNorth" version is present in all Core99 machines + * (iBook, G4, new IMacs, and all the recent Apple machines). + * It contains 3 controllers in one ASIC. + * + * The U3 is the bridge used on G5 machines. It contains on + * AGP bus which is dealt with the old UniNorth access routines + * and an HyperTransport bus which uses its own set of access + * functions. + */ + +#define MACRISC_CFA0(devfn, off) \ + ((1 << (unsigned long)PCI_SLOT(dev_fn)) \ + | (((unsigned long)PCI_FUNC(dev_fn)) << 8) \ + | (((unsigned long)(off)) & 0xFCUL)) + +#define MACRISC_CFA1(bus, devfn, off) \ + ((((unsigned long)(bus)) << 16) \ + |(((unsigned long)(devfn)) << 8) \ + |(((unsigned long)(off)) & 0xFCUL) \ + |1UL) + +static unsigned long __pmac macrisc_cfg_access(struct pci_controller* hose, + u8 bus, u8 dev_fn, u8 offset) +{ + unsigned int caddr; + + if (bus == hose->first_busno) { + if (dev_fn < (11 << 3)) + return 0; + caddr = MACRISC_CFA0(dev_fn, offset); + } else + caddr = MACRISC_CFA1(bus, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= has_uninorth ? 0x07 : 0x03; + return ((unsigned long)hose->cfg_data) + offset; +} + +static int __pmac macrisc_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = macrisc_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int __pmac macrisc_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = macrisc_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops macrisc_pci_ops = +{ + macrisc_read_config, + macrisc_write_config +}; + +/* + * These versions of U3 HyperTransport config space access ops do not + * implement self-view of the HT host yet + */ + +/* + * This function deals with some "special cases" devices. + * + * 0 -> No special case + * 1 -> Skip the device but act as if the access was successfull + * (return 0xff's on reads, eventually, cache config space + * accesses in a later version) + * -1 -> Hide the device (unsuccessful acess) + */ +static int u3_ht_skip_device(struct pci_controller *hose, + struct pci_bus *bus, unsigned int devfn) +{ + struct device_node *busdn, *dn; + int i; + + /* We only allow config cycles to devices that are in OF device-tree + * as we are apparently having some weird things going on with some + * revs of K2 on recent G5s + */ + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = hose->arch_data; + for (dn = busdn->child; dn; dn = dn->sibling) + if (dn->devfn == devfn) + break; + if (dn == NULL) + return -1; + + /* + * When a device in K2 is powered down, we die on config + * cycle accesses. Fix that here. + */ + for (i=0; i<2; i++) + if (k2_skiplist[i] == dn) + return 1; + + return 0; +} + +#define U3_HT_CFA0(devfn, off) \ + ((((unsigned long)devfn) << 8) | offset) +#define U3_HT_CFA1(bus, devfn, off) \ + (U3_HT_CFA0(devfn, off) \ + + (((unsigned long)bus) << 16) \ + + 0x01000000UL) + +static unsigned long __pmac u3_ht_cfg_access(struct pci_controller* hose, + u8 bus, u8 devfn, u8 offset) +{ + if (bus == hose->first_busno) { + /* For now, we don't self probe U3 HT bridge */ + if (PCI_SLOT(devfn) == 0) + return 0; + return ((unsigned long)hose->cfg_data) + U3_HT_CFA0(devfn, offset); + } else + return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset); +} + +static int __pmac u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + unsigned long addr; + + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (u3_ht_skip_device(hose, bus, devfn)) { + case 0: + break; + case 1: + switch (len) { + case 1: + *val = 0xff; break; + case 2: + *val = 0xffff; break; + default: + *val = 0xfffffffful; break; + } + return PCIBIOS_SUCCESSFUL; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int __pmac u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (u3_ht_skip_device(hose, bus, devfn)) { + case 0: + break; + case 1: + return PCIBIOS_SUCCESSFUL; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u3_ht_pci_ops = +{ + u3_ht_read_config, + u3_ht_write_config +}; + +static void __init setup_u3_agp(struct pci_controller* hose) +{ + /* On G5, we move AGP up to high bus number so we don't need + * to reassign bus numbers for HT. If we ever have P2P bridges + * on AGP, we'll have to move pci_assign_all_busses to the + * pci_controller structure so we enable it for AGP and not for + * HT childs. + * We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->first_busno = 0xf0; + hose->last_busno = 0xff; + has_uninorth = 1; + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + + u3_agp = hose; +} + +static void __init setup_u3_ht(struct pci_controller* hose) +{ + struct device_node *np = (struct device_node *)hose->arch_data; + int i, cur; + + hose->ops = &u3_ht_pci_ops; + + /* We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->cfg_data = (volatile unsigned char *)ioremap(0xf2000000, 0x02000000); + + /* + * /ht node doesn't expose a "ranges" property, so we "remove" regions that + * have been allocated to AGP. So far, this version of the code doesn't assign + * any of the 0xfxxxxxxx "fine" memory regions to /ht. + * We need to fix that sooner or later by either parsing all child "ranges" + * properties or figuring out the U3 address space decoding logic and + * then read it's configuration register (if any). + */ + hose->io_base_phys = 0xf4000000; + hose->io_base_virt = ioremap(hose->io_base_phys, 0x00400000); + isa_io_base = pci_io_base = (unsigned long) hose->io_base_virt; + hose->io_resource.name = np->full_name; + hose->io_resource.start = 0; + hose->io_resource.end = 0x003fffff; + hose->io_resource.flags = IORESOURCE_IO; + hose->pci_mem_offset = 0; + hose->first_busno = 0; + hose->last_busno = 0xef; + hose->mem_resources[0].name = np->full_name; + hose->mem_resources[0].start = 0x80000000; + hose->mem_resources[0].end = 0xefffffff; + hose->mem_resources[0].flags = IORESOURCE_MEM; + + if (u3_agp == NULL) { + DBG("U3 has no AGP, using full resource range\n"); + return; + } + + /* We "remove" the AGP resources from the resources allocated to HT, that + * is we create "holes". However, that code does assumptions that so far + * happen to be true (cross fingers...), typically that resources in the + * AGP node are properly ordered + */ + cur = 0; + for (i=0; i<3; i++) { + struct resource *res = &u3_agp->mem_resources[i]; + if (res->flags != IORESOURCE_MEM) + continue; + /* We don't care about "fine" resources */ + if (res->start >= 0xf0000000) + continue; + /* Check if it's just a matter of "shrinking" us in one direction */ + if (hose->mem_resources[cur].start == res->start) { + DBG("U3/HT: shrink start of %d, %08lx -> %08lx\n", + cur, hose->mem_resources[cur].start, res->end + 1); + hose->mem_resources[cur].start = res->end + 1; + continue; + } + if (hose->mem_resources[cur].end == res->end) { + DBG("U3/HT: shrink end of %d, %08lx -> %08lx\n", + cur, hose->mem_resources[cur].end, res->start - 1); + hose->mem_resources[cur].end = res->start - 1; + continue; + } + /* No, it's not the case, we need a hole */ + if (cur == 2) { + /* not enough resources for a hole, we drop part of the range */ + printk(KERN_WARNING "Running out of resources for /ht host !\n"); + hose->mem_resources[cur].end = res->start - 1; + continue; + } + cur++; + DBG("U3/HT: hole, %d end at %08lx, %d start at %08lx\n", + cur-1, res->start - 1, cur, res->end + 1); + hose->mem_resources[cur].name = np->full_name; + hose->mem_resources[cur].flags = IORESOURCE_MEM; + hose->mem_resources[cur].start = res->end + 1; + hose->mem_resources[cur].end = hose->mem_resources[cur-1].end; + hose->mem_resources[cur-1].end = res->start - 1; + } +} + +static void __init pmac_process_bridge_OF_ranges(struct pci_controller *hose, + struct device_node *dev, int primary) +{ + static unsigned int static_lc_ranges[2024]; + unsigned int *dt_ranges, *lc_ranges, *ranges, *prev; + unsigned int size; + int rlen = 0, orig_rlen; + int memno = 0; + struct resource *res; + int np, na = prom_n_addr_cells(dev); + + np = na + 5; + + /* First we try to merge ranges to fix a problem with some pmacs + * that can have more than 3 ranges, fortunately using contiguous + * addresses -- BenH + */ + dt_ranges = (unsigned int *) get_property(dev, "ranges", &rlen); + if (!dt_ranges) + return; + /* lc_ranges = alloc_bootmem(rlen);*/ + lc_ranges = static_lc_ranges; + if (!lc_ranges) + return; /* what can we do here ? */ + memcpy(lc_ranges, dt_ranges, rlen); + orig_rlen = rlen; + + /* Let's work on a copy of the "ranges" property instead of damaging + * the device-tree image in memory + */ + ranges = lc_ranges; + prev = NULL; + while ((rlen -= np * sizeof(unsigned int)) >= 0) { + if (prev) { + if (prev[0] == ranges[0] && prev[1] == ranges[1] && + (prev[2] + prev[na+4]) == ranges[2] && + (prev[na+2] + prev[na+4]) == ranges[na+2]) { + prev[na+4] += ranges[na+4]; + ranges[0] = 0; + ranges += np; + continue; + } + } + prev = ranges; + ranges += np; + } + + /* + * The ranges property is laid out as an array of elements, + * each of which comprises: + * cells 0 - 2: a PCI address + * cells 3 or 3+4: a CPU physical address + * (size depending on dev->n_addr_cells) + * cells 4+5 or 5+6: the size of the range + */ + ranges = lc_ranges; + rlen = orig_rlen; + while (ranges && (rlen -= np * sizeof(unsigned int)) >= 0) { + res = NULL; + size = ranges[na+4]; + switch (ranges[0] >> 24) { + case 1: /* I/O space */ + if (ranges[2] != 0) + break; + hose->io_base_phys = ranges[na+2]; + /* limit I/O space to 16MB */ + if (size > 0x01000000) + size = 0x01000000; + hose->io_base_virt = ioremap(ranges[na+2], size); + if (primary) + isa_io_base = (unsigned long) hose->io_base_virt; + res = &hose->io_resource; + res->flags = IORESOURCE_IO; + res->start = ranges[2]; + break; + case 2: /* memory space */ + memno = 0; + if (ranges[1] == 0 && ranges[2] == 0 + && ranges[na+4] <= (16 << 20)) { + /* 1st 16MB, i.e. ISA memory area */ +#if 0 + if (primary) + isa_mem_base = ranges[na+2]; +#endif + memno = 1; + } + while (memno < 3 && hose->mem_resources[memno].flags) + ++memno; + if (memno == 0) + hose->pci_mem_offset = ranges[na+2] - ranges[2]; + if (memno < 3) { + res = &hose->mem_resources[memno]; + res->flags = IORESOURCE_MEM; + res->start = ranges[na+2]; + } + break; + } + if (res != NULL) { + res->name = dev->full_name; + res->end = res->start + size - 1; + res->parent = NULL; + res->sibling = NULL; + res->child = NULL; + } + ranges += np; + } +} + +/* + * We assume that if we have a G3 powermac, we have one bridge called + * "pci" (a MPC106) and no bandit or chaos bridges, and contrariwise, + * if we have one or more bandit or chaos bridges, we don't have a MPC106. + */ +static int __init add_bridge(struct device_node *dev) +{ + int len; + struct pci_controller *hose; + char* disp_name; + int *bus_range; + int primary = 1; + struct property *of_prop; + + DBG("Adding PCI host bridge %s\n", dev->full_name); + + bus_range = (int *) get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n", + dev->full_name); + } + + hose = alloc_bootmem(sizeof(struct pci_controller)); + if (hose == NULL) + return -ENOMEM; + pci_setup_pci_controller(hose); + + hose->arch_data = dev; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + of_prop = alloc_bootmem(sizeof(struct property) + + sizeof(hose->global_number)); + if (of_prop) { + memset(of_prop, 0, sizeof(struct property)); + of_prop->name = "linux,pci-domain"; + of_prop->length = sizeof(hose->global_number); + of_prop->value = (unsigned char *)&of_prop[1]; + memcpy(of_prop->value, &hose->global_number, sizeof(hose->global_number)); + prom_add_property(dev, of_prop); + } + + disp_name = NULL; + if (device_is_compatible(dev, "u3-agp")) { + setup_u3_agp(hose); + disp_name = "U3-AGP"; + primary = 0; + } else if (device_is_compatible(dev, "u3-ht")) { + setup_u3_ht(hose); + disp_name = "U3-HT"; + primary = 1; + } + printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n", + disp_name, hose->first_busno, hose->last_busno); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pmac_process_bridge_OF_ranges(hose, dev, primary); + + /* Fixup "bus-range" OF property */ + fixup_bus_range(dev); + + return 0; +} + +/* + * We use our own read_irq_line here because PCI_INTERRUPT_PIN is + * crap on some of Apple ASICs. We unconditionally use the Open Firmware + * interrupt number as this is always right. + */ +static int pmac_pci_read_irq_line(struct pci_dev *pci_dev) +{ + struct device_node *node; + + node = pci_device_to_OF_node(pci_dev); + if (node == NULL) + return -1; + if (node->n_intrs == 0) + return -1; + pci_dev->irq = node->intrs[0].line; + pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq); + + return 0; +} + +void __init pmac_pcibios_fixup(void) +{ + struct pci_dev *dev = NULL; + + for_each_pci_dev(dev) + pmac_pci_read_irq_line(dev); +} + +static void __init pmac_fixup_phb_resources(void) +{ + struct pci_controller *hose, *tmp; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; + hose->io_resource.start += offset; + hose->io_resource.end += offset; + printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n", + hose->global_number, + hose->io_resource.start, hose->io_resource.end); + } +} + +void __init pmac_pci_init(void) +{ + struct device_node *np, *root; + struct device_node *ht = NULL; + + /* Probe root PCI hosts, that is on U3 the AGP host and the + * HyperTransport host. That one is actually "kept" around + * and actually added last as it's resource management relies + * on the AGP resources to have been setup first + */ + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_CRIT "pmac_find_bridges: can't find root of device tree\n"); + return; + } + for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) { + if (np->name == NULL) + continue; + if (strcmp(np->name, "pci") == 0) { + if (add_bridge(np) == 0) + of_node_get(np); + } + if (strcmp(np->name, "ht") == 0) { + of_node_get(np); + ht = np; + } + } + of_node_put(root); + + /* Now setup the HyperTransport host if we found any + */ + if (ht && add_bridge(ht) != 0) + of_node_put(ht); + + /* Fixup the IO resources on our host bridges as the common code + * does it only for childs of the host bridges + */ + pmac_fixup_phb_resources(); + + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); + + /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We + * assume there is no P2P bridge on the AGP bus, which should be a + * safe assumptions hopefully. + */ + if (u3_agp) { + struct device_node *np = u3_agp->arch_data; + np->busno = 0xf0; + for (np = np->child; np; np = np->sibling) + np->busno = 0xf0; + } + + pmac_check_ht_link(); + + /* Tell pci.c to not use the common resource allocation mecanism */ + pci_probe_only = 1; + + /* Allow all IO */ + io_page_mask = -1; +} + +/* + * Disable second function on K2-SATA, it's broken + * and disable IO BARs on first one + */ +static void fixup_k2_sata(struct pci_dev* dev) +{ + int i; + u16 cmd; + + if (PCI_FUNC(dev->devfn) > 0) { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 6; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0); + } + } else { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~PCI_COMMAND_IO; + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 5; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0); + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS, 0x0240, fixup_k2_sata); + diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c new file mode 100644 index 00000000000..6cf03d387b9 --- /dev/null +++ b/arch/ppc64/kernel/pmac_setup.c @@ -0,0 +1,511 @@ +/* + * arch/ppc/platforms/setup.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * + * Derived from "arch/alpha/kernel/setup.c" + * Copyright (C) 1995 Linus Torvalds + * + * Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +/* + * bootup setup stuff.. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmac.h" +#include "mpic.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +static int current_root_goodness = -1; +#define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */ + +extern int powersave_nap; +int sccdbg; + +sys_ctrler_t sys_ctrler; +EXPORT_SYMBOL(sys_ctrler); + +#ifdef CONFIG_PMAC_SMU +unsigned long smu_cmdbuf_abs; +EXPORT_SYMBOL(smu_cmdbuf_abs); +#endif + +extern void udbg_init_scc(struct device_node *np); + +void __pmac pmac_show_cpuinfo(struct seq_file *m) +{ + struct device_node *np; + char *pp; + int plen; + char* mbname; + int mbmodel = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, + PMAC_MB_INFO_MODEL, 0); + unsigned int mbflags = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, + PMAC_MB_INFO_FLAGS, 0); + + if (pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, PMAC_MB_INFO_NAME, + (long)&mbname) != 0) + mbname = "Unknown"; + + /* find motherboard type */ + seq_printf(m, "machine\t\t: "); + np = find_devices("device-tree"); + if (np != NULL) { + pp = (char *) get_property(np, "model", NULL); + if (pp != NULL) + seq_printf(m, "%s\n", pp); + else + seq_printf(m, "PowerMac\n"); + pp = (char *) get_property(np, "compatible", &plen); + if (pp != NULL) { + seq_printf(m, "motherboard\t:"); + while (plen > 0) { + int l = strlen(pp) + 1; + seq_printf(m, " %s", pp); + plen -= l; + pp += l; + } + seq_printf(m, "\n"); + } + } else + seq_printf(m, "PowerMac\n"); + + /* print parsed model */ + seq_printf(m, "detected as\t: %d (%s)\n", mbmodel, mbname); + seq_printf(m, "pmac flags\t: %08x\n", mbflags); + + /* Indicate newworld */ + seq_printf(m, "pmac-generation\t: NewWorld\n"); +} + + +void __init pmac_setup_arch(void) +{ + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + /* Probe motherboard chipset */ + pmac_feature_init(); +#if 0 + /* Lock-enable the SCC channel used for debug */ + if (sccdbg) { + np = of_find_node_by_name(NULL, "escc"); + if (np) + pmac_call_feature(PMAC_FTR_SCC_ENABLE, np, + PMAC_SCC_ASYNC | PMAC_SCC_FLAG_XMON, 1); + } +#endif + /* We can NAP */ + powersave_nap = 1; + +#ifdef CONFIG_ADB_PMU + /* Initialize the PMU if any */ + find_via_pmu(); +#endif +#ifdef CONFIG_PMAC_SMU + /* Initialize the SMU if any */ + smu_init(); +#endif + + /* Init NVRAM access */ + pmac_nvram_init(); + + /* Setup SMP callback */ +#ifdef CONFIG_SMP + pmac_setup_smp(); +#endif + + /* Lookup PCI hosts */ + pmac_pci_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif +} + +#ifdef CONFIG_SCSI +void note_scsi_host(struct device_node *node, void *host) +{ + /* Obsolete */ +} +#endif + + +static int initializing = 1; + +static int pmac_late_init(void) +{ + initializing = 0; + return 0; +} + +late_initcall(pmac_late_init); + +/* can't be __init - can be called whenever a disk is first accessed */ +void __pmac note_bootable_part(dev_t dev, int part, int goodness) +{ + extern dev_t boot_dev; + char *p; + + if (!initializing) + return; + if ((goodness <= current_root_goodness) && + ROOT_DEV != DEFAULT_ROOT_DEVICE) + return; + p = strstr(saved_command_line, "root="); + if (p != NULL && (p == saved_command_line || p[-1] == ' ')) + return; + + if (!boot_dev || dev == boot_dev) { + ROOT_DEV = dev + part; + boot_dev = 0; + current_root_goodness = goodness; + } +} + +void __pmac pmac_restart(char *cmd) +{ + switch(sys_ctrler) { +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + pmu_restart(); + break; +#endif + +#ifdef CONFIG_PMAC_SMU + case SYS_CTRLER_SMU: + smu_restart(); + break; +#endif + default: + ; + } +} + +void __pmac pmac_power_off(void) +{ + switch(sys_ctrler) { +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + pmu_shutdown(); + break; +#endif +#ifdef CONFIG_PMAC_SMU + case SYS_CTRLER_SMU: + smu_shutdown(); + break; +#endif + default: + ; + } +} + +void __pmac pmac_halt(void) +{ + pmac_power_off(); +} + +#ifdef CONFIG_BOOTX_TEXT +static int dummy_getc_poll(void) +{ + return -1; +} + +static unsigned char dummy_getc(void) +{ + return 0; +} + +static void btext_putc(unsigned char c) +{ + btext_drawchar(c); +} + +static void __init init_boot_display(void) +{ + char *name; + struct device_node *np = NULL; + int rc = -ENODEV; + + printk("trying to initialize btext ...\n"); + + name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + if (name != NULL) { + np = of_find_node_by_path(name); + if (np != NULL) { + if (strcmp(np->type, "display") != 0) { + printk("boot stdout isn't a display !\n"); + of_node_put(np); + np = NULL; + } + } + } + if (np) + rc = btext_initialize(np); + if (rc == 0) + return; + + for (np = NULL; (np = of_find_node_by_type(np, "display"));) { + if (get_property(np, "linux,opened", NULL)) { + printk("trying %s ...\n", np->full_name); + rc = btext_initialize(np); + printk("result: %d\n", rc); + } + if (rc == 0) + return; + } +} +#endif /* CONFIG_BOOTX_TEXT */ + +/* + * Early initialization. + */ +void __init pmac_init_early(void) +{ + DBG(" -> pmac_init_early\n"); + + /* Initialize hash table, from now on, we can take hash faults + * and call ioremap + */ + hpte_init_native(); + + /* Init SCC */ + if (strstr(cmd_line, "sccdbg")) { + sccdbg = 1; + udbg_init_scc(NULL); + } + + else { +#ifdef CONFIG_BOOTX_TEXT + init_boot_display(); + + ppc_md.udbg_putc = btext_putc; + ppc_md.udbg_getc = dummy_getc; + ppc_md.udbg_getc_poll = dummy_getc_poll; +#endif /* CONFIG_BOOTX_TEXT */ + } + + /* Setup interrupt mapping options */ + ppc64_interrupt_controller = IC_OPEN_PIC; + + iommu_init_early_u3(); + + DBG(" <- pmac_init_early\n"); +} + +static int pmac_u3_cascade(struct pt_regs *regs, void *data) +{ + return mpic_get_one_irq((struct mpic *)data, regs); +} + +static __init void pmac_init_IRQ(void) +{ + struct device_node *irqctrler = NULL; + struct device_node *irqctrler2 = NULL; + struct device_node *np = NULL; + struct mpic *mpic1, *mpic2; + + /* We first try to detect Apple's new Core99 chipset, since mac-io + * is quite different on those machines and contains an IBM MPIC2. + */ + while ((np = of_find_node_by_type(np, "open-pic")) != NULL) { + struct device_node *parent = of_get_parent(np); + if (parent && !strcmp(parent->name, "u3")) + irqctrler2 = of_node_get(np); + else + irqctrler = of_node_get(np); + of_node_put(parent); + } + if (irqctrler != NULL && irqctrler->n_addrs > 0) { + unsigned char senses[128]; + + printk(KERN_INFO "PowerMac using OpenPIC irq controller at 0x%08x\n", + (unsigned int)irqctrler->addrs[0].address); + + prom_get_irq_senses(senses, 0, 128); + mpic1 = mpic_alloc(irqctrler->addrs[0].address, + MPIC_PRIMARY | MPIC_WANTS_RESET, + 0, 0, 128, 256, senses, 128, " K2-MPIC "); + BUG_ON(mpic1 == NULL); + mpic_init(mpic1); + + if (irqctrler2 != NULL && irqctrler2->n_intrs > 0 && + irqctrler2->n_addrs > 0) { + printk(KERN_INFO "Slave OpenPIC at 0x%08x hooked on IRQ %d\n", + (u32)irqctrler2->addrs[0].address, + irqctrler2->intrs[0].line); + + pmac_call_feature(PMAC_FTR_ENABLE_MPIC, irqctrler2, 0, 0); + prom_get_irq_senses(senses, 128, 128 + 128); + + /* We don't need to set MPIC_BROKEN_U3 here since we don't have + * hypertransport interrupts routed to it + */ + mpic2 = mpic_alloc(irqctrler2->addrs[0].address, + MPIC_BIG_ENDIAN | MPIC_WANTS_RESET, + 0, 128, 128, 0, senses, 128, " U3-MPIC "); + BUG_ON(mpic2 == NULL); + mpic_init(mpic2); + mpic_setup_cascade(irqctrler2->intrs[0].line, + pmac_u3_cascade, mpic2); + } + } + of_node_put(irqctrler); + of_node_put(irqctrler2); +} + +static void __init pmac_progress(char *s, unsigned short hex) +{ + if (sccdbg) { + udbg_puts(s); + udbg_puts("\n"); + } +#ifdef CONFIG_BOOTX_TEXT + else if (boot_text_mapped) { + btext_drawstring(s); + btext_drawstring("\n"); + } +#endif /* CONFIG_BOOTX_TEXT */ +} + +/* + * pmac has no legacy IO, anything calling this function has to + * fail or bad things will happen + */ +static int pmac_check_legacy_ioport(unsigned int baseport) +{ + return -ENODEV; +} + +static int __init pmac_declare_of_platform_devices(void) +{ + struct device_node *np; + + np = find_devices("u3"); + if (np) { + for (np = np->child; np != NULL; np = np->sibling) + if (strncmp(np->name, "i2c", 3) == 0) { + of_platform_device_create(np, "u3-i2c"); + break; + } + } + + return 0; +} + +device_initcall(pmac_declare_of_platform_devices); + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init pmac_probe(int platform) +{ + if (platform != PLATFORM_POWERMAC) + return 0; + /* + * On U3, the DART (iommu) must be allocated now since it + * has an impact on htab_initialize (due to the large page it + * occupies having to be broken up so the DART itself is not + * part of the cacheable linar mapping + */ + alloc_u3_dart_table(); + +#ifdef CONFIG_PMAC_SMU + /* + * SMU based G5s need some memory below 2Gb, at least the current + * driver needs that. We have to allocate it now. We allocate 4k + * (1 small page) for now. + */ + smu_cmdbuf_abs = lmb_alloc_base(4096, 4096, 0x80000000UL); +#endif /* CONFIG_PMAC_SMU */ + + return 1; +} + +struct machdep_calls __initdata pmac_md = { +#ifdef CONFIG_HOTPLUG_CPU + .cpu_die = generic_mach_cpu_die, +#endif + .probe = pmac_probe, + .setup_arch = pmac_setup_arch, + .init_early = pmac_init_early, + .get_cpuinfo = pmac_show_cpuinfo, + .init_IRQ = pmac_init_IRQ, + .get_irq = mpic_get_irq, + .pcibios_fixup = pmac_pcibios_fixup, + .restart = pmac_restart, + .power_off = pmac_power_off, + .halt = pmac_halt, + .get_boot_time = pmac_get_boot_time, + .set_rtc_time = pmac_set_rtc_time, + .get_rtc_time = pmac_get_rtc_time, + .calibrate_decr = pmac_calibrate_decr, + .feature_call = pmac_do_feature_call, + .progress = pmac_progress, + .check_legacy_ioport = pmac_check_legacy_ioport +}; diff --git a/arch/ppc64/kernel/pmac_smp.c b/arch/ppc64/kernel/pmac_smp.c new file mode 100644 index 00000000000..c27588ede2f --- /dev/null +++ b/arch/ppc64/kernel/pmac_smp.c @@ -0,0 +1,316 @@ +/* + * SMP support for power macintosh. + * + * We support both the old "powersurge" SMP architecture + * and the current Core99 (G4 PowerMac) machines. + * + * Note that we don't support the very first rev. of + * Apple/DayStar 2 CPUs board, the one with the funky + * watchdog. Hopefully, none of these should be there except + * maybe internally to Apple. I should probably still add some + * code to detect this card though and disable SMP. --BenH. + * + * Support Macintosh G4 SMP by Troy Benjegerdes (hozer@drgw.net) + * and Ben Herrenschmidt . + * + * Support for DayStar quad CPU cards + * Copyright (C) XLR8, Inc. 1994-2000 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpic.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +extern void pmac_secondary_start_1(void); +extern void pmac_secondary_start_2(void); +extern void pmac_secondary_start_3(void); + +extern struct smp_ops_t *smp_ops; + +static void (*pmac_tb_freeze)(int freeze); +static struct device_node *pmac_tb_clock_chip_host; +static DEFINE_SPINLOCK(timebase_lock); +static unsigned long timebase; + +static void smp_core99_cypress_tb_freeze(int freeze) +{ + u8 data; + int rc; + + /* Strangely, the device-tree says address is 0xd2, but darwin + * accesses 0xd0 ... + */ + pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_combined); + rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, + 0xd0 | pmac_low_i2c_read, + 0x81, &data, 1); + if (rc != 0) + goto bail; + + data = (data & 0xf3) | (freeze ? 0x00 : 0x0c); + + pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_stdsub); + rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, + 0xd0 | pmac_low_i2c_write, + 0x81, &data, 1); + + bail: + if (rc != 0) { + printk("Cypress Timebase %s rc: %d\n", + freeze ? "freeze" : "unfreeze", rc); + panic("Timebase freeze failed !\n"); + } +} + +static void smp_core99_pulsar_tb_freeze(int freeze) +{ + u8 data; + int rc; + + /* Strangely, the device-tree says address is 0xd2, but darwin + * accesses 0xd0 ... + */ + pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_combined); + rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, + 0xd4 | pmac_low_i2c_read, + 0x2e, &data, 1); + if (rc != 0) + goto bail; + + data = (data & 0x88) | (freeze ? 0x11 : 0x22); + + pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_stdsub); + rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, + 0xd4 | pmac_low_i2c_write, + 0x2e, &data, 1); + bail: + if (rc != 0) { + printk(KERN_ERR "Pulsar Timebase %s rc: %d\n", + freeze ? "freeze" : "unfreeze", rc); + panic("Timebase freeze failed !\n"); + } +} + + +static void smp_core99_give_timebase(void) +{ + /* Open i2c bus for synchronous access */ + if (pmac_low_i2c_open(pmac_tb_clock_chip_host, 0)) + panic("Can't open i2c for TB sync !\n"); + + spin_lock(&timebase_lock); + (*pmac_tb_freeze)(1); + mb(); + timebase = get_tb(); + spin_unlock(&timebase_lock); + + while (timebase) + barrier(); + + spin_lock(&timebase_lock); + (*pmac_tb_freeze)(0); + spin_unlock(&timebase_lock); + + /* Close i2c bus */ + pmac_low_i2c_close(pmac_tb_clock_chip_host); +} + + +static void __devinit smp_core99_take_timebase(void) +{ + while (!timebase) + barrier(); + spin_lock(&timebase_lock); + set_tb(timebase >> 32, timebase & 0xffffffff); + timebase = 0; + spin_unlock(&timebase_lock); +} + + +static int __init smp_core99_probe(void) +{ + struct device_node *cpus; + struct device_node *cc; + int ncpus = 0; + + /* Maybe use systemconfiguration here ? */ + if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345); + + /* Count CPUs in the device-tree */ + for (cpus = NULL; (cpus = of_find_node_by_type(cpus, "cpu")) != NULL;) + ++ncpus; + + printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", ncpus); + + /* Nothing more to do if less than 2 of them */ + if (ncpus <= 1) + return 1; + + /* Look for the clock chip */ + for (cc = NULL; (cc = of_find_node_by_name(cc, "i2c-hwclock")) != NULL;) { + struct device_node *p = of_get_parent(cc); + u32 *reg; + int ok; + ok = p && device_is_compatible(p, "uni-n-i2c"); + if (!ok) + goto next; + reg = (u32 *)get_property(cc, "reg", NULL); + if (reg == NULL) + goto next; + switch (*reg) { + case 0xd2: + pmac_tb_freeze = smp_core99_cypress_tb_freeze; + printk(KERN_INFO "Timebase clock is Cypress chip\n"); + break; + case 0xd4: + pmac_tb_freeze = smp_core99_pulsar_tb_freeze; + printk(KERN_INFO "Timebase clock is Pulsar chip\n"); + break; + } + if (pmac_tb_freeze != NULL) { + pmac_tb_clock_chip_host = p; + smp_ops->give_timebase = smp_core99_give_timebase; + smp_ops->take_timebase = smp_core99_take_timebase; + break; + } + next: + of_node_put(p); + } + + mpic_request_ipis(); + + return ncpus; +} + +static void __init smp_core99_kick_cpu(int nr) +{ + int save_vector, j; + unsigned long new_vector; + unsigned long flags; + volatile unsigned int *vector + = ((volatile unsigned int *)(KERNELBASE+0x100)); + + if (nr < 1 || nr > 3) + return; + if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346); + + local_irq_save(flags); + local_irq_disable(); + + /* Save reset vector */ + save_vector = *vector; + + /* Setup fake reset vector that does + * b .pmac_secondary_start - KERNELBASE + */ + switch(nr) { + case 1: + new_vector = (unsigned long)pmac_secondary_start_1; + break; + case 2: + new_vector = (unsigned long)pmac_secondary_start_2; + break; + case 3: + default: + new_vector = (unsigned long)pmac_secondary_start_3; + break; + } + *vector = 0x48000002 + (new_vector - KERNELBASE); + + /* flush data cache and inval instruction cache */ + flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); + + /* Put some life in our friend */ + pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); + paca[nr].cpu_start = 1; + + /* FIXME: We wait a bit for the CPU to take the exception, I should + * instead wait for the entry code to set something for me. Well, + * ideally, all that crap will be done in prom.c and the CPU left + * in a RAM-based wait loop like CHRP. + */ + for (j = 1; j < 1000000; j++) + mb(); + + /* Restore our exception vector */ + *vector = save_vector; + flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); + + local_irq_restore(flags); + if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); +} + +static void __init smp_core99_setup_cpu(int cpu_nr) +{ + /* Setup MPIC */ + mpic_setup_this_cpu(); + + if (cpu_nr == 0) { + extern void g5_phy_disable_cpu1(void); + + /* If we didn't start the second CPU, we must take + * it off the bus + */ + if (num_online_cpus() < 2) + g5_phy_disable_cpu1(); + if (ppc_md.progress) ppc_md.progress("smp_core99_setup_cpu 0 done", 0x349); + } +} + +struct smp_ops_t core99_smp_ops __pmacdata = { + .message_pass = smp_mpic_message_pass, + .probe = smp_core99_probe, + .kick_cpu = smp_core99_kick_cpu, + .setup_cpu = smp_core99_setup_cpu, + .give_timebase = smp_generic_give_timebase, + .take_timebase = smp_generic_take_timebase, +}; + +void __init pmac_setup_smp(void) +{ + smp_ops = &core99_smp_ops; +#ifdef CONFIG_HOTPLUG_CPU + smp_ops->cpu_enable = generic_cpu_enable; + smp_ops->cpu_disable = generic_cpu_disable; + smp_ops->cpu_die = generic_cpu_die; +#endif +} diff --git a/arch/ppc64/kernel/pmac_time.c b/arch/ppc64/kernel/pmac_time.c new file mode 100644 index 00000000000..f24827581dd --- /dev/null +++ b/arch/ppc64/kernel/pmac_time.c @@ -0,0 +1,201 @@ +/* + * Support for periodic interrupts (100 per second) and for getting + * the current time from the RTC on Power Macintoshes. + * + * We use the decrementer register for our periodic interrupts. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996 Paul Mackerras. + * Copyright (C) 2003-2005 Benjamin Herrenschmidt. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +extern void setup_default_decr(void); + +extern unsigned long ppc_tb_freq; +extern unsigned long ppc_proc_freq; + +/* Apparently the RTC stores seconds since 1 Jan 1904 */ +#define RTC_OFFSET 2082844800 + +/* + * Calibrate the decrementer frequency with the VIA timer 1. + */ +#define VIA_TIMER_FREQ_6 4700000 /* time 1 frequency * 6 */ + +extern struct timezone sys_tz; +extern void to_tm(int tim, struct rtc_time * tm); + +void __pmac pmac_get_rtc_time(struct rtc_time *tm) +{ + switch(sys_ctrler) { +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: { + /* TODO: Move that to a function in the PMU driver */ + struct adb_request req; + unsigned int now; + + if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) + return; + pmu_wait_complete(&req); + if (req.reply_len != 4) + printk(KERN_ERR "pmac_get_rtc_time: PMU returned a %d" + " bytes reply\n", req.reply_len); + now = (req.reply[0] << 24) + (req.reply[1] << 16) + + (req.reply[2] << 8) + req.reply[3]; + DBG("get: %u -> %u\n", (int)now, (int)(now - RTC_OFFSET)); + now -= RTC_OFFSET; + + to_tm(now, tm); + tm->tm_year -= 1900; + tm->tm_mon -= 1; + + DBG("-> tm_mday: %d, tm_mon: %d, tm_year: %d, %d:%02d:%02d\n", + tm->tm_mday, tm->tm_mon, tm->tm_year, + tm->tm_hour, tm->tm_min, tm->tm_sec); + break; + } +#endif /* CONFIG_ADB_PMU */ + +#ifdef CONFIG_PMAC_SMU + case SYS_CTRLER_SMU: + smu_get_rtc_time(tm); + break; +#endif /* CONFIG_PMAC_SMU */ + default: + ; + } +} + +int __pmac pmac_set_rtc_time(struct rtc_time *tm) +{ + switch(sys_ctrler) { +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: { + /* TODO: Move that to a function in the PMU driver */ + struct adb_request req; + unsigned int nowtime; + + DBG("set: tm_mday: %d, tm_mon: %d, tm_year: %d," + " %d:%02d:%02d\n", + tm->tm_mday, tm->tm_mon, tm->tm_year, + tm->tm_hour, tm->tm_min, tm->tm_sec); + + nowtime = mktime(tm->tm_year + 1900, tm->tm_mon + 1, + tm->tm_mday, tm->tm_hour, tm->tm_min, + tm->tm_sec); + + DBG("-> %u -> %u\n", (int)nowtime, + (int)(nowtime + RTC_OFFSET)); + nowtime += RTC_OFFSET; + + if (pmu_request(&req, NULL, 5, PMU_SET_RTC, + nowtime >> 24, nowtime >> 16, + nowtime >> 8, nowtime) < 0) + return -ENXIO; + pmu_wait_complete(&req); + if (req.reply_len != 0) + printk(KERN_ERR "pmac_set_rtc_time: PMU returned a %d" + " bytes reply\n", req.reply_len); + return 0; + } +#endif /* CONFIG_ADB_PMU */ + +#ifdef CONFIG_PMAC_SMU + case SYS_CTRLER_SMU: + return smu_set_rtc_time(tm); +#endif /* CONFIG_PMAC_SMU */ + default: + return -ENODEV; + } +} + +void __init pmac_get_boot_time(struct rtc_time *tm) +{ + pmac_get_rtc_time(tm); + +#ifdef disabled__CONFIG_NVRAM + s32 delta = 0; + int dst; + + delta = ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x9)) << 16; + delta |= ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xa)) << 8; + delta |= pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xb); + if (delta & 0x00800000UL) + delta |= 0xFF000000UL; + dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0); + printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60, + dst ? "on" : "off"); +#endif +} + +/* + * Query the OF and get the decr frequency. + * This was taken from the pmac time_init() when merging the prep/pmac + * time functions. + */ +void __init pmac_calibrate_decr(void) +{ + struct device_node *cpu; + unsigned int freq, *fp; + struct div_result divres; + + /* + * The cpu node should have a timebase-frequency property + * to tell us the rate at which the decrementer counts. + */ + cpu = find_type_devices("cpu"); + if (cpu == 0) + panic("can't find cpu node in time_init"); + fp = (unsigned int *) get_property(cpu, "timebase-frequency", NULL); + if (fp == 0) + panic("can't get cpu timebase frequency"); + freq = *fp; + printk("time_init: decrementer frequency = %u.%.6u MHz\n", + freq/1000000, freq%1000000); + tb_ticks_per_jiffy = freq / HZ; + tb_ticks_per_sec = tb_ticks_per_jiffy * HZ; + tb_ticks_per_usec = freq / 1000000; + tb_to_us = mulhwu_scale_factor(freq, 1000000); + div128_by_32( 1024*1024, 0, tb_ticks_per_sec, &divres ); + tb_to_xs = divres.result_low; + ppc_tb_freq = freq; + + fp = (unsigned int *)get_property(cpu, "clock-frequency", NULL); + if (fp == 0) + panic("can't get cpu processor frequency"); + ppc_proc_freq = *fp; + + setup_default_decr(); +} + diff --git a/arch/ppc64/kernel/pmc.c b/arch/ppc64/kernel/pmc.c new file mode 100644 index 00000000000..67be773f9c0 --- /dev/null +++ b/arch/ppc64/kernel/pmc.c @@ -0,0 +1,67 @@ +/* + * linux/arch/ppc64/kernel/pmc.c + * + * Copyright (C) 2004 David Gibson, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include + +/* Ensure exceptions are disabled */ +static void dummy_perf(struct pt_regs *regs) +{ + unsigned int mmcr0 = mfspr(SPRN_MMCR0); + + mmcr0 &= ~(MMCR0_PMXE|MMCR0_PMAO); + mtspr(SPRN_MMCR0, mmcr0); +} + +static spinlock_t pmc_owner_lock = SPIN_LOCK_UNLOCKED; +static void *pmc_owner_caller; /* mostly for debugging */ +perf_irq_t perf_irq = dummy_perf; + +int reserve_pmc_hardware(perf_irq_t new_perf_irq) +{ + int err = 0; + + spin_lock(&pmc_owner_lock); + + if (pmc_owner_caller) { + printk(KERN_WARNING "reserve_pmc_hardware: " + "PMC hardware busy (reserved by caller %p)\n", + pmc_owner_caller); + err = -EBUSY; + goto out; + } + + pmc_owner_caller = __builtin_return_address(0); + perf_irq = new_perf_irq ? : dummy_perf; + + out: + spin_unlock(&pmc_owner_lock); + return err; +} +EXPORT_SYMBOL_GPL(reserve_pmc_hardware); + +void release_pmc_hardware(void) +{ + spin_lock(&pmc_owner_lock); + + WARN_ON(! pmc_owner_caller); + + pmc_owner_caller = NULL; + perf_irq = dummy_perf; + + spin_unlock(&pmc_owner_lock); +} +EXPORT_SYMBOL_GPL(release_pmc_hardware); diff --git a/arch/ppc64/kernel/ppc_ksyms.c b/arch/ppc64/kernel/ppc_ksyms.c new file mode 100644 index 00000000000..6ced63a3439 --- /dev/null +++ b/arch/ppc64/kernel/ppc_ksyms.c @@ -0,0 +1,95 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strncat); +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strpbrk); +EXPORT_SYMBOL(strstr); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strnlen); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); + +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(ip_fast_csum); +EXPORT_SYMBOL(csum_tcpudp_magic); + +EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); + +EXPORT_SYMBOL(reloc_offset); + +#ifdef CONFIG_PPC_ISERIES +EXPORT_SYMBOL(HvCall0); +EXPORT_SYMBOL(HvCall1); +EXPORT_SYMBOL(HvCall2); +EXPORT_SYMBOL(HvCall3); +EXPORT_SYMBOL(HvCall4); +EXPORT_SYMBOL(HvCall5); +EXPORT_SYMBOL(HvCall6); +EXPORT_SYMBOL(HvCall7); +#endif + +EXPORT_SYMBOL(_insb); +EXPORT_SYMBOL(_outsb); +EXPORT_SYMBOL(_insw); +EXPORT_SYMBOL(_outsw); +EXPORT_SYMBOL(_insl); +EXPORT_SYMBOL(_outsl); +EXPORT_SYMBOL(_insw_ns); +EXPORT_SYMBOL(_outsw_ns); +EXPORT_SYMBOL(_insl_ns); +EXPORT_SYMBOL(_outsl_ns); + +EXPORT_SYMBOL(kernel_thread); + +EXPORT_SYMBOL(giveup_fpu); +#ifdef CONFIG_ALTIVEC +EXPORT_SYMBOL(giveup_altivec); +#endif +EXPORT_SYMBOL(flush_icache_range); + +#ifdef CONFIG_SMP +#ifdef CONFIG_PPC_ISERIES +EXPORT_SYMBOL(local_get_flags); +EXPORT_SYMBOL(local_irq_disable); +EXPORT_SYMBOL(local_irq_restore); +#endif +#endif + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memscan); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memchr); + +EXPORT_SYMBOL(timer_interrupt); +EXPORT_SYMBOL(console_drivers); diff --git a/arch/ppc64/kernel/proc_ppc64.c b/arch/ppc64/kernel/proc_ppc64.c new file mode 100644 index 00000000000..0914b0669b0 --- /dev/null +++ b/arch/ppc64/kernel/proc_ppc64.c @@ -0,0 +1,128 @@ +/* + * arch/ppc64/kernel/proc_ppc64.c + * + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static loff_t page_map_seek( struct file *file, loff_t off, int whence); +static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos); +static int page_map_mmap( struct file *file, struct vm_area_struct *vma ); + +static struct file_operations page_map_fops = { + .llseek = page_map_seek, + .read = page_map_read, + .mmap = page_map_mmap +}; + +/* + * Create the ppc64 and ppc64/rtas directories early. This allows us to + * assume that they have been previously created in drivers. + */ +static int __init proc_ppc64_create(void) +{ + struct proc_dir_entry *root; + + root = proc_mkdir("ppc64", NULL); + if (!root) + return 1; + + if (!(systemcfg->platform & PLATFORM_PSERIES)) + return 0; + + if (!proc_mkdir("rtas", root)) + return 1; + + if (!proc_symlink("rtas", NULL, "ppc64/rtas")) + return 1; + + return 0; +} +core_initcall(proc_ppc64_create); + +static int __init proc_ppc64_init(void) +{ + struct proc_dir_entry *pde; + + pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL); + if (!pde) + return 1; + pde->nlink = 1; + pde->data = systemcfg; + pde->size = PAGE_SIZE; + pde->proc_fops = &page_map_fops; + + return 0; +} +__initcall(proc_ppc64_init); + +static loff_t page_map_seek( struct file *file, loff_t off, int whence) +{ + loff_t new; + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + + switch(whence) { + case 0: + new = off; + break; + case 1: + new = file->f_pos + off; + break; + case 2: + new = dp->size + off; + break; + default: + return -EINVAL; + } + if ( new < 0 || new > dp->size ) + return -EINVAL; + return (file->f_pos = new); +} + +static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size); +} + +static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + + vma->vm_flags |= VM_SHM | VM_LOCKED; + + if ((vma->vm_end - vma->vm_start) > dp->size) + return -EINVAL; + + remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT, + dp->size, vma->vm_page_prot); + return 0; +} + diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c new file mode 100644 index 00000000000..8b068612273 --- /dev/null +++ b/arch/ppc64/kernel/process.c @@ -0,0 +1,688 @@ +/* + * linux/arch/ppc64/kernel/process.c + * + * Derived from "arch/i386/kernel/process.c" + * Copyright (C) 1995 Linus Torvalds + * + * Updated and modified by Cort Dougan (cort@cs.nmt.edu) and + * Paul Mackerras (paulus@cs.anu.edu.au) + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CONFIG_SMP +struct task_struct *last_task_used_math = NULL; +struct task_struct *last_task_used_altivec = NULL; +#endif + +struct mm_struct ioremap_mm = { + .pgd = ioremap_dir, + .mm_users = ATOMIC_INIT(2), + .mm_count = ATOMIC_INIT(1), + .cpu_vm_mask = CPU_MASK_ALL, + .page_table_lock = SPIN_LOCK_UNLOCKED, +}; + +/* + * Make sure the floating-point register state in the + * the thread_struct is up to date for task tsk. + */ +void flush_fp_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + /* + * We need to disable preemption here because if we didn't, + * another process could get scheduled after the regs->msr + * test but before we have finished saving the FP registers + * to the thread_struct. That process could take over the + * FPU, and then when we get scheduled again we would store + * bogus values for the remaining FP registers. + */ + preempt_disable(); + if (tsk->thread.regs->msr & MSR_FP) { +#ifdef CONFIG_SMP + /* + * This should only ever be called for current or + * for a stopped child process. Since we save away + * the FP register state on context switch on SMP, + * there is something wrong if a stopped child appears + * to still have its FP state in the CPU registers. + */ + BUG_ON(tsk != current); +#endif + giveup_fpu(current); + } + preempt_enable(); + } +} + +void enable_kernel_fp(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) + giveup_fpu(current); + else + giveup_fpu(NULL); /* just enables FP for kernel */ +#else + giveup_fpu(last_task_used_math); +#endif /* CONFIG_SMP */ +} +EXPORT_SYMBOL(enable_kernel_fp); + +int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs) +{ + if (!tsk->thread.regs) + return 0; + flush_fp_to_thread(current); + + memcpy(fpregs, &tsk->thread.fpr[0], sizeof(*fpregs)); + + return 1; +} + +#ifdef CONFIG_ALTIVEC + +void enable_kernel_altivec(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) + giveup_altivec(current); + else + giveup_altivec(NULL); /* just enables FP for kernel */ +#else + giveup_altivec(last_task_used_altivec); +#endif /* CONFIG_SMP */ +} +EXPORT_SYMBOL(enable_kernel_altivec); + +/* + * Make sure the VMX/Altivec register state in the + * the thread_struct is up to date for task tsk. + */ +void flush_altivec_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + preempt_disable(); + if (tsk->thread.regs->msr & MSR_VEC) { +#ifdef CONFIG_SMP + BUG_ON(tsk != current); +#endif + giveup_altivec(current); + } + preempt_enable(); + } +} + +int dump_task_altivec(struct pt_regs *regs, elf_vrregset_t *vrregs) +{ + flush_altivec_to_thread(current); + memcpy(vrregs, ¤t->thread.vr[0], sizeof(*vrregs)); + return 1; +} + +#endif /* CONFIG_ALTIVEC */ + +DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); + +struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *new) +{ + struct thread_struct *new_thread, *old_thread; + unsigned long flags; + struct task_struct *last; + +#ifdef CONFIG_SMP + /* avoid complexity of lazy save/restore of fpu + * by just saving it every time we switch out if + * this task used the fpu during the last quantum. + * + * If it tries to use the fpu again, it'll trap and + * reload its fp regs. So we don't have to do a restore + * every switch, just a save. + * -- Cort + */ + if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP)) + giveup_fpu(prev); +#ifdef CONFIG_ALTIVEC + if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC)) + giveup_altivec(prev); +#endif /* CONFIG_ALTIVEC */ +#endif /* CONFIG_SMP */ + +#if defined(CONFIG_ALTIVEC) && !defined(CONFIG_SMP) + /* Avoid the trap. On smp this this never happens since + * we don't set last_task_used_altivec -- Cort + */ + if (new->thread.regs && last_task_used_altivec == new) + new->thread.regs->msr |= MSR_VEC; +#endif /* CONFIG_ALTIVEC */ + + flush_tlb_pending(); + + new_thread = &new->thread; + old_thread = ¤t->thread; + +/* Collect purr utilization data per process and per processor wise */ +/* purr is nothing but processor time base */ + +#if defined(CONFIG_PPC_PSERIES) + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + long unsigned start_tb, current_tb; + start_tb = old_thread->start_tb; + cu->current_tb = current_tb = mfspr(SPRN_PURR); + old_thread->accum_tb += (current_tb - start_tb); + new_thread->start_tb = current_tb; + } +#endif + + + local_irq_save(flags); + last = _switch(old_thread, new_thread); + + local_irq_restore(flags); + + return last; +} + +static int instructions_to_print = 16; + +static void show_instructions(struct pt_regs *regs) +{ + int i; + unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 * + sizeof(int)); + + printk("Instruction dump:"); + + for (i = 0; i < instructions_to_print; i++) { + int instr; + + if (!(i % 8)) + printk("\n"); + + if (((REGION_ID(pc) != KERNEL_REGION_ID) && + (REGION_ID(pc) != VMALLOC_REGION_ID)) || + __get_user(instr, (unsigned int *)pc)) { + printk("XXXXXXXX "); + } else { + if (regs->nip == pc) + printk("<%08x> ", instr); + else + printk("%08x ", instr); + } + + pc += sizeof(int); + } + + printk("\n"); +} + +void show_regs(struct pt_regs * regs) +{ + int i; + unsigned long trap; + + printk("NIP: %016lX XER: %08X LR: %016lX CTR: %016lX\n", + regs->nip, (unsigned int)regs->xer, regs->link, regs->ctr); + printk("REGS: %p TRAP: %04lx %s (%s)\n", + regs, regs->trap, print_tainted(), system_utsname.release); + printk("MSR: %016lx EE: %01x PR: %01x FP: %01x ME: %01x " + "IR/DR: %01x%01x CR: %08X\n", + regs->msr, regs->msr&MSR_EE ? 1 : 0, regs->msr&MSR_PR ? 1 : 0, + regs->msr & MSR_FP ? 1 : 0,regs->msr&MSR_ME ? 1 : 0, + regs->msr&MSR_IR ? 1 : 0, + regs->msr&MSR_DR ? 1 : 0, + (unsigned int)regs->ccr); + trap = TRAP(regs); + printk("DAR: %016lx DSISR: %016lx\n", regs->dar, regs->dsisr); + printk("TASK: %p[%d] '%s' THREAD: %p", + current, current->pid, current->comm, current->thread_info); + +#ifdef CONFIG_SMP + printk(" CPU: %d", smp_processor_id()); +#endif /* CONFIG_SMP */ + + for (i = 0; i < 32; i++) { + if ((i % 4) == 0) { + printk("\n" KERN_INFO "GPR%02d: ", i); + } + + printk("%016lX ", regs->gpr[i]); + if (i == 13 && !FULL_REGS(regs)) + break; + } + printk("\n"); + /* + * Lookup NIP late so we have the best change of getting the + * above info out without failing + */ + printk("NIP [%016lx] ", regs->nip); + print_symbol("%s\n", regs->nip); + printk("LR [%016lx] ", regs->link); + print_symbol("%s\n", regs->link); + show_stack(current, (unsigned long *)regs->gpr[1]); + if (!user_mode(regs)) + show_instructions(regs); +} + +void exit_thread(void) +{ +#ifndef CONFIG_SMP + if (last_task_used_math == current) + last_task_used_math = NULL; +#ifdef CONFIG_ALTIVEC + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; +#endif /* CONFIG_ALTIVEC */ +#endif /* CONFIG_SMP */ +} + +void flush_thread(void) +{ + struct thread_info *t = current_thread_info(); + + if (t->flags & _TIF_ABI_PENDING) + t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT); + +#ifndef CONFIG_SMP + if (last_task_used_math == current) + last_task_used_math = NULL; +#ifdef CONFIG_ALTIVEC + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; +#endif /* CONFIG_ALTIVEC */ +#endif /* CONFIG_SMP */ +} + +void +release_thread(struct task_struct *t) +{ +} + + +/* + * This gets called before we allocate a new thread and copy + * the current task into it. + */ +void prepare_to_copy(struct task_struct *tsk) +{ + flush_fp_to_thread(current); + flush_altivec_to_thread(current); +} + +/* + * Copy a thread.. + */ +int +copy_thread(int nr, unsigned long clone_flags, unsigned long usp, + unsigned long unused, struct task_struct *p, struct pt_regs *regs) +{ + struct pt_regs *childregs, *kregs; + extern void ret_from_fork(void); + unsigned long sp = (unsigned long)p->thread_info + THREAD_SIZE; + + /* Copy registers */ + sp -= sizeof(struct pt_regs); + childregs = (struct pt_regs *) sp; + *childregs = *regs; + if ((childregs->msr & MSR_PR) == 0) { + /* for kernel thread, set stackptr in new task */ + childregs->gpr[1] = sp + sizeof(struct pt_regs); + p->thread.regs = NULL; /* no user register state */ + clear_ti_thread_flag(p->thread_info, TIF_32BIT); +#ifdef CONFIG_PPC_ISERIES + set_ti_thread_flag(p->thread_info, TIF_RUN_LIGHT); +#endif + } else { + childregs->gpr[1] = usp; + p->thread.regs = childregs; + if (clone_flags & CLONE_SETTLS) { + if (test_thread_flag(TIF_32BIT)) + childregs->gpr[2] = childregs->gpr[6]; + else + childregs->gpr[13] = childregs->gpr[6]; + } + } + childregs->gpr[3] = 0; /* Result from fork() */ + sp -= STACK_FRAME_OVERHEAD; + + /* + * The way this works is that at some point in the future + * some task will call _switch to switch to the new task. + * That will pop off the stack frame created below and start + * the new task running at ret_from_fork. The new task will + * do some house keeping and then return from the fork or clone + * system call, using the stack frame created above. + */ + sp -= sizeof(struct pt_regs); + kregs = (struct pt_regs *) sp; + sp -= STACK_FRAME_OVERHEAD; + p->thread.ksp = sp; + if (cpu_has_feature(CPU_FTR_SLB)) { + unsigned long sp_vsid = get_kernel_vsid(sp); + + sp_vsid <<= SLB_VSID_SHIFT; + sp_vsid |= SLB_VSID_KERNEL; + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + sp_vsid |= SLB_VSID_L; + + p->thread.ksp_vsid = sp_vsid; + } + + /* + * The PPC64 ABI makes use of a TOC to contain function + * pointers. The function (ret_from_except) is actually a pointer + * to the TOC entry. The first entry is a pointer to the actual + * function. + */ + kregs->nip = *((unsigned long *)ret_from_fork); + + return 0; +} + +/* + * Set up a thread for executing a new program + */ +void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp) +{ + unsigned long entry, toc, load_addr = regs->gpr[2]; + + /* fdptr is a relocated pointer to the function descriptor for + * the elf _start routine. The first entry in the function + * descriptor is the entry address of _start and the second + * entry is the TOC value we need to use. + */ + set_fs(USER_DS); + __get_user(entry, (unsigned long __user *)fdptr); + __get_user(toc, (unsigned long __user *)fdptr+1); + + /* Check whether the e_entry function descriptor entries + * need to be relocated before we can use them. + */ + if (load_addr != 0) { + entry += load_addr; + toc += load_addr; + } + + /* + * If we exec out of a kernel thread then thread.regs will not be + * set. Do it now. + */ + if (!current->thread.regs) { + unsigned long childregs = (unsigned long)current->thread_info + + THREAD_SIZE; + childregs -= sizeof(struct pt_regs); + current->thread.regs = (struct pt_regs *)childregs; + } + + regs->nip = entry; + regs->gpr[1] = sp; + regs->gpr[2] = toc; + regs->msr = MSR_USER64; +#ifndef CONFIG_SMP + if (last_task_used_math == current) + last_task_used_math = 0; +#endif /* CONFIG_SMP */ + memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); + current->thread.fpscr = 0; +#ifdef CONFIG_ALTIVEC +#ifndef CONFIG_SMP + if (last_task_used_altivec == current) + last_task_used_altivec = 0; +#endif /* CONFIG_SMP */ + memset(current->thread.vr, 0, sizeof(current->thread.vr)); + current->thread.vscr.u[0] = 0; + current->thread.vscr.u[1] = 0; + current->thread.vscr.u[2] = 0; + current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */ + current->thread.vrsave = 0; + current->thread.used_vr = 0; +#endif /* CONFIG_ALTIVEC */ +} +EXPORT_SYMBOL(start_thread); + +int set_fpexc_mode(struct task_struct *tsk, unsigned int val) +{ + struct pt_regs *regs = tsk->thread.regs; + + if (val > PR_FP_EXC_PRECISE) + return -EINVAL; + tsk->thread.fpexc_mode = __pack_fe01(val); + if (regs != NULL && (regs->msr & MSR_FP) != 0) + regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1)) + | tsk->thread.fpexc_mode; + return 0; +} + +int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) +{ + unsigned int val; + + val = __unpack_fe01(tsk->thread.fpexc_mode); + return put_user(val, (unsigned int __user *) adr); +} + +int sys_clone(unsigned long clone_flags, unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, unsigned long p6, + struct pt_regs *regs) +{ + unsigned long parent_tidptr = 0; + unsigned long child_tidptr = 0; + + if (p2 == 0) + p2 = regs->gpr[1]; /* stack pointer for child */ + + if (clone_flags & (CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | + CLONE_CHILD_CLEARTID)) { + parent_tidptr = p3; + child_tidptr = p5; + if (test_thread_flag(TIF_32BIT)) { + parent_tidptr &= 0xffffffff; + child_tidptr &= 0xffffffff; + } + } + + return do_fork(clone_flags, p2, regs, 0, + (int __user *)parent_tidptr, (int __user *)child_tidptr); +} + +int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, unsigned long p6, + struct pt_regs *regs) +{ + return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL); +} + +int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, unsigned long p6, + struct pt_regs *regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1], regs, 0, + NULL, NULL); +} + +int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, unsigned long a5, + struct pt_regs *regs) +{ + int error; + char * filename; + + filename = getname((char __user *) a0); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + error = do_execve(filename, (char __user * __user *) a1, + (char __user * __user *) a2, regs); + + if (error == 0) { + task_lock(current); + current->ptrace &= ~PT_DTRACE; + task_unlock(current); + } + putname(filename); + +out: + return error; +} + +static int kstack_depth_to_print = 64; + +static int validate_sp(unsigned long sp, struct task_struct *p, + unsigned long nbytes) +{ + unsigned long stack_page = (unsigned long)p->thread_info; + + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + +#ifdef CONFIG_IRQSTACKS + stack_page = (unsigned long) hardirq_ctx[task_cpu(p)]; + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + + stack_page = (unsigned long) softirq_ctx[task_cpu(p)]; + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; +#endif + + return 0; +} + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long ip, sp; + int count = 0; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + sp = p->thread.ksp; + if (!validate_sp(sp, p, 112)) + return 0; + + do { + sp = *(unsigned long *)sp; + if (!validate_sp(sp, p, 112)) + return 0; + if (count > 0) { + ip = *(unsigned long *)(sp + 16); + if (!in_sched_functions(ip)) + return ip; + } + } while (count++ < 16); + return 0; +} +EXPORT_SYMBOL(get_wchan); + +void show_stack(struct task_struct *p, unsigned long *_sp) +{ + unsigned long ip, newsp, lr; + int count = 0; + unsigned long sp = (unsigned long)_sp; + int firstframe = 1; + + if (sp == 0) { + if (p) { + sp = p->thread.ksp; + } else { + sp = __get_SP(); + p = current; + } + } + + lr = 0; + printk("Call Trace:\n"); + do { + if (!validate_sp(sp, p, 112)) + return; + + _sp = (unsigned long *) sp; + newsp = _sp[0]; + ip = _sp[2]; + if (!firstframe || ip != lr) { + printk("[%016lx] [%016lx] ", sp, ip); + print_symbol("%s", ip); + if (firstframe) + printk(" (unreliable)"); + printk("\n"); + } + firstframe = 0; + + /* + * See if this is an exception frame. + * We look for the "regshere" marker in the current frame. + */ + if (validate_sp(sp, p, sizeof(struct pt_regs) + 400) + && _sp[12] == 0x7265677368657265ul) { + struct pt_regs *regs = (struct pt_regs *) + (sp + STACK_FRAME_OVERHEAD); + printk("--- Exception: %lx", regs->trap); + print_symbol(" at %s\n", regs->nip); + lr = regs->link; + print_symbol(" LR = %s\n", lr); + firstframe = 1; + } + + sp = newsp; + } while (count++ < kstack_depth_to_print); +} + +void dump_stack(void) +{ + show_stack(current, (unsigned long *)__get_SP()); +} +EXPORT_SYMBOL(dump_stack); diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c new file mode 100644 index 00000000000..01739d5c47c --- /dev/null +++ b/arch/ppc64/kernel/prom.c @@ -0,0 +1,1820 @@ +/* + * + * + * Procedures for interfacing to Open Firmware. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. + * {engebret|bergner}@us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +struct pci_reg_property { + struct pci_address addr; + u32 size_hi; + u32 size_lo; +}; + +struct isa_reg_property { + u32 space; + u32 address; + u32 size; +}; + + +typedef int interpret_func(struct device_node *, unsigned long *, + int, int, int); + +extern struct rtas_t rtas; +extern struct lmb lmb; +extern unsigned long klimit; + +static int __initdata dt_root_addr_cells; +static int __initdata dt_root_size_cells; +static int __initdata iommu_is_off; +int __initdata iommu_force_on; +typedef u32 cell_t; + +#if 0 +static struct boot_param_header *initial_boot_params __initdata; +#else +struct boot_param_header *initial_boot_params; +#endif + +static struct device_node *allnodes = NULL; + +/* use when traversing tree through the allnext, child, sibling, + * or parent members of struct device_node. + */ +static DEFINE_RWLOCK(devtree_lock); + +/* export that to outside world */ +struct device_node *of_chosen; + +/* + * Wrapper for allocating memory for various data that needs to be + * attached to device nodes as they are processed at boot or when + * added to the device tree later (e.g. DLPAR). At boot there is + * already a region reserved so we just increment *mem_start by size; + * otherwise we call kmalloc. + */ +static void * prom_alloc(unsigned long size, unsigned long *mem_start) +{ + unsigned long tmp; + + if (!mem_start) + return kmalloc(size, GFP_KERNEL); + + tmp = *mem_start; + *mem_start += size; + return (void *)tmp; +} + +/* + * Find the device_node with a given phandle. + */ +static struct device_node * find_phandle(phandle ph) +{ + struct device_node *np; + + for (np = allnodes; np != 0; np = np->allnext) + if (np->linux_phandle == ph) + return np; + return NULL; +} + +/* + * Find the interrupt parent of a node. + */ +static struct device_node * __devinit intr_parent(struct device_node *p) +{ + phandle *parp; + + parp = (phandle *) get_property(p, "interrupt-parent", NULL); + if (parp == NULL) + return p->parent; + return find_phandle(*parp); +} + +/* + * Find out the size of each entry of the interrupts property + * for a node. + */ +int __devinit prom_n_intr_cells(struct device_node *np) +{ + struct device_node *p; + unsigned int *icp; + + for (p = np; (p = intr_parent(p)) != NULL; ) { + icp = (unsigned int *) + get_property(p, "#interrupt-cells", NULL); + if (icp != NULL) + return *icp; + if (get_property(p, "interrupt-controller", NULL) != NULL + || get_property(p, "interrupt-map", NULL) != NULL) { + printk("oops, node %s doesn't have #interrupt-cells\n", + p->full_name); + return 1; + } + } +#ifdef DEBUG_IRQ + printk("prom_n_intr_cells failed for %s\n", np->full_name); +#endif + return 1; +} + +/* + * Map an interrupt from a device up to the platform interrupt + * descriptor. + */ +static int __devinit map_interrupt(unsigned int **irq, struct device_node **ictrler, + struct device_node *np, unsigned int *ints, + int nintrc) +{ + struct device_node *p, *ipar; + unsigned int *imap, *imask, *ip; + int i, imaplen, match; + int newintrc = 0, newaddrc = 0; + unsigned int *reg; + int naddrc; + + reg = (unsigned int *) get_property(np, "reg", NULL); + naddrc = prom_n_addr_cells(np); + p = intr_parent(np); + while (p != NULL) { + if (get_property(p, "interrupt-controller", NULL) != NULL) + /* this node is an interrupt controller, stop here */ + break; + imap = (unsigned int *) + get_property(p, "interrupt-map", &imaplen); + if (imap == NULL) { + p = intr_parent(p); + continue; + } + imask = (unsigned int *) + get_property(p, "interrupt-map-mask", NULL); + if (imask == NULL) { + printk("oops, %s has interrupt-map but no mask\n", + p->full_name); + return 0; + } + imaplen /= sizeof(unsigned int); + match = 0; + ipar = NULL; + while (imaplen > 0 && !match) { + /* check the child-interrupt field */ + match = 1; + for (i = 0; i < naddrc && match; ++i) + match = ((reg[i] ^ imap[i]) & imask[i]) == 0; + for (; i < naddrc + nintrc && match; ++i) + match = ((ints[i-naddrc] ^ imap[i]) & imask[i]) == 0; + imap += naddrc + nintrc; + imaplen -= naddrc + nintrc; + /* grab the interrupt parent */ + ipar = find_phandle((phandle) *imap++); + --imaplen; + if (ipar == NULL) { + printk("oops, no int parent %x in map of %s\n", + imap[-1], p->full_name); + return 0; + } + /* find the parent's # addr and intr cells */ + ip = (unsigned int *) + get_property(ipar, "#interrupt-cells", NULL); + if (ip == NULL) { + printk("oops, no #interrupt-cells on %s\n", + ipar->full_name); + return 0; + } + newintrc = *ip; + ip = (unsigned int *) + get_property(ipar, "#address-cells", NULL); + newaddrc = (ip == NULL)? 0: *ip; + imap += newaddrc + newintrc; + imaplen -= newaddrc + newintrc; + } + if (imaplen < 0) { + printk("oops, error decoding int-map on %s, len=%d\n", + p->full_name, imaplen); + return 0; + } + if (!match) { +#ifdef DEBUG_IRQ + printk("oops, no match in %s int-map for %s\n", + p->full_name, np->full_name); +#endif + return 0; + } + p = ipar; + naddrc = newaddrc; + nintrc = newintrc; + ints = imap - nintrc; + reg = ints - naddrc; + } + if (p == NULL) { +#ifdef DEBUG_IRQ + printk("hmmm, int tree for %s doesn't have ctrler\n", + np->full_name); +#endif + return 0; + } + *irq = ints; + *ictrler = p; + return nintrc; +} + +static int __devinit finish_node_interrupts(struct device_node *np, + unsigned long *mem_start, + int measure_only) +{ + unsigned int *ints; + int intlen, intrcells, intrcount; + int i, j, n; + unsigned int *irq, virq; + struct device_node *ic; + + ints = (unsigned int *) get_property(np, "interrupts", &intlen); + if (ints == NULL) + return 0; + intrcells = prom_n_intr_cells(np); + intlen /= intrcells * sizeof(unsigned int); + + np->intrs = prom_alloc(intlen * sizeof(*(np->intrs)), mem_start); + if (!np->intrs) + return -ENOMEM; + + if (measure_only) + return 0; + + intrcount = 0; + for (i = 0; i < intlen; ++i, ints += intrcells) { + n = map_interrupt(&irq, &ic, np, ints, intrcells); + if (n <= 0) + continue; + + /* don't map IRQ numbers under a cascaded 8259 controller */ + if (ic && device_is_compatible(ic, "chrp,iic")) { + np->intrs[intrcount].line = irq[0]; + } else { + virq = virt_irq_create_mapping(irq[0]); + if (virq == NO_IRQ) { + printk(KERN_CRIT "Could not allocate interrupt" + " number for %s\n", np->full_name); + continue; + } + np->intrs[intrcount].line = irq_offset_up(virq); + } + + /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ + if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) { + char *name = get_property(ic->parent, "name", NULL); + if (name && !strcmp(name, "u3")) + np->intrs[intrcount].line += 128; + } + np->intrs[intrcount].sense = 1; + if (n > 1) + np->intrs[intrcount].sense = irq[1]; + if (n > 2) { + printk("hmmm, got %d intr cells for %s:", n, + np->full_name); + for (j = 0; j < n; ++j) + printk(" %d", irq[j]); + printk("\n"); + } + ++intrcount; + } + np->n_intrs = intrcount; + + return 0; +} + +static int __devinit interpret_pci_props(struct device_node *np, + unsigned long *mem_start, + int naddrc, int nsizec, + int measure_only) +{ + struct address_range *adr; + struct pci_reg_property *pci_addrs; + int i, l, n_addrs; + + pci_addrs = (struct pci_reg_property *) + get_property(np, "assigned-addresses", &l); + if (!pci_addrs) + return 0; + + n_addrs = l / sizeof(*pci_addrs); + + adr = prom_alloc(n_addrs * sizeof(*adr), mem_start); + if (!adr) + return -ENOMEM; + + if (measure_only) + return 0; + + np->addrs = adr; + np->n_addrs = n_addrs; + + for (i = 0; i < n_addrs; i++) { + adr[i].space = pci_addrs[i].addr.a_hi; + adr[i].address = pci_addrs[i].addr.a_lo | + ((u64)pci_addrs[i].addr.a_mid << 32); + adr[i].size = pci_addrs[i].size_lo; + } + + return 0; +} + +static int __init interpret_dbdma_props(struct device_node *np, + unsigned long *mem_start, + int naddrc, int nsizec, + int measure_only) +{ + struct reg_property32 *rp; + struct address_range *adr; + unsigned long base_address; + int i, l; + struct device_node *db; + + base_address = 0; + if (!measure_only) { + for (db = np->parent; db != NULL; db = db->parent) { + if (!strcmp(db->type, "dbdma") && db->n_addrs != 0) { + base_address = db->addrs[0].address; + break; + } + } + } + + rp = (struct reg_property32 *) get_property(np, "reg", &l); + if (rp != 0 && l >= sizeof(struct reg_property32)) { + i = 0; + adr = (struct address_range *) (*mem_start); + while ((l -= sizeof(struct reg_property32)) >= 0) { + if (!measure_only) { + adr[i].space = 2; + adr[i].address = rp[i].address + base_address; + adr[i].size = rp[i].size; + } + ++i; + } + np->addrs = adr; + np->n_addrs = i; + (*mem_start) += i * sizeof(struct address_range); + } + + return 0; +} + +static int __init interpret_macio_props(struct device_node *np, + unsigned long *mem_start, + int naddrc, int nsizec, + int measure_only) +{ + struct reg_property32 *rp; + struct address_range *adr; + unsigned long base_address; + int i, l; + struct device_node *db; + + base_address = 0; + if (!measure_only) { + for (db = np->parent; db != NULL; db = db->parent) { + if (!strcmp(db->type, "mac-io") && db->n_addrs != 0) { + base_address = db->addrs[0].address; + break; + } + } + } + + rp = (struct reg_property32 *) get_property(np, "reg", &l); + if (rp != 0 && l >= sizeof(struct reg_property32)) { + i = 0; + adr = (struct address_range *) (*mem_start); + while ((l -= sizeof(struct reg_property32)) >= 0) { + if (!measure_only) { + adr[i].space = 2; + adr[i].address = rp[i].address + base_address; + adr[i].size = rp[i].size; + } + ++i; + } + np->addrs = adr; + np->n_addrs = i; + (*mem_start) += i * sizeof(struct address_range); + } + + return 0; +} + +static int __init interpret_isa_props(struct device_node *np, + unsigned long *mem_start, + int naddrc, int nsizec, + int measure_only) +{ + struct isa_reg_property *rp; + struct address_range *adr; + int i, l; + + rp = (struct isa_reg_property *) get_property(np, "reg", &l); + if (rp != 0 && l >= sizeof(struct isa_reg_property)) { + i = 0; + adr = (struct address_range *) (*mem_start); + while ((l -= sizeof(struct isa_reg_property)) >= 0) { + if (!measure_only) { + adr[i].space = rp[i].space; + adr[i].address = rp[i].address; + adr[i].size = rp[i].size; + } + ++i; + } + np->addrs = adr; + np->n_addrs = i; + (*mem_start) += i * sizeof(struct address_range); + } + + return 0; +} + +static int __init interpret_root_props(struct device_node *np, + unsigned long *mem_start, + int naddrc, int nsizec, + int measure_only) +{ + struct address_range *adr; + int i, l; + unsigned int *rp; + int rpsize = (naddrc + nsizec) * sizeof(unsigned int); + + rp = (unsigned int *) get_property(np, "reg", &l); + if (rp != 0 && l >= rpsize) { + i = 0; + adr = (struct address_range *) (*mem_start); + while ((l -= rpsize) >= 0) { + if (!measure_only) { + adr[i].space = 0; + adr[i].address = rp[naddrc - 1]; + adr[i].size = rp[naddrc + nsizec - 1]; + } + ++i; + rp += naddrc + nsizec; + } + np->addrs = adr; + np->n_addrs = i; + (*mem_start) += i * sizeof(struct address_range); + } + + return 0; +} + +static int __devinit finish_node(struct device_node *np, + unsigned long *mem_start, + interpret_func *ifunc, + int naddrc, int nsizec, + int measure_only) +{ + struct device_node *child; + int *ip, rc = 0; + + /* get the device addresses and interrupts */ + if (ifunc != NULL) + rc = ifunc(np, mem_start, naddrc, nsizec, measure_only); + if (rc) + goto out; + + rc = finish_node_interrupts(np, mem_start, measure_only); + if (rc) + goto out; + + /* Look for #address-cells and #size-cells properties. */ + ip = (int *) get_property(np, "#address-cells", NULL); + if (ip != NULL) + naddrc = *ip; + ip = (int *) get_property(np, "#size-cells", NULL); + if (ip != NULL) + nsizec = *ip; + + /* the f50 sets the name to 'display' and 'compatible' to what we + * expect for the name -- Cort + */ + if (!strcmp(np->name, "display")) + np->name = get_property(np, "compatible", NULL); + + if (!strcmp(np->name, "device-tree") || np->parent == NULL) + ifunc = interpret_root_props; + else if (np->type == 0) + ifunc = NULL; + else if (!strcmp(np->type, "pci") || !strcmp(np->type, "vci")) + ifunc = interpret_pci_props; + else if (!strcmp(np->type, "dbdma")) + ifunc = interpret_dbdma_props; + else if (!strcmp(np->type, "mac-io") || ifunc == interpret_macio_props) + ifunc = interpret_macio_props; + else if (!strcmp(np->type, "isa")) + ifunc = interpret_isa_props; + else if (!strcmp(np->name, "uni-n") || !strcmp(np->name, "u3")) + ifunc = interpret_root_props; + else if (!((ifunc == interpret_dbdma_props + || ifunc == interpret_macio_props) + && (!strcmp(np->type, "escc") + || !strcmp(np->type, "media-bay")))) + ifunc = NULL; + + for (child = np->child; child != NULL; child = child->sibling) { + rc = finish_node(child, mem_start, ifunc, + naddrc, nsizec, measure_only); + if (rc) + goto out; + } +out: + return rc; +} + +/** + * finish_device_tree is called once things are running normally + * (i.e. with text and data mapped to the address they were linked at). + * It traverses the device tree and fills in some of the additional, + * fields in each node like {n_}addrs and {n_}intrs, the virt interrupt + * mapping is also initialized at this point. + */ +void __init finish_device_tree(void) +{ + unsigned long start, end, size = 0; + + DBG(" -> finish_device_tree\n"); + + if (ppc64_interrupt_controller == IC_INVALID) { + DBG("failed to configure interrupt controller type\n"); + panic("failed to configure interrupt controller type\n"); + } + + /* Initialize virtual IRQ map */ + virt_irq_init(); + + /* + * Finish device-tree (pre-parsing some properties etc...) + * We do this in 2 passes. One with "measure_only" set, which + * will only measure the amount of memory needed, then we can + * allocate that memory, and call finish_node again. However, + * we must be careful as most routines will fail nowadays when + * prom_alloc() returns 0, so we must make sure our first pass + * doesn't start at 0. We pre-initialize size to 16 for that + * reason and then remove those additional 16 bytes + */ + size = 16; + finish_node(allnodes, &size, NULL, 0, 0, 1); + size -= 16; + end = start = (unsigned long)abs_to_virt(lmb_alloc(size, 128)); + finish_node(allnodes, &end, NULL, 0, 0, 0); + BUG_ON(end != start + size); + + DBG(" <- finish_device_tree\n"); +} + +#ifdef DEBUG +#define printk udbg_printf +#endif + +static inline char *find_flat_dt_string(u32 offset) +{ + return ((char *)initial_boot_params) + initial_boot_params->off_dt_strings + + offset; +} + +/** + * This function is used to scan the flattened device-tree, it is + * used to extract the memory informations at boot before we can + * unflatten the tree + */ +static int __init scan_flat_dt(int (*it)(unsigned long node, + const char *full_path, void *data), + void *data) +{ + unsigned long p = ((unsigned long)initial_boot_params) + + initial_boot_params->off_dt_struct; + int rc = 0; + + do { + u32 tag = *((u32 *)p); + char *pathp; + + p += 4; + if (tag == OF_DT_END_NODE) + continue; + if (tag == OF_DT_END) + break; + if (tag == OF_DT_PROP) { + u32 sz = *((u32 *)p); + p += 8; + p = _ALIGN(p, sz >= 8 ? 8 : 4); + p += sz; + p = _ALIGN(p, 4); + continue; + } + if (tag != OF_DT_BEGIN_NODE) { + printk(KERN_WARNING "Invalid tag %x scanning flattened" + " device tree !\n", tag); + return -EINVAL; + } + pathp = (char *)p; + p = _ALIGN(p + strlen(pathp) + 1, 4); + rc = it(p, pathp, data); + if (rc != 0) + break; + } while(1); + + return rc; +} + +/** + * This function can be used within scan_flattened_dt callback to get + * access to properties + */ +static void* __init get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size) +{ + unsigned long p = node; + + do { + u32 tag = *((u32 *)p); + u32 sz, noff; + const char *nstr; + + p += 4; + if (tag != OF_DT_PROP) + return NULL; + + sz = *((u32 *)p); + noff = *((u32 *)(p + 4)); + p += 8; + p = _ALIGN(p, sz >= 8 ? 8 : 4); + + nstr = find_flat_dt_string(noff); + if (nstr == NULL) { + printk(KERN_WARNING "Can't find property index name !\n"); + return NULL; + } + if (strcmp(name, nstr) == 0) { + if (size) + *size = sz; + return (void *)p; + } + p += sz; + p = _ALIGN(p, 4); + } while(1); +} + +static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, + unsigned long align) +{ + void *res; + + *mem = _ALIGN(*mem, align); + res = (void *)*mem; + *mem += size; + + return res; +} + +static unsigned long __init unflatten_dt_node(unsigned long mem, + unsigned long *p, + struct device_node *dad, + struct device_node ***allnextpp) +{ + struct device_node *np; + struct property *pp, **prev_pp = NULL; + char *pathp; + u32 tag; + unsigned int l; + + tag = *((u32 *)(*p)); + if (tag != OF_DT_BEGIN_NODE) { + printk("Weird tag at start of node: %x\n", tag); + return mem; + } + *p += 4; + pathp = (char *)*p; + l = strlen(pathp) + 1; + *p = _ALIGN(*p + l, 4); + + np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + l, + __alignof__(struct device_node)); + if (allnextpp) { + memset(np, 0, sizeof(*np)); + np->full_name = ((char*)np) + sizeof(struct device_node); + memcpy(np->full_name, pathp, l); + prev_pp = &np->properties; + **allnextpp = np; + *allnextpp = &np->allnext; + if (dad != NULL) { + np->parent = dad; + /* we temporarily use the `next' field as `last_child'. */ + if (dad->next == 0) + dad->child = np; + else + dad->next->sibling = np; + dad->next = np; + } + kref_init(&np->kref); + } + while(1) { + u32 sz, noff; + char *pname; + + tag = *((u32 *)(*p)); + if (tag != OF_DT_PROP) + break; + *p += 4; + sz = *((u32 *)(*p)); + noff = *((u32 *)((*p) + 4)); + *p = _ALIGN((*p) + 8, sz >= 8 ? 8 : 4); + + pname = find_flat_dt_string(noff); + if (pname == NULL) { + printk("Can't find property name in list !\n"); + break; + } + l = strlen(pname) + 1; + pp = unflatten_dt_alloc(&mem, sizeof(struct property), + __alignof__(struct property)); + if (allnextpp) { + if (strcmp(pname, "linux,phandle") == 0) { + np->node = *((u32 *)*p); + if (np->linux_phandle == 0) + np->linux_phandle = np->node; + } + if (strcmp(pname, "ibm,phandle") == 0) + np->linux_phandle = *((u32 *)*p); + pp->name = pname; + pp->length = sz; + pp->value = (void *)*p; + *prev_pp = pp; + prev_pp = &pp->next; + } + *p = _ALIGN((*p) + sz, 4); + } + if (allnextpp) { + *prev_pp = NULL; + np->name = get_property(np, "name", NULL); + np->type = get_property(np, "device_type", NULL); + + if (!np->name) + np->name = ""; + if (!np->type) + np->type = ""; + } + while (tag == OF_DT_BEGIN_NODE) { + mem = unflatten_dt_node(mem, p, np, allnextpp); + tag = *((u32 *)(*p)); + } + if (tag != OF_DT_END_NODE) { + printk("Weird tag at start of node: %x\n", tag); + return mem; + } + *p += 4; + return mem; +} + + +/** + * unflattens the device-tree passed by the firmware, creating the + * tree of struct device_node. It also fills the "name" and "type" + * pointers of the nodes so the normal device-tree walking functions + * can be used (this used to be done by finish_device_tree) + */ +void __init unflatten_device_tree(void) +{ + unsigned long start, mem, size; + struct device_node **allnextp = &allnodes; + char *p; + int l = 0; + + DBG(" -> unflatten_device_tree()\n"); + + /* First pass, scan for size */ + start = ((unsigned long)initial_boot_params) + + initial_boot_params->off_dt_struct; + size = unflatten_dt_node(0, &start, NULL, NULL); + + DBG(" size is %lx, allocating...\n", size); + + /* Allocate memory for the expanded device tree */ + mem = (unsigned long)abs_to_virt(lmb_alloc(size, + __alignof__(struct device_node))); + DBG(" unflattening...\n", mem); + + /* Second pass, do actual unflattening */ + start = ((unsigned long)initial_boot_params) + + initial_boot_params->off_dt_struct; + unflatten_dt_node(mem, &start, NULL, &allnextp); + if (*((u32 *)start) != OF_DT_END) + printk(KERN_WARNING "Weird tag at end of tree: %x\n", *((u32 *)start)); + *allnextp = NULL; + + /* Get pointer to OF "/chosen" node for use everywhere */ + of_chosen = of_find_node_by_path("/chosen"); + + /* Retreive command line */ + if (of_chosen != NULL) { + p = (char *)get_property(of_chosen, "bootargs", &l); + if (p != NULL && l > 0) + strlcpy(cmd_line, p, min(l, COMMAND_LINE_SIZE)); + } +#ifdef CONFIG_CMDLINE + if (l == 0 || (l == 1 && (*p) == 0)) + strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#endif /* CONFIG_CMDLINE */ + + DBG("Command line is: %s\n", cmd_line); + + DBG(" <- unflatten_device_tree()\n"); +} + + +static int __init early_init_dt_scan_cpus(unsigned long node, + const char *full_path, void *data) +{ + char *type = get_flat_dt_prop(node, "device_type", NULL); + + /* We are scanning "cpu" nodes only */ + if (type == NULL || strcmp(type, "cpu") != 0) + return 0; + + /* On LPAR, look for the first ibm,pft-size property for the hash table size + */ + if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) { + u32 *pft_size; + pft_size = (u32 *)get_flat_dt_prop(node, "ibm,pft-size", NULL); + if (pft_size != NULL) { + /* pft_size[0] is the NUMA CEC cookie */ + ppc64_pft_size = pft_size[1]; + } + } + + if (initial_boot_params && initial_boot_params->version >= 2) { + /* version 2 of the kexec param format adds the phys cpuid + * of booted proc. + */ + boot_cpuid_phys = initial_boot_params->boot_cpuid_phys; + boot_cpuid = 0; + } else { + /* Check if it's the boot-cpu, set it's hw index in paca now */ + if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { + u32 *prop = get_flat_dt_prop(node, "reg", NULL); + set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop); + boot_cpuid_phys = get_hard_smp_processor_id(0); + } + } + + return 0; +} + +static int __init early_init_dt_scan_chosen(unsigned long node, + const char *full_path, void *data) +{ + u32 *prop; + u64 *prop64; + extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end; + + if (strcmp(full_path, "/chosen") != 0) + return 0; + + /* get platform type */ + prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL); + if (prop == NULL) + return 0; + systemcfg->platform = *prop; + + /* check if iommu is forced on or off */ + if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) + iommu_is_off = 1; + if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) + iommu_force_on = 1; + + prop64 = (u64*)get_flat_dt_prop(node, "linux,memory-limit", NULL); + if (prop64) + memory_limit = *prop64; + + prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); + if (prop64) + tce_alloc_start = *prop64; + + prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); + if (prop64) + tce_alloc_end = *prop64; + +#ifdef CONFIG_PPC_RTAS + /* To help early debugging via the front panel, we retreive a minimal + * set of RTAS infos now if available + */ + { + u64 *basep, *entryp; + + basep = (u64*)get_flat_dt_prop(node, "linux,rtas-base", NULL); + entryp = (u64*)get_flat_dt_prop(node, "linux,rtas-entry", NULL); + prop = (u32*)get_flat_dt_prop(node, "linux,rtas-size", NULL); + if (basep && entryp && prop) { + rtas.base = *basep; + rtas.entry = *entryp; + rtas.size = *prop; + } + } +#endif /* CONFIG_PPC_RTAS */ + + /* break now */ + return 1; +} + +static int __init early_init_dt_scan_root(unsigned long node, + const char *full_path, void *data) +{ + u32 *prop; + + if (strcmp(full_path, "/") != 0) + return 0; + + prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL); + dt_root_size_cells = (prop == NULL) ? 1 : *prop; + + prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL); + dt_root_addr_cells = (prop == NULL) ? 2 : *prop; + + /* break now */ + return 1; +} + +static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) +{ + cell_t *p = *cellp; + unsigned long r = 0; + + /* Ignore more than 2 cells */ + while (s > 2) { + p++; + s--; + } + while (s) { + r <<= 32; + r |= *(p++); + s--; + } + + *cellp = p; + return r; +} + + +static int __init early_init_dt_scan_memory(unsigned long node, + const char *full_path, void *data) +{ + char *type = get_flat_dt_prop(node, "device_type", NULL); + cell_t *reg, *endp; + unsigned long l; + + /* We are scanning "memory" nodes only */ + if (type == NULL || strcmp(type, "memory") != 0) + return 0; + + reg = (cell_t *)get_flat_dt_prop(node, "reg", &l); + if (reg == NULL) + return 0; + + endp = reg + (l / sizeof(cell_t)); + + DBG("memory scan node %s ...\n", full_path); + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { + unsigned long base, size; + + base = dt_mem_next_cell(dt_root_addr_cells, ®); + size = dt_mem_next_cell(dt_root_size_cells, ®); + + if (size == 0) + continue; + DBG(" - %lx , %lx\n", base, size); + if (iommu_is_off) { + if (base >= 0x80000000ul) + continue; + if ((base + size) > 0x80000000ul) + size = 0x80000000ul - base; + } + lmb_add(base, size); + } + return 0; +} + +static void __init early_reserve_mem(void) +{ + u64 base, size; + u64 *reserve_map = (u64 *)(((unsigned long)initial_boot_params) + + initial_boot_params->off_mem_rsvmap); + while (1) { + base = *(reserve_map++); + size = *(reserve_map++); + if (size == 0) + break; + DBG("reserving: %lx -> %lx\n", base, size); + lmb_reserve(base, size); + } + +#if 0 + DBG("memory reserved, lmbs :\n"); + lmb_dump_all(); +#endif +} + +void __init early_init_devtree(void *params) +{ + DBG(" -> early_init_devtree()\n"); + + /* Setup flat device-tree pointer */ + initial_boot_params = params; + + /* By default, hash size is not set */ + ppc64_pft_size = 0; + + /* Retreive various informations from the /chosen node of the + * device-tree, including the platform type, initrd location and + * size, TCE reserve, and more ... + */ + scan_flat_dt(early_init_dt_scan_chosen, NULL); + + /* Scan memory nodes and rebuild LMBs */ + lmb_init(); + scan_flat_dt(early_init_dt_scan_root, NULL); + scan_flat_dt(early_init_dt_scan_memory, NULL); + lmb_enforce_memory_limit(); + lmb_analyze(); + systemcfg->physicalMemorySize = lmb_phys_mem_size(); + lmb_reserve(0, __pa(klimit)); + + DBG("Phys. mem: %lx\n", systemcfg->physicalMemorySize); + + /* Reserve LMB regions used by kernel, initrd, dt, etc... */ + early_reserve_mem(); + + DBG("Scanning CPUs ...\n"); + + /* Retreive hash table size from flattened tree */ + scan_flat_dt(early_init_dt_scan_cpus, NULL); + + /* If hash size wasn't obtained above, we calculate it now based on + * the total RAM size + */ + if (ppc64_pft_size == 0) { + unsigned long rnd_mem_size, pteg_count; + + /* round mem_size up to next power of 2 */ + rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize); + if (rnd_mem_size < systemcfg->physicalMemorySize) + rnd_mem_size <<= 1; + + /* # pages / 2 */ + pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11); + + ppc64_pft_size = __ilog2(pteg_count << 7); + } + + DBG("Hash pftSize: %x\n", (int)ppc64_pft_size); + DBG(" <- early_init_devtree()\n"); +} + +#undef printk + +int +prom_n_addr_cells(struct device_node* np) +{ + int* ip; + do { + if (np->parent) + np = np->parent; + ip = (int *) get_property(np, "#address-cells", NULL); + if (ip != NULL) + return *ip; + } while (np->parent); + /* No #address-cells property for the root node, default to 1 */ + return 1; +} + +int +prom_n_size_cells(struct device_node* np) +{ + int* ip; + do { + if (np->parent) + np = np->parent; + ip = (int *) get_property(np, "#size-cells", NULL); + if (ip != NULL) + return *ip; + } while (np->parent); + /* No #size-cells property for the root node, default to 1 */ + return 1; +} + +/** + * Work out the sense (active-low level / active-high edge) + * of each interrupt from the device tree. + */ +void __init prom_get_irq_senses(unsigned char *senses, int off, int max) +{ + struct device_node *np; + int i, j; + + /* default to level-triggered */ + memset(senses, 1, max - off); + + for (np = allnodes; np != 0; np = np->allnext) { + for (j = 0; j < np->n_intrs; j++) { + i = np->intrs[j].line; + if (i >= off && i < max) + senses[i-off] = np->intrs[j].sense ? + IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE : + IRQ_SENSE_EDGE | IRQ_POLARITY_POSITIVE; + } + } +} + +/** + * Construct and return a list of the device_nodes with a given name. + */ +struct device_node * +find_devices(const char *name) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + if (np->name != 0 && strcasecmp(np->name, name) == 0) { + *prevp = np; + prevp = &np->next; + } + } + *prevp = NULL; + return head; +} +EXPORT_SYMBOL(find_devices); + +/** + * Construct and return a list of the device_nodes with a given type. + */ +struct device_node * +find_type_devices(const char *type) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + if (np->type != 0 && strcasecmp(np->type, type) == 0) { + *prevp = np; + prevp = &np->next; + } + } + *prevp = NULL; + return head; +} +EXPORT_SYMBOL(find_type_devices); + +/** + * Returns all nodes linked together + */ +struct device_node * +find_all_nodes(void) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + *prevp = np; + prevp = &np->next; + } + *prevp = NULL; + return head; +} +EXPORT_SYMBOL(find_all_nodes); + +/** Checks if the given "compat" string matches one of the strings in + * the device's "compatible" property + */ +int +device_is_compatible(struct device_node *device, const char *compat) +{ + const char* cp; + int cplen, l; + + cp = (char *) get_property(device, "compatible", &cplen); + if (cp == NULL) + return 0; + while (cplen > 0) { + if (strncasecmp(cp, compat, strlen(compat)) == 0) + return 1; + l = strlen(cp) + 1; + cp += l; + cplen -= l; + } + + return 0; +} +EXPORT_SYMBOL(device_is_compatible); + + +/** + * Indicates whether the root node has a given value in its + * compatible property. + */ +int +machine_is_compatible(const char *compat) +{ + struct device_node *root; + int rc = 0; + + root = of_find_node_by_path("/"); + if (root) { + rc = device_is_compatible(root, compat); + of_node_put(root); + } + return rc; +} +EXPORT_SYMBOL(machine_is_compatible); + +/** + * Construct and return a list of the device_nodes with a given type + * and compatible property. + */ +struct device_node * +find_compatible_devices(const char *type, const char *compat) +{ + struct device_node *head, **prevp, *np; + + prevp = &head; + for (np = allnodes; np != 0; np = np->allnext) { + if (type != NULL + && !(np->type != 0 && strcasecmp(np->type, type) == 0)) + continue; + if (device_is_compatible(np, compat)) { + *prevp = np; + prevp = &np->next; + } + } + *prevp = NULL; + return head; +} +EXPORT_SYMBOL(find_compatible_devices); + +/** + * Find the device_node with a given full_name. + */ +struct device_node * +find_path_device(const char *path) +{ + struct device_node *np; + + for (np = allnodes; np != 0; np = np->allnext) + if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0) + return np; + return NULL; +} +EXPORT_SYMBOL(find_path_device); + +/******* + * + * New implementation of the OF "find" APIs, return a refcounted + * object, call of_node_put() when done. The device tree and list + * are protected by a rw_lock. + * + * Note that property management will need some locking as well, + * this isn't dealt with yet. + * + *******/ + +/** + * of_find_node_by_name - Find a node by its "name" property + * @from: The node to start searching from or NULL, the node + * you pass will not be searched, only the next one + * will; typically, you pass what the previous call + * returned. of_node_put() will be called on it + * @name: The name string to match against + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_node_by_name(struct device_node *from, + const char *name) +{ + struct device_node *np; + + read_lock(&devtree_lock); + np = from ? from->allnext : allnodes; + for (; np != 0; np = np->allnext) + if (np->name != 0 && strcasecmp(np->name, name) == 0 + && of_node_get(np)) + break; + if (from) + of_node_put(from); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_node_by_name); + +/** + * of_find_node_by_type - Find a node by its "device_type" property + * @from: The node to start searching from or NULL, the node + * you pass will not be searched, only the next one + * will; typically, you pass what the previous call + * returned. of_node_put() will be called on it + * @name: The type string to match against + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_node_by_type(struct device_node *from, + const char *type) +{ + struct device_node *np; + + read_lock(&devtree_lock); + np = from ? from->allnext : allnodes; + for (; np != 0; np = np->allnext) + if (np->type != 0 && strcasecmp(np->type, type) == 0 + && of_node_get(np)) + break; + if (from) + of_node_put(from); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_node_by_type); + +/** + * of_find_compatible_node - Find a node based on type and one of the + * tokens in its "compatible" property + * @from: The node to start searching from or NULL, the node + * you pass will not be searched, only the next one + * will; typically, you pass what the previous call + * returned. of_node_put() will be called on it + * @type: The type string to match "device_type" or NULL to ignore + * @compatible: The string to match to one of the tokens in the device + * "compatible" list. + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_compatible_node(struct device_node *from, + const char *type, const char *compatible) +{ + struct device_node *np; + + read_lock(&devtree_lock); + np = from ? from->allnext : allnodes; + for (; np != 0; np = np->allnext) { + if (type != NULL + && !(np->type != 0 && strcasecmp(np->type, type) == 0)) + continue; + if (device_is_compatible(np, compatible) && of_node_get(np)) + break; + } + if (from) + of_node_put(from); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_compatible_node); + +/** + * of_find_node_by_path - Find a node matching a full OF path + * @path: The full path to match + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_node_by_path(const char *path) +{ + struct device_node *np = allnodes; + + read_lock(&devtree_lock); + for (; np != 0; np = np->allnext) + if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0 + && of_node_get(np)) + break; + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_node_by_path); + +/** + * of_find_node_by_phandle - Find a node given a phandle + * @handle: phandle of the node to find + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_node_by_phandle(phandle handle) +{ + struct device_node *np; + + read_lock(&devtree_lock); + for (np = allnodes; np != 0; np = np->allnext) + if (np->linux_phandle == handle) + break; + if (np) + of_node_get(np); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_node_by_phandle); + +/** + * of_find_all_nodes - Get next node in global list + * @prev: Previous node or NULL to start iteration + * of_node_put() will be called on it + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_all_nodes(struct device_node *prev) +{ + struct device_node *np; + + read_lock(&devtree_lock); + np = prev ? prev->allnext : allnodes; + for (; np != 0; np = np->allnext) + if (of_node_get(np)) + break; + if (prev) + of_node_put(prev); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_all_nodes); + +/** + * of_get_parent - Get a node's parent if any + * @node: Node to get parent + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_get_parent(const struct device_node *node) +{ + struct device_node *np; + + if (!node) + return NULL; + + read_lock(&devtree_lock); + np = of_node_get(node->parent); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_get_parent); + +/** + * of_get_next_child - Iterate a node childs + * @node: parent node + * @prev: previous child of the parent node, or NULL to get first + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_get_next_child(const struct device_node *node, + struct device_node *prev) +{ + struct device_node *next; + + read_lock(&devtree_lock); + next = prev ? prev->sibling : node->child; + for (; next != 0; next = next->sibling) + if (of_node_get(next)) + break; + if (prev) + of_node_put(prev); + read_unlock(&devtree_lock); + return next; +} +EXPORT_SYMBOL(of_get_next_child); + +/** + * of_node_get - Increment refcount of a node + * @node: Node to inc refcount, NULL is supported to + * simplify writing of callers + * + * Returns node. + */ +struct device_node *of_node_get(struct device_node *node) +{ + if (node) + kref_get(&node->kref); + return node; +} +EXPORT_SYMBOL(of_node_get); + +static inline struct device_node * kref_to_device_node(struct kref *kref) +{ + return container_of(kref, struct device_node, kref); +} + +/** + * of_node_release - release a dynamically allocated node + * @kref: kref element of the node to be released + * + * In of_node_put() this function is passed to kref_put() + * as the destructor. + */ +static void of_node_release(struct kref *kref) +{ + struct device_node *node = kref_to_device_node(kref); + struct property *prop = node->properties; + + if (!OF_IS_DYNAMIC(node)) + return; + while (prop) { + struct property *next = prop->next; + kfree(prop->name); + kfree(prop->value); + kfree(prop); + prop = next; + } + kfree(node->intrs); + kfree(node->addrs); + kfree(node->full_name); + kfree(node); +} + +/** + * of_node_put - Decrement refcount of a node + * @node: Node to dec refcount, NULL is supported to + * simplify writing of callers + * + */ +void of_node_put(struct device_node *node) +{ + if (node) + kref_put(&node->kref, of_node_release); +} +EXPORT_SYMBOL(of_node_put); + +/* + * Fix up the uninitialized fields in a new device node: + * name, type, n_addrs, addrs, n_intrs, intrs, and pci-specific fields + * + * A lot of boot-time code is duplicated here, because functions such + * as finish_node_interrupts, interpret_pci_props, etc. cannot use the + * slab allocator. + * + * This should probably be split up into smaller chunks. + */ + +static int of_finish_dynamic_node(struct device_node *node, + unsigned long *unused1, int unused2, + int unused3, int unused4) +{ + struct device_node *parent = of_get_parent(node); + int err = 0; + phandle *ibm_phandle; + + node->name = get_property(node, "name", NULL); + node->type = get_property(node, "device_type", NULL); + + if (!parent) { + err = -ENODEV; + goto out; + } + + /* We don't support that function on PowerMac, at least + * not yet + */ + if (systemcfg->platform == PLATFORM_POWERMAC) + return -ENODEV; + + /* fix up new node's linux_phandle field */ + if ((ibm_phandle = (unsigned int *)get_property(node, "ibm,phandle", NULL))) + node->linux_phandle = *ibm_phandle; + +out: + of_node_put(parent); + return err; +} + +/* + * Plug a device node into the tree and global list. + */ +void of_attach_node(struct device_node *np) +{ + write_lock(&devtree_lock); + np->sibling = np->parent->child; + np->allnext = allnodes; + np->parent->child = np; + allnodes = np; + write_unlock(&devtree_lock); +} + +/* + * "Unplug" a node from the device tree. The caller must hold + * a reference to the node. The memory associated with the node + * is not freed until its refcount goes to zero. + */ +void of_detach_node(const struct device_node *np) +{ + struct device_node *parent; + + write_lock(&devtree_lock); + + parent = np->parent; + + if (allnodes == np) + allnodes = np->allnext; + else { + struct device_node *prev; + for (prev = allnodes; + prev->allnext != np; + prev = prev->allnext) + ; + prev->allnext = np->allnext; + } + + if (parent->child == np) + parent->child = np->sibling; + else { + struct device_node *prevsib; + for (prevsib = np->parent->child; + prevsib->sibling != np; + prevsib = prevsib->sibling) + ; + prevsib->sibling = np->sibling; + } + + write_unlock(&devtree_lock); +} + +static int prom_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) +{ + int err; + + switch (action) { + case PSERIES_RECONFIG_ADD: + err = finish_node(node, NULL, of_finish_dynamic_node, 0, 0, 0); + if (err < 0) { + printk(KERN_ERR "finish_node returned %d\n", err); + err = NOTIFY_BAD; + } + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block prom_reconfig_nb = { + .notifier_call = prom_reconfig_notifier, + .priority = 10, /* This one needs to run first */ +}; + +static int __init prom_reconfig_setup(void) +{ + return pSeries_reconfig_notifier_register(&prom_reconfig_nb); +} +__initcall(prom_reconfig_setup); + +/* + * Find a property with a given name for a given node + * and return the value. + */ +unsigned char * +get_property(struct device_node *np, const char *name, int *lenp) +{ + struct property *pp; + + for (pp = np->properties; pp != 0; pp = pp->next) + if (strcmp(pp->name, name) == 0) { + if (lenp != 0) + *lenp = pp->length; + return pp->value; + } + return NULL; +} +EXPORT_SYMBOL(get_property); + +/* + * Add a property to a node + */ +void +prom_add_property(struct device_node* np, struct property* prop) +{ + struct property **next = &np->properties; + + prop->next = NULL; + while (*next) + next = &(*next)->next; + *next = prop; +} + +#if 0 +void +print_properties(struct device_node *np) +{ + struct property *pp; + char *cp; + int i, n; + + for (pp = np->properties; pp != 0; pp = pp->next) { + printk(KERN_INFO "%s", pp->name); + for (i = strlen(pp->name); i < 16; ++i) + printk(" "); + cp = (char *) pp->value; + for (i = pp->length; i > 0; --i, ++cp) + if ((i > 1 && (*cp < 0x20 || *cp > 0x7e)) + || (i == 1 && *cp != 0)) + break; + if (i == 0 && pp->length > 1) { + /* looks like a string */ + printk(" %s\n", (char *) pp->value); + } else { + /* dump it in hex */ + n = pp->length; + if (n > 64) + n = 64; + if (pp->length % 4 == 0) { + unsigned int *p = (unsigned int *) pp->value; + + n /= 4; + for (i = 0; i < n; ++i) { + if (i != 0 && (i % 4) == 0) + printk("\n "); + printk(" %08x", *p++); + } + } else { + unsigned char *bp = pp->value; + + for (i = 0; i < n; ++i) { + if (i != 0 && (i % 16) == 0) + printk("\n "); + printk(" %02x", *bp++); + } + } + printk("\n"); + if (pp->length > 64) + printk(" ... (length = %d)\n", + pp->length); + } + } +} +#endif + + + + + + + + + + diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c new file mode 100644 index 00000000000..8dffa9ae262 --- /dev/null +++ b/arch/ppc64/kernel/prom_init.c @@ -0,0 +1,1838 @@ +/* + * + * + * Procedures for interfacing to Open Firmware. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. + * {engebret|bergner}@us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG_PROM + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_LOGO_LINUX_CLUT224 +#include +extern const struct linux_logo logo_linux_clut224; +#endif + +/* + * Properties whose value is longer than this get excluded from our + * copy of the device tree. This value does need to be big enough to + * ensure that we don't lose things like the interrupt-map property + * on a PCI-PCI bridge. + */ +#define MAX_PROPERTY_LENGTH (1UL * 1024 * 1024) + +/* + * Eventually bump that one up + */ +#define DEVTREE_CHUNK_SIZE 0x100000 + +/* + * This is the size of the local memory reserve map that gets copied + * into the boot params passed to the kernel. That size is totally + * flexible as the kernel just reads the list until it encounters an + * entry with size 0, so it can be changed without breaking binary + * compatibility + */ +#define MEM_RESERVE_MAP_SIZE 8 + +/* + * prom_init() is called very early on, before the kernel text + * and data have been mapped to KERNELBASE. At this point the code + * is running at whatever address it has been loaded at, so + * references to extern and static variables must be relocated + * explicitly. The procedure reloc_offset() returns the address + * we're currently running at minus the address we were linked at. + * (Note that strings count as static variables.) + * + * Because OF may have mapped I/O devices into the area starting at + * KERNELBASE, particularly on CHRP machines, we can't safely call + * OF once the kernel has been mapped to KERNELBASE. Therefore all + * OF calls should be done within prom_init(), and prom_init() + * and all routines called within it must be careful to relocate + * references as necessary. + * + * Note that the bss is cleared *after* prom_init runs, so we have + * to make sure that any static or extern variables it accesses + * are put in the data segment. + */ + + +#define PROM_BUG() do { \ + prom_printf("kernel BUG at %s line 0x%x!\n", \ + RELOC(__FILE__), __LINE__); \ + __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \ +} while (0) + +#ifdef DEBUG_PROM +#define prom_debug(x...) prom_printf(x) +#else +#define prom_debug(x...) +#endif + + +typedef u32 prom_arg_t; + +struct prom_args { + u32 service; + u32 nargs; + u32 nret; + prom_arg_t args[10]; + prom_arg_t *rets; /* Pointer to return values in args[16]. */ +}; + +struct prom_t { + unsigned long entry; + ihandle root; + ihandle chosen; + int cpu; + ihandle stdout; + ihandle disp_node; + struct prom_args args; + unsigned long version; + unsigned long root_size_cells; + unsigned long root_addr_cells; +}; + +struct pci_reg_property { + struct pci_address addr; + u32 size_hi; + u32 size_lo; +}; + +struct mem_map_entry { + u64 base; + u64 size; +}; + +typedef u32 cell_t; + +extern void __start(unsigned long r3, unsigned long r4, unsigned long r5); + +extern void enter_prom(struct prom_args *args, unsigned long entry); +extern void copy_and_flush(unsigned long dest, unsigned long src, + unsigned long size, unsigned long offset); + +extern unsigned long klimit; + +/* prom structure */ +static struct prom_t __initdata prom; + +#define PROM_SCRATCH_SIZE 256 + +static char __initdata of_stdout_device[256]; +static char __initdata prom_scratch[PROM_SCRATCH_SIZE]; + +static unsigned long __initdata dt_header_start; +static unsigned long __initdata dt_struct_start, dt_struct_end; +static unsigned long __initdata dt_string_start, dt_string_end; + +static unsigned long __initdata prom_initrd_start, prom_initrd_end; + +static int __initdata iommu_force_on; +static int __initdata ppc64_iommu_off; +static int __initdata of_platform; + +static char __initdata prom_cmd_line[COMMAND_LINE_SIZE]; + +static unsigned long __initdata prom_memory_limit; +static unsigned long __initdata prom_tce_alloc_start; +static unsigned long __initdata prom_tce_alloc_end; + +static unsigned long __initdata alloc_top; +static unsigned long __initdata alloc_top_high; +static unsigned long __initdata alloc_bottom; +static unsigned long __initdata rmo_top; +static unsigned long __initdata ram_top; + +static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE]; +static int __initdata mem_reserve_cnt; + +static cell_t __initdata regbuf[1024]; + + +#define MAX_CPU_THREADS 2 + +/* TO GO */ +#ifdef CONFIG_HMT +struct { + unsigned int pir; + unsigned int threadid; +} hmt_thread_data[NR_CPUS]; +#endif /* CONFIG_HMT */ + +/* + * This are used in calls to call_prom. The 4th and following + * arguments to call_prom should be 32-bit values. 64 bit values + * are truncated to 32 bits (and fortunately don't get interpreted + * as two arguments). + */ +#define ADDR(x) (u32) ((unsigned long)(x) - offset) + +/* This is the one and *ONLY* place where we actually call open + * firmware from, since we need to make sure we're running in 32b + * mode when we do. We switch back to 64b mode upon return. + */ + +#define PROM_ERROR (-1) + +static int __init call_prom(const char *service, int nargs, int nret, ...) +{ + int i; + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + va_list list; + + _prom->args.service = ADDR(service); + _prom->args.nargs = nargs; + _prom->args.nret = nret; + _prom->args.rets = (prom_arg_t *)&(_prom->args.args[nargs]); + + va_start(list, nret); + for (i=0; i < nargs; i++) + _prom->args.args[i] = va_arg(list, prom_arg_t); + va_end(list); + + for (i=0; i < nret ;i++) + _prom->args.rets[i] = 0; + + enter_prom(&_prom->args, _prom->entry); + + return (nret > 0) ? _prom->args.rets[0] : 0; +} + + +static unsigned int __init prom_claim(unsigned long virt, unsigned long size, + unsigned long align) +{ + return (unsigned int)call_prom("claim", 3, 1, + (prom_arg_t)virt, (prom_arg_t)size, + (prom_arg_t)align); +} + +static void __init prom_print(const char *msg) +{ + const char *p, *q; + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + + if (_prom->stdout == 0) + return; + + for (p = msg; *p != 0; p = q) { + for (q = p; *q != 0 && *q != '\n'; ++q) + ; + if (q > p) + call_prom("write", 3, 1, _prom->stdout, p, q - p); + if (*q == 0) + break; + ++q; + call_prom("write", 3, 1, _prom->stdout, ADDR("\r\n"), 2); + } +} + + +static void __init prom_print_hex(unsigned long val) +{ + unsigned long offset = reloc_offset(); + int i, nibbles = sizeof(val)*2; + char buf[sizeof(val)*2+1]; + struct prom_t *_prom = PTRRELOC(&prom); + + for (i = nibbles-1; i >= 0; i--) { + buf[i] = (val & 0xf) + '0'; + if (buf[i] > '9') + buf[i] += ('a'-'0'-10); + val >>= 4; + } + buf[nibbles] = '\0'; + call_prom("write", 3, 1, _prom->stdout, buf, nibbles); +} + + +static void __init prom_printf(const char *format, ...) +{ + unsigned long offset = reloc_offset(); + const char *p, *q, *s; + va_list args; + unsigned long v; + struct prom_t *_prom = PTRRELOC(&prom); + + va_start(args, format); + for (p = PTRRELOC(format); *p != 0; p = q) { + for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q) + ; + if (q > p) + call_prom("write", 3, 1, _prom->stdout, p, q - p); + if (*q == 0) + break; + if (*q == '\n') { + ++q; + call_prom("write", 3, 1, _prom->stdout, + ADDR("\r\n"), 2); + continue; + } + ++q; + if (*q == 0) + break; + switch (*q) { + case 's': + ++q; + s = va_arg(args, const char *); + prom_print(s); + break; + case 'x': + ++q; + v = va_arg(args, unsigned long); + prom_print_hex(v); + break; + } + } +} + + +static void __init __attribute__((noreturn)) prom_panic(const char *reason) +{ + unsigned long offset = reloc_offset(); + + prom_print(PTRRELOC(reason)); + /* ToDo: should put up an SRC here */ + call_prom("exit", 0, 0); + + for (;;) /* should never get here */ + ; +} + + +static int __init prom_next_node(phandle *nodep) +{ + phandle node; + + if ((node = *nodep) != 0 + && (*nodep = call_prom("child", 1, 1, node)) != 0) + return 1; + if ((*nodep = call_prom("peer", 1, 1, node)) != 0) + return 1; + for (;;) { + if ((node = call_prom("parent", 1, 1, node)) == 0) + return 0; + if ((*nodep = call_prom("peer", 1, 1, node)) != 0) + return 1; + } +} + +static int __init prom_getprop(phandle node, const char *pname, + void *value, size_t valuelen) +{ + unsigned long offset = reloc_offset(); + + return call_prom("getprop", 4, 1, node, ADDR(pname), + (u32)(unsigned long) value, (u32) valuelen); +} + +static int __init prom_getproplen(phandle node, const char *pname) +{ + unsigned long offset = reloc_offset(); + + return call_prom("getproplen", 2, 1, node, ADDR(pname)); +} + +static int __init prom_setprop(phandle node, const char *pname, + void *value, size_t valuelen) +{ + unsigned long offset = reloc_offset(); + + return call_prom("setprop", 4, 1, node, ADDR(pname), + (u32)(unsigned long) value, (u32) valuelen); +} + +/* We can't use the standard versions because of RELOC headaches. */ +#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ + || ('a' <= (c) && (c) <= 'f') \ + || ('A' <= (c) && (c) <= 'F')) + +#define isdigit(c) ('0' <= (c) && (c) <= '9') +#define islower(c) ('a' <= (c) && (c) <= 'z') +#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c)) + +unsigned long prom_strtoul(const char *cp, const char **endp) +{ + unsigned long result = 0, base = 10, value; + + if (*cp == '0') { + base = 8; + cp++; + if (toupper(*cp) == 'X') { + cp++; + base = 16; + } + } + + while (isxdigit(*cp) && + (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) { + result = result * base + value; + cp++; + } + + if (endp) + *endp = cp; + + return result; +} + +unsigned long prom_memparse(const char *ptr, const char **retptr) +{ + unsigned long ret = prom_strtoul(ptr, retptr); + int shift = 0; + + /* + * We can't use a switch here because GCC *may* generate a + * jump table which won't work, because we're not running at + * the address we're linked at. + */ + if ('G' == **retptr || 'g' == **retptr) + shift = 30; + + if ('M' == **retptr || 'm' == **retptr) + shift = 20; + + if ('K' == **retptr || 'k' == **retptr) + shift = 10; + + if (shift) { + ret <<= shift; + (*retptr)++; + } + + return ret; +} + +/* + * Early parsing of the command line passed to the kernel, used for + * "mem=x" and the options that affect the iommu + */ +static void __init early_cmdline_parse(void) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + char *opt, *p; + int l = 0; + + RELOC(prom_cmd_line[0]) = 0; + p = RELOC(prom_cmd_line); + if ((long)_prom->chosen > 0) + l = prom_getprop(_prom->chosen, "bootargs", p, COMMAND_LINE_SIZE-1); +#ifdef CONFIG_CMDLINE + if (l == 0) /* dbl check */ + strlcpy(RELOC(prom_cmd_line), + RELOC(CONFIG_CMDLINE), sizeof(prom_cmd_line)); +#endif /* CONFIG_CMDLINE */ + prom_printf("command line: %s\n", RELOC(prom_cmd_line)); + + opt = strstr(RELOC(prom_cmd_line), RELOC("iommu=")); + if (opt) { + prom_printf("iommu opt is: %s\n", opt); + opt += 6; + while (*opt && *opt == ' ') + opt++; + if (!strncmp(opt, RELOC("off"), 3)) + RELOC(ppc64_iommu_off) = 1; + else if (!strncmp(opt, RELOC("force"), 5)) + RELOC(iommu_force_on) = 1; + } + + opt = strstr(RELOC(prom_cmd_line), RELOC("mem=")); + if (opt) { + opt += 4; + RELOC(prom_memory_limit) = prom_memparse(opt, (const char **)&opt); + /* Align to 16 MB == size of large page */ + RELOC(prom_memory_limit) = ALIGN(RELOC(prom_memory_limit), 0x1000000); + } +} + +/* + * Memory allocation strategy... our layout is normally: + * + * at 14Mb or more we vmlinux, then a gap and initrd. In some rare cases, initrd + * might end up beeing before the kernel though. We assume this won't override + * the final kernel at 0, we have no provision to handle that in this version, + * but it should hopefully never happen. + * + * alloc_top is set to the top of RMO, eventually shrink down if the TCEs overlap + * alloc_bottom is set to the top of kernel/initrd + * + * from there, allocations are done that way : rtas is allocated topmost, and + * the device-tree is allocated from the bottom. We try to grow the device-tree + * allocation as we progress. If we can't, then we fail, we don't currently have + * a facility to restart elsewhere, but that shouldn't be necessary neither + * + * Note that calls to reserve_mem have to be done explicitely, memory allocated + * with either alloc_up or alloc_down isn't automatically reserved. + */ + + +/* + * Allocates memory in the RMO upward from the kernel/initrd + * + * When align is 0, this is a special case, it means to allocate in place + * at the current location of alloc_bottom or fail (that is basically + * extending the previous allocation). Used for the device-tree flattening + */ +static unsigned long __init alloc_up(unsigned long size, unsigned long align) +{ + unsigned long offset = reloc_offset(); + unsigned long base = _ALIGN_UP(RELOC(alloc_bottom), align); + unsigned long addr = 0; + + prom_debug("alloc_up(%x, %x)\n", size, align); + if (RELOC(ram_top) == 0) + prom_panic("alloc_up() called with mem not initialized\n"); + + if (align) + base = _ALIGN_UP(RELOC(alloc_bottom), align); + else + base = RELOC(alloc_bottom); + + for(; (base + size) <= RELOC(alloc_top); + base = _ALIGN_UP(base + 0x100000, align)) { + prom_debug(" trying: 0x%x\n\r", base); + addr = (unsigned long)prom_claim(base, size, 0); + if ((int)addr != PROM_ERROR) + break; + addr = 0; + if (align == 0) + break; + } + if (addr == 0) + return 0; + RELOC(alloc_bottom) = addr; + + prom_debug(" -> %x\n", addr); + prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom)); + prom_debug(" alloc_top : %x\n", RELOC(alloc_top)); + prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); + prom_debug(" rmo_top : %x\n", RELOC(rmo_top)); + prom_debug(" ram_top : %x\n", RELOC(ram_top)); + + return addr; +} + +/* + * Allocates memory downard, either from top of RMO, or if highmem + * is set, from the top of RAM. Note that this one doesn't handle + * failures. In does claim memory if highmem is not set. + */ +static unsigned long __init alloc_down(unsigned long size, unsigned long align, + int highmem) +{ + unsigned long offset = reloc_offset(); + unsigned long base, addr = 0; + + prom_debug("alloc_down(%x, %x, %s)\n", size, align, + highmem ? RELOC("(high)") : RELOC("(low)")); + if (RELOC(ram_top) == 0) + prom_panic("alloc_down() called with mem not initialized\n"); + + if (highmem) { + /* Carve out storage for the TCE table. */ + addr = _ALIGN_DOWN(RELOC(alloc_top_high) - size, align); + if (addr <= RELOC(alloc_bottom)) + return 0; + else { + /* Will we bump into the RMO ? If yes, check out that we + * didn't overlap existing allocations there, if we did, + * we are dead, we must be the first in town ! + */ + if (addr < RELOC(rmo_top)) { + /* Good, we are first */ + if (RELOC(alloc_top) == RELOC(rmo_top)) + RELOC(alloc_top) = RELOC(rmo_top) = addr; + else + return 0; + } + RELOC(alloc_top_high) = addr; + } + goto bail; + } + + base = _ALIGN_DOWN(RELOC(alloc_top) - size, align); + for(; base > RELOC(alloc_bottom); base = _ALIGN_DOWN(base - 0x100000, align)) { + prom_debug(" trying: 0x%x\n\r", base); + addr = (unsigned long)prom_claim(base, size, 0); + if ((int)addr != PROM_ERROR) + break; + addr = 0; + } + if (addr == 0) + return 0; + RELOC(alloc_top) = addr; + + bail: + prom_debug(" -> %x\n", addr); + prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom)); + prom_debug(" alloc_top : %x\n", RELOC(alloc_top)); + prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); + prom_debug(" rmo_top : %x\n", RELOC(rmo_top)); + prom_debug(" ram_top : %x\n", RELOC(ram_top)); + + return addr; +} + +/* + * Parse a "reg" cell + */ +static unsigned long __init prom_next_cell(int s, cell_t **cellp) +{ + cell_t *p = *cellp; + unsigned long r = 0; + + /* Ignore more than 2 cells */ + while (s > 2) { + p++; + s--; + } + while (s) { + r <<= 32; + r |= *(p++); + s--; + } + + *cellp = p; + return r; +} + +/* + * Very dumb function for adding to the memory reserve list, but + * we don't need anything smarter at this point + * + * XXX Eventually check for collisions. They should NEVER happen + * if problems seem to show up, it would be a good start to track + * them down. + */ +static void reserve_mem(unsigned long base, unsigned long size) +{ + unsigned long offset = reloc_offset(); + unsigned long top = base + size; + unsigned long cnt = RELOC(mem_reserve_cnt); + + if (size == 0) + return; + + /* We need to always keep one empty entry so that we + * have our terminator with "size" set to 0 since we are + * dumb and just copy this entire array to the boot params + */ + base = _ALIGN_DOWN(base, PAGE_SIZE); + top = _ALIGN_UP(top, PAGE_SIZE); + size = top - base; + + if (cnt >= (MEM_RESERVE_MAP_SIZE - 1)) + prom_panic("Memory reserve map exhausted !\n"); + RELOC(mem_reserve_map)[cnt].base = base; + RELOC(mem_reserve_map)[cnt].size = size; + RELOC(mem_reserve_cnt) = cnt + 1; +} + +/* + * Initialize memory allocation mecanism, parse "memory" nodes and + * obtain that way the top of memory and RMO to setup out local allocator + */ +static void __init prom_init_mem(void) +{ + phandle node; + char *path, type[64]; + unsigned int plen; + cell_t *p, *endp; + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + + /* + * We iterate the memory nodes to find + * 1) top of RMO (first node) + * 2) top of memory + */ + prom_debug("root_addr_cells: %x\n", (long)_prom->root_addr_cells); + prom_debug("root_size_cells: %x\n", (long)_prom->root_size_cells); + + prom_debug("scanning memory:\n"); + path = RELOC(prom_scratch); + + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + prom_getprop(node, "device_type", type, sizeof(type)); + + if (strcmp(type, RELOC("memory"))) + continue; + + plen = prom_getprop(node, "reg", RELOC(regbuf), sizeof(regbuf)); + if (plen > sizeof(regbuf)) { + prom_printf("memory node too large for buffer !\n"); + plen = sizeof(regbuf); + } + p = RELOC(regbuf); + endp = p + (plen / sizeof(cell_t)); + +#ifdef DEBUG_PROM + memset(path, 0, PROM_SCRATCH_SIZE); + call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + prom_debug(" node %s :\n", path); +#endif /* DEBUG_PROM */ + + while ((endp - p) >= (_prom->root_addr_cells + _prom->root_size_cells)) { + unsigned long base, size; + + base = prom_next_cell(_prom->root_addr_cells, &p); + size = prom_next_cell(_prom->root_size_cells, &p); + + if (size == 0) + continue; + prom_debug(" %x %x\n", base, size); + if (base == 0) + RELOC(rmo_top) = size; + if ((base + size) > RELOC(ram_top)) + RELOC(ram_top) = base + size; + } + } + + RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(klimit) - offset + 0x4000); + + /* Check if we have an initrd after the kernel, if we do move our bottom + * point to after it + */ + if (RELOC(prom_initrd_start)) { + if (RELOC(prom_initrd_end) > RELOC(alloc_bottom)) + RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); + } + + /* + * If prom_memory_limit is set we reduce the upper limits *except* for + * alloc_top_high. This must be the real top of RAM so we can put + * TCE's up there. + */ + + RELOC(alloc_top_high) = RELOC(ram_top); + + if (RELOC(prom_memory_limit)) { + if (RELOC(prom_memory_limit) <= RELOC(alloc_bottom)) { + prom_printf("Ignoring mem=%x <= alloc_bottom.\n", + RELOC(prom_memory_limit)); + RELOC(prom_memory_limit) = 0; + } else if (RELOC(prom_memory_limit) >= RELOC(ram_top)) { + prom_printf("Ignoring mem=%x >= ram_top.\n", + RELOC(prom_memory_limit)); + RELOC(prom_memory_limit) = 0; + } else { + RELOC(ram_top) = RELOC(prom_memory_limit); + RELOC(rmo_top) = min(RELOC(rmo_top), RELOC(prom_memory_limit)); + } + } + + /* + * Setup our top alloc point, that is top of RMO or top of + * segment 0 when running non-LPAR. + */ + if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR ) + RELOC(alloc_top) = RELOC(rmo_top); + else + RELOC(alloc_top) = RELOC(rmo_top) = min(0x40000000ul, RELOC(ram_top)); + + prom_printf("memory layout at init:\n"); + prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); + prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom)); + prom_printf(" alloc_top : %x\n", RELOC(alloc_top)); + prom_printf(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); + prom_printf(" rmo_top : %x\n", RELOC(rmo_top)); + prom_printf(" ram_top : %x\n", RELOC(ram_top)); +} + + +/* + * Allocate room for and instanciate RTAS + */ +static void __init prom_instantiate_rtas(void) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + phandle prom_rtas, rtas_node; + u32 base, entry = 0; + u32 size = 0; + + prom_debug("prom_instantiate_rtas: start...\n"); + + prom_rtas = call_prom("finddevice", 1, 1, ADDR("/rtas")); + prom_debug("prom_rtas: %x\n", prom_rtas); + if (prom_rtas == (phandle) -1) + return; + + prom_getprop(prom_rtas, "rtas-size", &size, sizeof(size)); + if (size == 0) + return; + + base = alloc_down(size, PAGE_SIZE, 0); + if (base == 0) { + prom_printf("RTAS allocation failed !\n"); + return; + } + prom_printf("instantiating rtas at 0x%x", base); + + rtas_node = call_prom("open", 1, 1, ADDR("/rtas")); + prom_printf("..."); + + if (call_prom("call-method", 3, 2, + ADDR("instantiate-rtas"), + rtas_node, base) != PROM_ERROR) { + entry = (long)_prom->args.rets[1]; + } + if (entry == 0) { + prom_printf(" failed\n"); + return; + } + prom_printf(" done\n"); + + reserve_mem(base, size); + + prom_setprop(prom_rtas, "linux,rtas-base", &base, sizeof(base)); + prom_setprop(prom_rtas, "linux,rtas-entry", &entry, sizeof(entry)); + + prom_debug("rtas base = 0x%x\n", base); + prom_debug("rtas entry = 0x%x\n", entry); + prom_debug("rtas size = 0x%x\n", (long)size); + + prom_debug("prom_instantiate_rtas: end...\n"); +} + + +/* + * Allocate room for and initialize TCE tables + */ +static void __init prom_initialize_tce_table(void) +{ + phandle node; + ihandle phb_node; + unsigned long offset = reloc_offset(); + char compatible[64], type[64], model[64]; + char *path = RELOC(prom_scratch); + u64 base, align; + u32 minalign, minsize; + u64 tce_entry, *tce_entryp; + u64 local_alloc_top, local_alloc_bottom; + u64 i; + + if (RELOC(ppc64_iommu_off)) + return; + + prom_debug("starting prom_initialize_tce_table\n"); + + /* Cache current top of allocs so we reserve a single block */ + local_alloc_top = RELOC(alloc_top_high); + local_alloc_bottom = local_alloc_top; + + /* Search all nodes looking for PHBs. */ + for (node = 0; prom_next_node(&node); ) { + compatible[0] = 0; + type[0] = 0; + model[0] = 0; + prom_getprop(node, "compatible", + compatible, sizeof(compatible)); + prom_getprop(node, "device_type", type, sizeof(type)); + prom_getprop(node, "model", model, sizeof(model)); + + if ((type[0] == 0) || (strstr(type, RELOC("pci")) == NULL)) + continue; + + /* Keep the old logic in tack to avoid regression. */ + if (compatible[0] != 0) { + if ((strstr(compatible, RELOC("python")) == NULL) && + (strstr(compatible, RELOC("Speedwagon")) == NULL) && + (strstr(compatible, RELOC("Winnipeg")) == NULL)) + continue; + } else if (model[0] != 0) { + if ((strstr(model, RELOC("ython")) == NULL) && + (strstr(model, RELOC("peedwagon")) == NULL) && + (strstr(model, RELOC("innipeg")) == NULL)) + continue; + } + + if (prom_getprop(node, "tce-table-minalign", &minalign, + sizeof(minalign)) == PROM_ERROR) + minalign = 0; + if (prom_getprop(node, "tce-table-minsize", &minsize, + sizeof(minsize)) == PROM_ERROR) + minsize = 4UL << 20; + + /* + * Even though we read what OF wants, we just set the table + * size to 4 MB. This is enough to map 2GB of PCI DMA space. + * By doing this, we avoid the pitfalls of trying to DMA to + * MMIO space and the DMA alias hole. + * + * On POWER4, firmware sets the TCE region by assuming + * each TCE table is 8MB. Using this memory for anything + * else will impact performance, so we always allocate 8MB. + * Anton + */ + if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p)) + minsize = 8UL << 20; + else + minsize = 4UL << 20; + + /* Align to the greater of the align or size */ + align = max(minalign, minsize); + base = alloc_down(minsize, align, 1); + if (base == 0) + prom_panic("ERROR, cannot find space for TCE table.\n"); + if (base < local_alloc_bottom) + local_alloc_bottom = base; + + /* Save away the TCE table attributes for later use. */ + prom_setprop(node, "linux,tce-base", &base, sizeof(base)); + prom_setprop(node, "linux,tce-size", &minsize, sizeof(minsize)); + + /* It seems OF doesn't null-terminate the path :-( */ + memset(path, 0, sizeof(path)); + /* Call OF to setup the TCE hardware */ + if (call_prom("package-to-path", 3, 1, node, + path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) { + prom_printf("package-to-path failed\n"); + } + + prom_debug("TCE table: %s\n", path); + prom_debug("\tnode = 0x%x\n", node); + prom_debug("\tbase = 0x%x\n", base); + prom_debug("\tsize = 0x%x\n", minsize); + + /* Initialize the table to have a one-to-one mapping + * over the allocated size. + */ + tce_entryp = (unsigned long *)base; + for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) { + tce_entry = (i << PAGE_SHIFT); + tce_entry |= 0x3; + *tce_entryp = tce_entry; + } + + prom_printf("opening PHB %s", path); + phb_node = call_prom("open", 1, 1, path); + if ( (long)phb_node <= 0) + prom_printf("... failed\n"); + else + prom_printf("... done\n"); + + call_prom("call-method", 6, 0, ADDR("set-64-bit-addressing"), + phb_node, -1, minsize, + (u32) base, (u32) (base >> 32)); + call_prom("close", 1, 0, phb_node); + } + + reserve_mem(local_alloc_bottom, local_alloc_top - local_alloc_bottom); + + if (RELOC(prom_memory_limit)) { + /* + * We align the start to a 16MB boundary so we can map the TCE area + * using large pages if possible. The end should be the top of RAM + * so no need to align it. + */ + RELOC(prom_tce_alloc_start) = _ALIGN_DOWN(local_alloc_bottom, 0x1000000); + RELOC(prom_tce_alloc_end) = local_alloc_top; + } + + /* Flag the first invalid entry */ + prom_debug("ending prom_initialize_tce_table\n"); +} + +/* + * With CHRP SMP we need to use the OF to start the other + * processors so we can't wait until smp_boot_cpus (the OF is + * trashed by then) so we have to put the processors into + * a holding pattern controlled by the kernel (not OF) before + * we destroy the OF. + * + * This uses a chunk of low memory, puts some holding pattern + * code there and sends the other processors off to there until + * smp_boot_cpus tells them to do something. The holding pattern + * checks that address until its cpu # is there, when it is that + * cpu jumps to __secondary_start(). smp_boot_cpus() takes care + * of setting those values. + * + * We also use physical address 0x4 here to tell when a cpu + * is in its holding pattern code. + * + * Fixup comment... DRENG / PPPBBB - Peter + * + * -- Cort + */ +static void __init prom_hold_cpus(void) +{ + unsigned long i; + unsigned int reg; + phandle node; + unsigned long offset = reloc_offset(); + char type[64]; + int cpuid = 0; + unsigned int interrupt_server[MAX_CPU_THREADS]; + unsigned int cpu_threads, hw_cpu_num; + int propsize; + extern void __secondary_hold(void); + extern unsigned long __secondary_hold_spinloop; + extern unsigned long __secondary_hold_acknowledge; + unsigned long *spinloop + = (void *)virt_to_abs(&__secondary_hold_spinloop); + unsigned long *acknowledge + = (void *)virt_to_abs(&__secondary_hold_acknowledge); + unsigned long secondary_hold + = virt_to_abs(*PTRRELOC((unsigned long *)__secondary_hold)); + struct prom_t *_prom = PTRRELOC(&prom); + + prom_debug("prom_hold_cpus: start...\n"); + prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); + prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); + prom_debug(" 1) acknowledge = 0x%x\n", + (unsigned long)acknowledge); + prom_debug(" 1) *acknowledge = 0x%x\n", *acknowledge); + prom_debug(" 1) secondary_hold = 0x%x\n", secondary_hold); + + /* Set the common spinloop variable, so all of the secondary cpus + * will block when they are awakened from their OF spinloop. + * This must occur for both SMP and non SMP kernels, since OF will + * be trashed when we move the kernel. + */ + *spinloop = 0; + +#ifdef CONFIG_HMT + for (i=0; i < NR_CPUS; i++) { + RELOC(hmt_thread_data)[i].pir = 0xdeadbeef; + } +#endif + /* look for cpus */ + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + prom_getprop(node, "device_type", type, sizeof(type)); + if (strcmp(type, RELOC("cpu")) != 0) + continue; + + /* Skip non-configured cpus. */ + if (prom_getprop(node, "status", type, sizeof(type)) > 0) + if (strcmp(type, RELOC("okay")) != 0) + continue; + + reg = -1; + prom_getprop(node, "reg", ®, sizeof(reg)); + + prom_debug("\ncpuid = 0x%x\n", cpuid); + prom_debug("cpu hw idx = 0x%x\n", reg); + + /* Init the acknowledge var which will be reset by + * the secondary cpu when it awakens from its OF + * spinloop. + */ + *acknowledge = (unsigned long)-1; + + propsize = prom_getprop(node, "ibm,ppc-interrupt-server#s", + &interrupt_server, + sizeof(interrupt_server)); + if (propsize < 0) { + /* no property. old hardware has no SMT */ + cpu_threads = 1; + interrupt_server[0] = reg; /* fake it with phys id */ + } else { + /* We have a threaded processor */ + cpu_threads = propsize / sizeof(u32); + if (cpu_threads > MAX_CPU_THREADS) { + prom_printf("SMT: too many threads!\n" + "SMT: found %x, max is %x\n", + cpu_threads, MAX_CPU_THREADS); + cpu_threads = 1; /* ToDo: panic? */ + } + } + + hw_cpu_num = interrupt_server[0]; + if (hw_cpu_num != _prom->cpu) { + /* Primary Thread of non-boot cpu */ + prom_printf("%x : starting cpu hw idx %x... ", cpuid, reg); + call_prom("start-cpu", 3, 0, node, + secondary_hold, reg); + + for ( i = 0 ; (i < 100000000) && + (*acknowledge == ((unsigned long)-1)); i++ ) + mb(); + + if (*acknowledge == reg) { + prom_printf("done\n"); + /* We have to get every CPU out of OF, + * even if we never start it. */ + if (cpuid >= NR_CPUS) + goto next; + } else { + prom_printf("failed: %x\n", *acknowledge); + } + } +#ifdef CONFIG_SMP + else + prom_printf("%x : boot cpu %x\n", cpuid, reg); +#endif +next: +#ifdef CONFIG_SMP + /* Init paca for secondary threads. They start later. */ + for (i=1; i < cpu_threads; i++) { + cpuid++; + if (cpuid >= NR_CPUS) + continue; + } +#endif /* CONFIG_SMP */ + cpuid++; + } +#ifdef CONFIG_HMT + /* Only enable HMT on processors that provide support. */ + if (__is_processor(PV_PULSAR) || + __is_processor(PV_ICESTAR) || + __is_processor(PV_SSTAR)) { + prom_printf(" starting secondary threads\n"); + + for (i = 0; i < NR_CPUS; i += 2) { + if (!cpu_online(i)) + continue; + + if (i == 0) { + unsigned long pir = mfspr(SPRN_PIR); + if (__is_processor(PV_PULSAR)) { + RELOC(hmt_thread_data)[i].pir = + pir & 0x1f; + } else { + RELOC(hmt_thread_data)[i].pir = + pir & 0x3ff; + } + } + } + } else { + prom_printf("Processor is not HMT capable\n"); + } +#endif + + if (cpuid > NR_CPUS) + prom_printf("WARNING: maximum CPUs (" __stringify(NR_CPUS) + ") exceeded: ignoring extras\n"); + + prom_debug("prom_hold_cpus: end...\n"); +} + + +static void __init prom_init_client_services(unsigned long pp) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + + /* Get a handle to the prom entry point before anything else */ + _prom->entry = pp; + + /* Init default value for phys size */ + _prom->root_size_cells = 1; + _prom->root_addr_cells = 2; + + /* get a handle for the stdout device */ + _prom->chosen = call_prom("finddevice", 1, 1, ADDR("/chosen")); + if ((long)_prom->chosen <= 0) + prom_panic("cannot find chosen"); /* msg won't be printed :( */ + + /* get device tree root */ + _prom->root = call_prom("finddevice", 1, 1, ADDR("/")); + if ((long)_prom->root <= 0) + prom_panic("cannot find device tree root"); /* msg won't be printed :( */ +} + +static void __init prom_init_stdout(void) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + char *path = RELOC(of_stdout_device); + char type[16]; + u32 val; + + if (prom_getprop(_prom->chosen, "stdout", &val, sizeof(val)) <= 0) + prom_panic("cannot find stdout"); + + _prom->stdout = val; + + /* Get the full OF pathname of the stdout device */ + memset(path, 0, 256); + call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255); + val = call_prom("instance-to-package", 1, 1, _prom->stdout); + prom_setprop(_prom->chosen, "linux,stdout-package", &val, sizeof(val)); + prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device)); + prom_setprop(_prom->chosen, "linux,stdout-path", + RELOC(of_stdout_device), strlen(RELOC(of_stdout_device))+1); + + /* If it's a display, note it */ + memset(type, 0, sizeof(type)); + prom_getprop(val, "device_type", type, sizeof(type)); + if (strcmp(type, RELOC("display")) == 0) { + _prom->disp_node = val; + prom_setprop(val, "linux,boot-display", NULL, 0); + } +} + +static void __init prom_close_stdin(void) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + ihandle val; + + if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0) + call_prom("close", 1, 0, val); +} + +static int __init prom_find_machine_type(void) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + char compat[256]; + int len, i = 0; + phandle rtas; + + len = prom_getprop(_prom->root, "compatible", + compat, sizeof(compat)-1); + if (len > 0) { + compat[len] = 0; + while (i < len) { + char *p = &compat[i]; + int sl = strlen(p); + if (sl == 0) + break; + if (strstr(p, RELOC("Power Macintosh")) || + strstr(p, RELOC("MacRISC4"))) + return PLATFORM_POWERMAC; + if (strstr(p, RELOC("Momentum,Maple"))) + return PLATFORM_MAPLE; + i += sl + 1; + } + } + /* Default to pSeries. We need to know if we are running LPAR */ + rtas = call_prom("finddevice", 1, 1, ADDR("/rtas")); + if (rtas != (phandle) -1) { + unsigned long x; + x = prom_getproplen(rtas, "ibm,hypertas-functions"); + if (x != PROM_ERROR) { + prom_printf("Hypertas detected, assuming LPAR !\n"); + return PLATFORM_PSERIES_LPAR; + } + } + return PLATFORM_PSERIES; +} + +static int __init prom_set_color(ihandle ih, int i, int r, int g, int b) +{ + unsigned long offset = reloc_offset(); + + return call_prom("call-method", 6, 1, ADDR("color!"), ih, i, b, g, r); +} + +/* + * If we have a display that we don't know how to drive, + * we will want to try to execute OF's open method for it + * later. However, OF will probably fall over if we do that + * we've taken over the MMU. + * So we check whether we will need to open the display, + * and if so, open it now. + */ +static void __init prom_check_displays(void) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + char type[16], *path; + phandle node; + ihandle ih; + int i; + + static unsigned char default_colors[] = { + 0x00, 0x00, 0x00, + 0x00, 0x00, 0xaa, + 0x00, 0xaa, 0x00, + 0x00, 0xaa, 0xaa, + 0xaa, 0x00, 0x00, + 0xaa, 0x00, 0xaa, + 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, + 0x55, 0x55, 0x55, + 0x55, 0x55, 0xff, + 0x55, 0xff, 0x55, + 0x55, 0xff, 0xff, + 0xff, 0x55, 0x55, + 0xff, 0x55, 0xff, + 0xff, 0xff, 0x55, + 0xff, 0xff, 0xff + }; + const unsigned char *clut; + + prom_printf("Looking for displays\n"); + for (node = 0; prom_next_node(&node); ) { + memset(type, 0, sizeof(type)); + prom_getprop(node, "device_type", type, sizeof(type)); + if (strcmp(type, RELOC("display")) != 0) + continue; + + /* It seems OF doesn't null-terminate the path :-( */ + path = RELOC(prom_scratch); + memset(path, 0, PROM_SCRATCH_SIZE); + + /* + * leave some room at the end of the path for appending extra + * arguments + */ + if (call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-10) < 0) + continue; + prom_printf("found display : %s, opening ... ", path); + + ih = call_prom("open", 1, 1, path); + if (ih == (ihandle)0 || ih == (ihandle)-1) { + prom_printf("failed\n"); + continue; + } + + /* Success */ + prom_printf("done\n"); + prom_setprop(node, "linux,opened", NULL, 0); + + /* + * stdout wasn't a display node, pick the first we can find + * for btext + */ + if (_prom->disp_node == 0) + _prom->disp_node = node; + + /* Setup a useable color table when the appropriate + * method is available. Should update this to set-colors */ + clut = RELOC(default_colors); + for (i = 0; i < 32; i++, clut += 3) + if (prom_set_color(ih, i, clut[0], clut[1], + clut[2]) != 0) + break; + +#ifdef CONFIG_LOGO_LINUX_CLUT224 + clut = PTRRELOC(RELOC(logo_linux_clut224.clut)); + for (i = 0; i < RELOC(logo_linux_clut224.clutsize); i++, clut += 3) + if (prom_set_color(ih, i + 32, clut[0], clut[1], + clut[2]) != 0) + break; +#endif /* CONFIG_LOGO_LINUX_CLUT224 */ + } +} + + +/* Return (relocated) pointer to this much memory: moves initrd if reqd. */ +static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, + unsigned long needed, unsigned long align) +{ + unsigned long offset = reloc_offset(); + void *ret; + + *mem_start = _ALIGN(*mem_start, align); + while ((*mem_start + needed) > *mem_end) { + unsigned long room, chunk; + + prom_debug("Chunk exhausted, claiming more at %x...\n", + RELOC(alloc_bottom)); + room = RELOC(alloc_top) - RELOC(alloc_bottom); + if (room > DEVTREE_CHUNK_SIZE) + room = DEVTREE_CHUNK_SIZE; + if (room < PAGE_SIZE) + prom_panic("No memory for flatten_device_tree (no room)"); + chunk = alloc_up(room, 0); + if (chunk == 0) + prom_panic("No memory for flatten_device_tree (claim failed)"); + *mem_end = RELOC(alloc_top); + } + + ret = (void *)*mem_start; + *mem_start += needed; + + return ret; +} + +#define dt_push_token(token, mem_start, mem_end) \ + do { *((u32 *)make_room(mem_start, mem_end, 4, 4)) = token; } while(0) + +static unsigned long __init dt_find_string(char *str) +{ + unsigned long offset = reloc_offset(); + char *s, *os; + + s = os = (char *)RELOC(dt_string_start); + s += 4; + while (s < (char *)RELOC(dt_string_end)) { + if (strcmp(s, str) == 0) + return s - os; + s += strlen(s) + 1; + } + return 0; +} + +static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, + unsigned long *mem_end) +{ + unsigned long offset = reloc_offset(); + char *prev_name, *namep, *sstart; + unsigned long soff; + phandle child; + + sstart = (char *)RELOC(dt_string_start); + + /* get and store all property names */ + prev_name = RELOC(""); + for (;;) { + + /* 32 is max len of name including nul. */ + namep = make_room(mem_start, mem_end, 32, 1); + if (call_prom("nextprop", 3, 1, node, prev_name, namep) <= 0) { + /* No more nodes: unwind alloc */ + *mem_start = (unsigned long)namep; + break; + } + soff = dt_find_string(namep); + if (soff != 0) { + *mem_start = (unsigned long)namep; + namep = sstart + soff; + } else { + /* Trim off some if we can */ + *mem_start = (unsigned long)namep + strlen(namep) + 1; + RELOC(dt_string_end) = *mem_start; + } + prev_name = namep; + } + + /* do all our children */ + child = call_prom("child", 1, 1, node); + while (child != (phandle)0) { + scan_dt_build_strings(child, mem_start, mem_end); + child = call_prom("peer", 1, 1, child); + } +} + +static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, + unsigned long *mem_end) +{ + int l, align; + phandle child; + char *namep, *prev_name, *sstart; + unsigned long soff; + unsigned char *valp; + unsigned long offset = reloc_offset(); + char pname[32]; + char *path; + + path = RELOC(prom_scratch); + + dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); + + /* get the node's full name */ + namep = (char *)*mem_start; + l = call_prom("package-to-path", 3, 1, node, + namep, *mem_end - *mem_start); + if (l >= 0) { + /* Didn't fit? Get more room. */ + if (l+1 > *mem_end - *mem_start) { + namep = make_room(mem_start, mem_end, l+1, 1); + call_prom("package-to-path", 3, 1, node, namep, l); + } + namep[l] = '\0'; + *mem_start = _ALIGN(((unsigned long) namep) + strlen(namep) + 1, 4); + } + + /* get it again for debugging */ + memset(path, 0, PROM_SCRATCH_SIZE); + call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + + /* get and store all properties */ + prev_name = RELOC(""); + sstart = (char *)RELOC(dt_string_start); + for (;;) { + if (call_prom("nextprop", 3, 1, node, prev_name, pname) <= 0) + break; + + /* find string offset */ + soff = dt_find_string(pname); + if (soff == 0) { + prom_printf("WARNING: Can't find string index for <%s>, node %s\n", + pname, path); + break; + } + prev_name = sstart + soff; + + /* get length */ + l = call_prom("getproplen", 2, 1, node, pname); + + /* sanity checks */ + if (l < 0) + continue; + if (l > MAX_PROPERTY_LENGTH) { + prom_printf("WARNING: ignoring large property "); + /* It seems OF doesn't null-terminate the path :-( */ + prom_printf("[%s] ", path); + prom_printf("%s length 0x%x\n", pname, l); + continue; + } + + /* push property head */ + dt_push_token(OF_DT_PROP, mem_start, mem_end); + dt_push_token(l, mem_start, mem_end); + dt_push_token(soff, mem_start, mem_end); + + /* push property content */ + align = (l >= 8) ? 8 : 4; + valp = make_room(mem_start, mem_end, l, align); + call_prom("getprop", 4, 1, node, pname, valp, l); + *mem_start = _ALIGN(*mem_start, 4); + } + + /* Add a "linux,phandle" property. */ + soff = dt_find_string(RELOC("linux,phandle")); + if (soff == 0) + prom_printf("WARNING: Can't find string index for " + " node %s\n", path); + else { + dt_push_token(OF_DT_PROP, mem_start, mem_end); + dt_push_token(4, mem_start, mem_end); + dt_push_token(soff, mem_start, mem_end); + valp = make_room(mem_start, mem_end, 4, 4); + *(u32 *)valp = node; + } + + /* do all our children */ + child = call_prom("child", 1, 1, node); + while (child != (phandle)0) { + scan_dt_build_struct(child, mem_start, mem_end); + child = call_prom("peer", 1, 1, child); + } + + dt_push_token(OF_DT_END_NODE, mem_start, mem_end); +} + +static void __init flatten_device_tree(void) +{ + phandle root; + unsigned long offset = reloc_offset(); + unsigned long mem_start, mem_end, room; + struct boot_param_header *hdr; + char *namep; + u64 *rsvmap; + + /* + * Check how much room we have between alloc top & bottom (+/- a + * few pages), crop to 4Mb, as this is our "chuck" size + */ + room = RELOC(alloc_top) - RELOC(alloc_bottom) - 0x4000; + if (room > DEVTREE_CHUNK_SIZE) + room = DEVTREE_CHUNK_SIZE; + prom_debug("starting device tree allocs at %x\n", RELOC(alloc_bottom)); + + /* Now try to claim that */ + mem_start = (unsigned long)alloc_up(room, PAGE_SIZE); + if (mem_start == 0) + prom_panic("Can't allocate initial device-tree chunk\n"); + mem_end = RELOC(alloc_top); + + /* Get root of tree */ + root = call_prom("peer", 1, 1, (phandle)0); + if (root == (phandle)0) + prom_panic ("couldn't get device tree root\n"); + + /* Build header and make room for mem rsv map */ + mem_start = _ALIGN(mem_start, 4); + hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4); + RELOC(dt_header_start) = (unsigned long)hdr; + rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8); + + /* Start of strings */ + mem_start = PAGE_ALIGN(mem_start); + RELOC(dt_string_start) = mem_start; + mem_start += 4; /* hole */ + + /* Add "linux,phandle" in there, we'll need it */ + namep = make_room(&mem_start, &mem_end, 16, 1); + strcpy(namep, RELOC("linux,phandle")); + mem_start = (unsigned long)namep + strlen(namep) + 1; + RELOC(dt_string_end) = mem_start; + + /* Build string array */ + prom_printf("Building dt strings...\n"); + scan_dt_build_strings(root, &mem_start, &mem_end); + + /* Build structure */ + mem_start = PAGE_ALIGN(mem_start); + RELOC(dt_struct_start) = mem_start; + prom_printf("Building dt structure...\n"); + scan_dt_build_struct(root, &mem_start, &mem_end); + dt_push_token(OF_DT_END, &mem_start, &mem_end); + RELOC(dt_struct_end) = PAGE_ALIGN(mem_start); + + /* Finish header */ + hdr->magic = OF_DT_HEADER; + hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); + hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); + hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start); + hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start); + hdr->version = OF_DT_VERSION; + hdr->last_comp_version = 1; + + /* Reserve the whole thing and copy the reserve map in, we + * also bump mem_reserve_cnt to cause further reservations to + * fail since it's too late. + */ + reserve_mem(RELOC(dt_header_start), hdr->totalsize); + memcpy(rsvmap, RELOC(mem_reserve_map), sizeof(mem_reserve_map)); + +#ifdef DEBUG_PROM + { + int i; + prom_printf("reserved memory map:\n"); + for (i = 0; i < RELOC(mem_reserve_cnt); i++) + prom_printf(" %x - %x\n", RELOC(mem_reserve_map)[i].base, + RELOC(mem_reserve_map)[i].size); + } +#endif + RELOC(mem_reserve_cnt) = MEM_RESERVE_MAP_SIZE; + + prom_printf("Device tree strings 0x%x -> 0x%x\n", + RELOC(dt_string_start), RELOC(dt_string_end)); + prom_printf("Device tree struct 0x%x -> 0x%x\n", + RELOC(dt_struct_start), RELOC(dt_struct_end)); + + } + +static void __init prom_find_boot_cpu(void) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + u32 getprop_rval; + ihandle prom_cpu; + phandle cpu_pkg; + + if (prom_getprop(_prom->chosen, "cpu", &prom_cpu, sizeof(prom_cpu)) <= 0) + prom_panic("cannot find boot cpu"); + + cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); + + prom_setprop(cpu_pkg, "linux,boot-cpu", NULL, 0); + prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval)); + _prom->cpu = getprop_rval; + + prom_debug("Booting CPU hw index = 0x%x\n", _prom->cpu); +} + +static void __init prom_check_initrd(unsigned long r3, unsigned long r4) +{ +#ifdef CONFIG_BLK_DEV_INITRD + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + + if ( r3 && r4 && r4 != 0xdeadbeef) { + u64 val; + + RELOC(prom_initrd_start) = (r3 >= KERNELBASE) ? __pa(r3) : r3; + RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4; + + val = (u64)RELOC(prom_initrd_start); + prom_setprop(_prom->chosen, "linux,initrd-start", &val, sizeof(val)); + val = (u64)RELOC(prom_initrd_end); + prom_setprop(_prom->chosen, "linux,initrd-end", &val, sizeof(val)); + + reserve_mem(RELOC(prom_initrd_start), + RELOC(prom_initrd_end) - RELOC(prom_initrd_start)); + + prom_debug("initrd_start=0x%x\n", RELOC(prom_initrd_start)); + prom_debug("initrd_end=0x%x\n", RELOC(prom_initrd_end)); + } +#endif /* CONFIG_BLK_DEV_INITRD */ +} + +/* + * We enter here early on, when the Open Firmware prom is still + * handling exceptions and the MMU hash table for us. + */ + +unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long pp, + unsigned long r6, unsigned long r7) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + unsigned long phys = KERNELBASE - offset; + u32 getprop_rval; + + /* + * First zero the BSS + */ + memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start); + + /* + * Init interface to Open Firmware, get some node references, + * like /chosen + */ + prom_init_client_services(pp); + + /* + * Init prom stdout device + */ + prom_init_stdout(); + prom_debug("klimit=0x%x\n", RELOC(klimit)); + prom_debug("offset=0x%x\n", offset); + + /* + * Check for an initrd + */ + prom_check_initrd(r3, r4); + + /* + * Get default machine type. At this point, we do not differenciate + * between pSeries SMP and pSeries LPAR + */ + RELOC(of_platform) = prom_find_machine_type(); + getprop_rval = RELOC(of_platform); + prom_setprop(_prom->chosen, "linux,platform", + &getprop_rval, sizeof(getprop_rval)); + + /* + * On pSeries, copy the CPU hold code + */ + if (RELOC(of_platform) & PLATFORM_PSERIES) + copy_and_flush(0, KERNELBASE - offset, 0x100, 0); + + /* + * Get memory cells format + */ + getprop_rval = 1; + prom_getprop(_prom->root, "#size-cells", + &getprop_rval, sizeof(getprop_rval)); + _prom->root_size_cells = getprop_rval; + getprop_rval = 2; + prom_getprop(_prom->root, "#address-cells", + &getprop_rval, sizeof(getprop_rval)); + _prom->root_addr_cells = getprop_rval; + + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + + /* + * Initialize memory management within prom_init + */ + prom_init_mem(); + + /* + * Determine which cpu is actually running right _now_ + */ + prom_find_boot_cpu(); + + /* + * Initialize display devices + */ + prom_check_displays(); + + /* + * Initialize IOMMU (TCE tables) on pSeries. Do that before anything else + * that uses the allocator, we need to make sure we get the top of memory + * available for us here... + */ + if (RELOC(of_platform) == PLATFORM_PSERIES) + prom_initialize_tce_table(); + + /* + * On non-powermacs, try to instantiate RTAS and puts all CPUs + * in spin-loops. PowerMacs don't have a working RTAS and use + * a different way to spin CPUs + */ + if (RELOC(of_platform) != PLATFORM_POWERMAC) { + prom_instantiate_rtas(); + prom_hold_cpus(); + } + + /* + * Fill in some infos for use by the kernel later on + */ + if (RELOC(ppc64_iommu_off)) + prom_setprop(_prom->chosen, "linux,iommu-off", NULL, 0); + + if (RELOC(iommu_force_on)) + prom_setprop(_prom->chosen, "linux,iommu-force-on", NULL, 0); + + if (RELOC(prom_memory_limit)) + prom_setprop(_prom->chosen, "linux,memory-limit", + PTRRELOC(&prom_memory_limit), sizeof(RELOC(prom_memory_limit))); + + if (RELOC(prom_tce_alloc_start)) { + prom_setprop(_prom->chosen, "linux,tce-alloc-start", + PTRRELOC(&prom_tce_alloc_start), sizeof(RELOC(prom_tce_alloc_start))); + prom_setprop(_prom->chosen, "linux,tce-alloc-end", + PTRRELOC(&prom_tce_alloc_end), sizeof(RELOC(prom_tce_alloc_end))); + } + + /* + * Now finally create the flattened device-tree + */ + prom_printf("copying OF device tree ...\n"); + flatten_device_tree(); + + /* in case stdin is USB and still active on IBM machines... */ + prom_close_stdin(); + + /* + * Call OF "quiesce" method to shut down pending DMA's from + * devices etc... + */ + prom_printf("Calling quiesce ...\n"); + call_prom("quiesce", 0, 0); + + /* + * And finally, call the kernel passing it the flattened device + * tree and NULL as r5, thus triggering the new entry point which + * is common to us and kexec + */ + prom_printf("returning from prom_init\n"); + prom_debug("->dt_header_start=0x%x\n", RELOC(dt_header_start)); + prom_debug("->phys=0x%x\n", phys); + + __start(RELOC(dt_header_start), phys, 0); + + return 0; +} + diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c new file mode 100644 index 00000000000..354a287c67e --- /dev/null +++ b/arch/ppc64/kernel/ptrace.c @@ -0,0 +1,328 @@ +/* + * linux/arch/ppc64/kernel/ptrace.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/m68k/kernel/ptrace.c" + * Copyright (C) 1994 by Hamish Macdonald + * Taken from linux/kernel/ptrace.c and modified for M680x0. + * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds + * + * Modified by Cort Dougan (cort@hq.fsmlabs.com) + * and Paul Mackerras (paulus@linuxcare.com.au). + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure single step bits etc are not set. + */ +void ptrace_disable(struct task_struct *child) +{ + /* make sure the single step bit is not set. */ + clear_single_step(child); +} + +int sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + int ret = -EPERM; + + lock_kernel(); + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->ptrace & PT_PTRACED) + goto out; + ret = security_ptrace(current->parent, current); + if (ret) + goto out; + /* set the ptrace bit in the process flags. */ + current->ptrace |= PT_PTRACED; + ret = 0; + goto out; + } + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + goto out; + + ret = -EPERM; + if (pid == 1) /* you may not mess with init */ + goto out_tsk; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out_tsk; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out_tsk; + + switch (request) { + /* when I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: { + unsigned long tmp; + int copied; + + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp,(unsigned long __user *) data); + break; + } + + /* read the word at location addr in the USER area. */ + case PTRACE_PEEKUSR: { + unsigned long index; + unsigned long tmp; + + ret = -EIO; + /* convert to index and check */ + index = (unsigned long) addr >> 3; + if ((addr & 7) || (index > PT_FPSCR)) + break; + + if (index < PT_FPR0) { + tmp = get_reg(child, (int)index); + } else { + flush_fp_to_thread(child); + tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0]; + } + ret = put_user(tmp,(unsigned long __user *) data); + break; + } + + /* If I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = 0; + if (access_process_vm(child, addr, &data, sizeof(data), 1) + == sizeof(data)) + break; + ret = -EIO; + break; + + /* write the word at location addr in the USER area */ + case PTRACE_POKEUSR: { + unsigned long index; + + ret = -EIO; + /* convert to index and check */ + index = (unsigned long) addr >> 3; + if ((addr & 7) || (index > PT_FPSCR)) + break; + + if (index == PT_ORIG_R3) + break; + if (index < PT_FPR0) { + ret = put_reg(child, index, data); + } else { + flush_fp_to_thread(child); + ((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data; + ret = 0; + } + break; + } + + case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ + case PTRACE_CONT: { /* restart after signal. */ + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + if (request == PTRACE_SYSCALL) + set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + else + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + child->exit_code = data; + /* make sure the single step bit is not set. */ + clear_single_step(child); + wake_up_process(child); + ret = 0; + break; + } + + /* + * make the child exit. Best I can do is send it a sigkill. + * perhaps it should be put in the status that it wants to + * exit. + */ + case PTRACE_KILL: { + ret = 0; + if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. */ + clear_single_step(child); + wake_up_process(child); + break; + } + + case PTRACE_SINGLESTEP: { /* set the trap flag. */ + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + set_single_step(child); + child->exit_code = data; + /* give it a chance to run. */ + wake_up_process(child); + ret = 0; + break; + } + + case PTRACE_DETACH: + ret = ptrace_detach(child, data); + break; + + case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned long __user *tmp = (unsigned long __user *)addr; + + for (i = 0; i < 32; i++) { + ret = put_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned long __user *tmp = (unsigned long __user *)addr; + + for (i = 0; i < 32; i++) { + ret = get_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; + unsigned long __user *tmp = (unsigned long __user *)addr; + + flush_fp_to_thread(child); + + for (i = 0; i < 32; i++) { + ret = put_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; + unsigned long __user *tmp = (unsigned long __user *)addr; + + flush_fp_to_thread(child); + + for (i = 0; i < 32; i++) { + ret = get_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + default: + ret = ptrace_request(child, request, addr, data); + break; + } +out_tsk: + put_task_struct(child); +out: + unlock_kernel(); + return ret; +} + +static void do_syscall_trace(void) +{ + /* the 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); + + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} + +void do_syscall_trace_enter(struct pt_regs *regs) +{ + if (unlikely(current->audit_context)) + audit_syscall_entry(current, regs->gpr[0], + regs->gpr[3], regs->gpr[4], + regs->gpr[5], regs->gpr[6]); + + if (test_thread_flag(TIF_SYSCALL_TRACE) + && (current->ptrace & PT_PTRACED)) + do_syscall_trace(); +} + +void do_syscall_trace_leave(struct pt_regs *regs) +{ + secure_computing(regs->gpr[0]); + + if (unlikely(current->audit_context)) + audit_syscall_exit(current, regs->result); + + if ((test_thread_flag(TIF_SYSCALL_TRACE) + || test_thread_flag(TIF_SINGLESTEP)) + && (current->ptrace & PT_PTRACED)) + do_syscall_trace(); +} diff --git a/arch/ppc64/kernel/ptrace32.c b/arch/ppc64/kernel/ptrace32.c new file mode 100644 index 00000000000..ee81b1b776c --- /dev/null +++ b/arch/ppc64/kernel/ptrace32.c @@ -0,0 +1,420 @@ +/* + * linux/arch/ppc64/kernel/ptrace32.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/m68k/kernel/ptrace.c" + * Copyright (C) 1994 by Hamish Macdonald + * Taken from linux/kernel/ptrace.c and modified for M680x0. + * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds + * + * Modified by Cort Dougan (cort@hq.fsmlabs.com) + * and Paul Mackerras (paulus@linuxcare.com.au). + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +int sys32_ptrace(long request, long pid, unsigned long addr, unsigned long data) +{ + struct task_struct *child; + int ret = -EPERM; + + lock_kernel(); + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->ptrace & PT_PTRACED) + goto out; + ret = security_ptrace(current->parent, current); + if (ret) + goto out; + /* set the ptrace bit in the process flags. */ + current->ptrace |= PT_PTRACED; + ret = 0; + goto out; + } + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + goto out; + + ret = -EPERM; + if (pid == 1) /* you may not mess with init */ + goto out_tsk; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out_tsk; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out_tsk; + + switch (request) { + /* when I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: { + unsigned int tmp; + int copied; + + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp, (u32 __user *)data); + break; + } + + /* + * Read 4 bytes of the other process' storage + * data is a pointer specifying where the user wants the + * 4 bytes copied into + * addr is a pointer in the user's storage that contains an 8 byte + * address in the other process of the 4 bytes that is to be read + * (this is run in a 32-bit process looking at a 64-bit process) + * when I and D space are separate, these will need to be fixed. + */ + case PPC_PTRACE_PEEKTEXT_3264: + case PPC_PTRACE_PEEKDATA_3264: { + u32 tmp; + int copied; + u32 __user * addrOthers; + + ret = -EIO; + + /* Get the addr in the other process that we want to read */ + if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) + break; + + copied = access_process_vm(child, (u64)addrOthers, &tmp, + sizeof(tmp), 0); + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp, (u32 __user *)data); + break; + } + + /* Read a register (specified by ADDR) out of the "user area" */ + case PTRACE_PEEKUSR: { + int index; + unsigned long tmp; + + ret = -EIO; + /* convert to index and check */ + index = (unsigned long) addr >> 2; + if ((addr & 3) || (index > PT_FPSCR32)) + break; + + if (index < PT_FPR0) { + tmp = get_reg(child, index); + } else { + flush_fp_to_thread(child); + /* + * the user space code considers the floating point + * to be an array of unsigned int (32 bits) - the + * index passed in is based on this assumption. + */ + tmp = ((unsigned int *)child->thread.fpr)[index - PT_FPR0]; + } + ret = put_user((unsigned int)tmp, (u32 __user *)data); + break; + } + + /* + * Read 4 bytes out of the other process' pt_regs area + * data is a pointer specifying where the user wants the + * 4 bytes copied into + * addr is the offset into the other process' pt_regs structure + * that is to be read + * (this is run in a 32-bit process looking at a 64-bit process) + */ + case PPC_PTRACE_PEEKUSR_3264: { + u32 index; + u32 reg32bits; + u64 tmp; + u32 numReg; + u32 part; + + ret = -EIO; + /* Determine which register the user wants */ + index = (u64)addr >> 2; + numReg = index / 2; + /* Determine which part of the register the user wants */ + if (index % 2) + part = 1; /* want the 2nd half of the register (right-most). */ + else + part = 0; /* want the 1st half of the register (left-most). */ + + /* Validate the input - check to see if address is on the wrong boundary or beyond the end of the user area */ + if ((addr & 3) || numReg > PT_FPSCR) + break; + + if (numReg >= PT_FPR0) { + flush_fp_to_thread(child); + tmp = ((unsigned long int *)child->thread.fpr)[numReg - PT_FPR0]; + } else { /* register within PT_REGS struct */ + tmp = get_reg(child, numReg); + } + reg32bits = ((u32*)&tmp)[part]; + ret = put_user(reg32bits, (u32 __user *)data); + break; + } + + /* If I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: { + unsigned int tmp; + tmp = data; + ret = 0; + if (access_process_vm(child, addr, &tmp, sizeof(tmp), 1) + == sizeof(tmp)) + break; + ret = -EIO; + break; + } + + /* + * Write 4 bytes into the other process' storage + * data is the 4 bytes that the user wants written + * addr is a pointer in the user's storage that contains an + * 8 byte address in the other process where the 4 bytes + * that is to be written + * (this is run in a 32-bit process looking at a 64-bit process) + * when I and D space are separate, these will need to be fixed. + */ + case PPC_PTRACE_POKETEXT_3264: + case PPC_PTRACE_POKEDATA_3264: { + u32 tmp = data; + u32 __user * addrOthers; + + /* Get the addr in the other process that we want to write into */ + ret = -EIO; + if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) + break; + ret = 0; + if (access_process_vm(child, (u64)addrOthers, &tmp, + sizeof(tmp), 1) == sizeof(tmp)) + break; + ret = -EIO; + break; + } + + /* write the word at location addr in the USER area */ + case PTRACE_POKEUSR: { + unsigned long index; + + ret = -EIO; + /* convert to index and check */ + index = (unsigned long) addr >> 2; + if ((addr & 3) || (index > PT_FPSCR32)) + break; + + if (index == PT_ORIG_R3) + break; + if (index < PT_FPR0) { + ret = put_reg(child, index, data); + } else { + flush_fp_to_thread(child); + /* + * the user space code considers the floating point + * to be an array of unsigned int (32 bits) - the + * index passed in is based on this assumption. + */ + ((unsigned int *)child->thread.fpr)[index - PT_FPR0] = data; + ret = 0; + } + break; + } + + /* + * Write 4 bytes into the other process' pt_regs area + * data is the 4 bytes that the user wants written + * addr is the offset into the other process' pt_regs structure + * that is to be written into + * (this is run in a 32-bit process looking at a 64-bit process) + */ + case PPC_PTRACE_POKEUSR_3264: { + u32 index; + u32 numReg; + + ret = -EIO; + /* Determine which register the user wants */ + index = (u64)addr >> 2; + numReg = index / 2; + /* + * Validate the input - check to see if address is on the + * wrong boundary or beyond the end of the user area + */ + if ((addr & 3) || (numReg > PT_FPSCR)) + break; + /* Insure it is a register we let them change */ + if ((numReg == PT_ORIG_R3) + || ((numReg > PT_CCR) && (numReg < PT_FPR0))) + break; + if (numReg >= PT_FPR0) { + flush_fp_to_thread(child); + } + if (numReg == PT_MSR) + data = (data & MSR_DEBUGCHANGE) + | (child->thread.regs->msr & ~MSR_DEBUGCHANGE); + ((u32*)child->thread.regs)[index] = data; + ret = 0; + break; + } + + case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ + case PTRACE_CONT: { /* restart after signal. */ + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + if (request == PTRACE_SYSCALL) + set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + else + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + child->exit_code = data; + /* make sure the single step bit is not set. */ + clear_single_step(child); + wake_up_process(child); + ret = 0; + break; + } + + /* + * make the child exit. Best I can do is send it a sigkill. + * perhaps it should be put in the status that it wants to + * exit. + */ + case PTRACE_KILL: { + ret = 0; + if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. */ + clear_single_step(child); + wake_up_process(child); + break; + } + + case PTRACE_SINGLESTEP: { /* set the trap flag. */ + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + set_single_step(child); + child->exit_code = data; + /* give it a chance to run. */ + wake_up_process(child); + ret = 0; + break; + } + + case PTRACE_DETACH: + ret = ptrace_detach(child, data); + break; + + case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned int __user *tmp = (unsigned int __user *)addr; + + for (i = 0; i < 32; i++) { + ret = put_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned int __user *tmp = (unsigned int __user *)addr; + + for (i = 0; i < 32; i++) { + ret = get_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; + unsigned int __user *tmp = (unsigned int __user *)addr; + + flush_fp_to_thread(child); + + for (i = 0; i < 32; i++) { + ret = put_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; + unsigned int __user *tmp = (unsigned int __user *)addr; + + flush_fp_to_thread(child); + + for (i = 0; i < 32; i++) { + ret = get_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, (unsigned int __user *) data); + break; + + default: + ret = ptrace_request(child, request, addr, data); + break; + } +out_tsk: + put_task_struct(child); +out: + unlock_kernel(); + return ret; +} diff --git a/arch/ppc64/kernel/ras.c b/arch/ppc64/kernel/ras.c new file mode 100644 index 00000000000..1c4c796b212 --- /dev/null +++ b/arch/ppc64/kernel/ras.c @@ -0,0 +1,356 @@ +/* + * ras.c + * Copyright (C) 2001 Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Change Activity: + * 2001/09/21 : engebret : Created with minimal EPOW and HW exception support. + * End Change Activity + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; +static DEFINE_SPINLOCK(ras_log_buf_lock); + +char mce_data_buf[RTAS_ERROR_LOG_MAX] +; +/* This is true if we are using the firmware NMI handler (typically LPAR) */ +extern int fwnmi_active; + +extern void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr); + +static int ras_get_sensor_state_token; +static int ras_check_exception_token; + +#define EPOW_SENSOR_TOKEN 9 +#define EPOW_SENSOR_INDEX 0 +#define RAS_VECTOR_OFFSET 0x500 + +static irqreturn_t ras_epow_interrupt(int irq, void *dev_id, + struct pt_regs * regs); +static irqreturn_t ras_error_interrupt(int irq, void *dev_id, + struct pt_regs * regs); + +/* #define DEBUG */ + +static void request_ras_irqs(struct device_node *np, char *propname, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + const char *name) +{ + unsigned int *ireg, len, i; + int virq, n_intr; + + ireg = (unsigned int *)get_property(np, propname, &len); + if (ireg == NULL) + return; + n_intr = prom_n_intr_cells(np); + len /= n_intr * sizeof(*ireg); + + for (i = 0; i < len; i++) { + virq = virt_irq_create_mapping(*ireg); + if (virq == NO_IRQ) { + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", np->full_name); + return; + } + if (request_irq(irq_offset_up(virq), handler, 0, name, NULL)) { + printk(KERN_ERR "Unable to request interrupt %d for " + "%s\n", irq_offset_up(virq), np->full_name); + return; + } + ireg += n_intr; + } +} + +/* + * Initialize handlers for the set of interrupts caused by hardware errors + * and power system events. + */ +static int __init init_ras_IRQ(void) +{ + struct device_node *np; + + ras_get_sensor_state_token = rtas_token("get-sensor-state"); + ras_check_exception_token = rtas_token("check-exception"); + + /* Internal Errors */ + np = of_find_node_by_path("/event-sources/internal-errors"); + if (np != NULL) { + request_ras_irqs(np, "open-pic-interrupt", ras_error_interrupt, + "RAS_ERROR"); + request_ras_irqs(np, "interrupts", ras_error_interrupt, + "RAS_ERROR"); + of_node_put(np); + } + + /* EPOW Events */ + np = of_find_node_by_path("/event-sources/epow-events"); + if (np != NULL) { + request_ras_irqs(np, "open-pic-interrupt", ras_epow_interrupt, + "RAS_EPOW"); + request_ras_irqs(np, "interrupts", ras_epow_interrupt, + "RAS_EPOW"); + of_node_put(np); + } + + return 1; +} +__initcall(init_ras_IRQ); + +/* + * Handle power subsystem events (EPOW). + * + * Presently we just log the event has occurred. This should be fixed + * to examine the type of power failure and take appropriate action where + * the time horizon permits something useful to be done. + */ +static irqreturn_t +ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + int status = 0xdeadbeef; + int state = 0; + int critical; + + status = rtas_call(ras_get_sensor_state_token, 2, 2, &state, + EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX); + + if (state > 3) + critical = 1; /* Time Critical */ + else + critical = 0; + + spin_lock(&ras_log_buf_lock); + + status = rtas_call(ras_check_exception_token, 6, 1, NULL, + RAS_VECTOR_OFFSET, + virt_irq_to_real(irq_offset_down(irq)), + RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, + critical, __pa(&ras_log_buf), + rtas_get_error_log_max()); + + udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n", + *((unsigned long *)&ras_log_buf), status, state); + printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n", + *((unsigned long *)&ras_log_buf), status, state); + + /* format and print the extended information */ + log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); + + spin_unlock(&ras_log_buf_lock); + return IRQ_HANDLED; +} + +/* + * Handle hardware error interrupts. + * + * RTAS check-exception is called to collect data on the exception. If + * the error is deemed recoverable, we log a warning and return. + * For nonrecoverable errors, an error is logged and we stop all processing + * as quickly as possible in order to prevent propagation of the failure. + */ +static irqreturn_t +ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + struct rtas_error_log *rtas_elog; + int status = 0xdeadbeef; + int fatal; + + spin_lock(&ras_log_buf_lock); + + status = rtas_call(ras_check_exception_token, 6, 1, NULL, + RAS_VECTOR_OFFSET, + virt_irq_to_real(irq_offset_down(irq)), + RTAS_INTERNAL_ERROR, 1 /*Time Critical */, + __pa(&ras_log_buf), + rtas_get_error_log_max()); + + rtas_elog = (struct rtas_error_log *)ras_log_buf; + + if ((status == 0) && (rtas_elog->severity >= RTAS_SEVERITY_ERROR_SYNC)) + fatal = 1; + else + fatal = 0; + + /* format and print the extended information */ + log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); + + if (fatal) { + udbg_printf("Fatal HW Error <0x%lx 0x%x>\n", + *((unsigned long *)&ras_log_buf), status); + printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n", + *((unsigned long *)&ras_log_buf), status); + +#ifndef DEBUG + /* Don't actually power off when debugging so we can test + * without actually failing while injecting errors. + * Error data will not be logged to syslog. + */ + ppc_md.power_off(); +#endif + } else { + udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n", + *((unsigned long *)&ras_log_buf), status); + printk(KERN_WARNING + "Warning: Recoverable hardware error <0x%lx 0x%x>\n", + *((unsigned long *)&ras_log_buf), status); + } + + spin_unlock(&ras_log_buf_lock); + return IRQ_HANDLED; +} + +/* Get the error information for errors coming through the + * FWNMI vectors. The pt_regs' r3 will be updated to reflect + * the actual r3 if possible, and a ptr to the error log entry + * will be returned if found. + * + * The mce_data_buf does not have any locks or protection around it, + * if a second machine check comes in, or a system reset is done + * before we have logged the error, then we will get corruption in the + * error log. This is preferable over holding off on calling + * ibm,nmi-interlock which would result in us checkstopping if a + * second machine check did come in. + */ +static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) +{ + unsigned long errdata = regs->gpr[3]; + struct rtas_error_log *errhdr = NULL; + unsigned long *savep; + + if ((errdata >= 0x7000 && errdata < 0x7fff0) || + (errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) { + savep = __va(errdata); + regs->gpr[3] = savep[0]; /* restore original r3 */ + memset(mce_data_buf, 0, RTAS_ERROR_LOG_MAX); + memcpy(mce_data_buf, (char *)(savep + 1), RTAS_ERROR_LOG_MAX); + errhdr = (struct rtas_error_log *)mce_data_buf; + } else { + printk("FWNMI: corrupt r3\n"); + } + return errhdr; +} + +/* Call this when done with the data returned by FWNMI_get_errinfo. + * It will release the saved data area for other CPUs in the + * partition to receive FWNMI errors. + */ +static void fwnmi_release_errinfo(void) +{ + int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); + if (ret != 0) + printk("FWNMI: nmi-interlock failed: %d\n", ret); +} + +void pSeries_system_reset_exception(struct pt_regs *regs) +{ + if (fwnmi_active) { + struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs); + if (errhdr) { + /* XXX Should look at FWNMI information */ + } + fwnmi_release_errinfo(); + } +} + +/* + * See if we can recover from a machine check exception. + * This is only called on power4 (or above) and only via + * the Firmware Non-Maskable Interrupts (fwnmi) handler + * which provides the error analysis for us. + * + * Return 1 if corrected (or delivered a signal). + * Return 0 if there is nothing we can do. + */ +static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err) +{ + int nonfatal = 0; + + if (err->disposition == RTAS_DISP_FULLY_RECOVERED) { + /* Platform corrected itself */ + nonfatal = 1; + } else if ((regs->msr & MSR_RI) && + user_mode(regs) && + err->severity == RTAS_SEVERITY_ERROR_SYNC && + err->disposition == RTAS_DISP_NOT_RECOVERED && + err->target == RTAS_TARGET_MEMORY && + err->type == RTAS_TYPE_ECC_UNCORR && + !(current->pid == 0 || current->pid == 1)) { + /* Kill off a user process with an ECC error */ + printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n", + current->pid); + /* XXX something better for ECC error? */ + _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); + nonfatal = 1; + } + + log_error((char *)err, ERR_TYPE_RTAS_LOG, !nonfatal); + + return nonfatal; +} + +/* + * Handle a machine check. + * + * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) + * should be present. If so the handler which called us tells us if the + * error was recovered (never true if RI=0). + * + * On hardware prior to Power 4 these exceptions were asynchronous which + * means we can't tell exactly where it occurred and so we can't recover. + */ +int pSeries_machine_check_exception(struct pt_regs *regs) +{ + struct rtas_error_log *errp; + + if (fwnmi_active) { + errp = fwnmi_get_errinfo(regs); + fwnmi_release_errinfo(); + if (errp && recover_mce(regs, errp)) + return 1; + } + + return 0; +} diff --git a/arch/ppc64/kernel/rtas-proc.c b/arch/ppc64/kernel/rtas-proc.c new file mode 100644 index 00000000000..28b1f1521f2 --- /dev/null +++ b/arch/ppc64/kernel/rtas-proc.c @@ -0,0 +1,807 @@ +/* + * arch/ppc64/kernel/rtas-proc.c + * Copyright (C) 2000 Tilmann Bitterberg + * (tilmann@bitterberg.de) + * + * RTAS (Runtime Abstraction Services) stuff + * Intention is to provide a clean user interface + * to use the RTAS. + * + * TODO: + * Split off a header file and maybe move it to a different + * location. Write Documentation on what the /proc/rtas/ entries + * actually do. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include /* for ppc_md */ +#include +#include + +/* Token for Sensors */ +#define KEY_SWITCH 0x0001 +#define ENCLOSURE_SWITCH 0x0002 +#define THERMAL_SENSOR 0x0003 +#define LID_STATUS 0x0004 +#define POWER_SOURCE 0x0005 +#define BATTERY_VOLTAGE 0x0006 +#define BATTERY_REMAINING 0x0007 +#define BATTERY_PERCENTAGE 0x0008 +#define EPOW_SENSOR 0x0009 +#define BATTERY_CYCLESTATE 0x000a +#define BATTERY_CHARGING 0x000b + +/* IBM specific sensors */ +#define IBM_SURVEILLANCE 0x2328 /* 9000 */ +#define IBM_FANRPM 0x2329 /* 9001 */ +#define IBM_VOLTAGE 0x232a /* 9002 */ +#define IBM_DRCONNECTOR 0x232b /* 9003 */ +#define IBM_POWERSUPPLY 0x232c /* 9004 */ + +/* Status return values */ +#define SENSOR_CRITICAL_HIGH 13 +#define SENSOR_WARNING_HIGH 12 +#define SENSOR_NORMAL 11 +#define SENSOR_WARNING_LOW 10 +#define SENSOR_CRITICAL_LOW 9 +#define SENSOR_SUCCESS 0 +#define SENSOR_HW_ERROR -1 +#define SENSOR_BUSY -2 +#define SENSOR_NOT_EXIST -3 +#define SENSOR_DR_ENTITY -9000 + +/* Location Codes */ +#define LOC_SCSI_DEV_ADDR 'A' +#define LOC_SCSI_DEV_LOC 'B' +#define LOC_CPU 'C' +#define LOC_DISKETTE 'D' +#define LOC_ETHERNET 'E' +#define LOC_FAN 'F' +#define LOC_GRAPHICS 'G' +/* reserved / not used 'H' */ +#define LOC_IO_ADAPTER 'I' +/* reserved / not used 'J' */ +#define LOC_KEYBOARD 'K' +#define LOC_LCD 'L' +#define LOC_MEMORY 'M' +#define LOC_NV_MEMORY 'N' +#define LOC_MOUSE 'O' +#define LOC_PLANAR 'P' +#define LOC_OTHER_IO 'Q' +#define LOC_PARALLEL 'R' +#define LOC_SERIAL 'S' +#define LOC_DEAD_RING 'T' +#define LOC_RACKMOUNTED 'U' /* for _u_nit is rack mounted */ +#define LOC_VOLTAGE 'V' +#define LOC_SWITCH_ADAPTER 'W' +#define LOC_OTHER 'X' +#define LOC_FIRMWARE 'Y' +#define LOC_SCSI 'Z' + +/* Tokens for indicators */ +#define TONE_FREQUENCY 0x0001 /* 0 - 1000 (HZ)*/ +#define TONE_VOLUME 0x0002 /* 0 - 100 (%) */ +#define SYSTEM_POWER_STATE 0x0003 +#define WARNING_LIGHT 0x0004 +#define DISK_ACTIVITY_LIGHT 0x0005 +#define HEX_DISPLAY_UNIT 0x0006 +#define BATTERY_WARNING_TIME 0x0007 +#define CONDITION_CYCLE_REQUEST 0x0008 +#define SURVEILLANCE_INDICATOR 0x2328 /* 9000 */ +#define DR_ACTION 0x2329 /* 9001 */ +#define DR_INDICATOR 0x232a /* 9002 */ +/* 9003 - 9004: Vendor specific */ +/* 9006 - 9999: Vendor specific */ + +/* other */ +#define MAX_SENSORS 17 /* I only know of 17 sensors */ +#define MAX_LINELENGTH 256 +#define SENSOR_PREFIX "ibm,sensor-" +#define cel_to_fahr(x) ((x*9/5)+32) + + +/* Globals */ +static struct rtas_sensors sensors; +static struct device_node *rtas_node = NULL; +static unsigned long power_on_time = 0; /* Save the time the user set */ +static char progress_led[MAX_LINELENGTH]; + +static unsigned long rtas_tone_frequency = 1000; +static unsigned long rtas_tone_volume = 0; + +/* ****************STRUCTS******************************************* */ +struct individual_sensor { + unsigned int token; + unsigned int quant; +}; + +struct rtas_sensors { + struct individual_sensor sensor[MAX_SENSORS]; + unsigned int quant; +}; + +/* ****************************************************************** */ +/* Declarations */ +static int ppc_rtas_sensors_show(struct seq_file *m, void *v); +static int ppc_rtas_clock_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_clock_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_progress_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_progress_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_poweron_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_poweron_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); + +static ssize_t ppc_rtas_tone_freq_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_tone_volume_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v); +static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v); + +static int sensors_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_sensors_show, NULL); +} + +struct file_operations ppc_rtas_sensors_operations = { + .open = sensors_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int poweron_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_poweron_show, NULL); +} + +struct file_operations ppc_rtas_poweron_operations = { + .open = poweron_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_poweron_write, + .release = single_release, +}; + +static int progress_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_progress_show, NULL); +} + +struct file_operations ppc_rtas_progress_operations = { + .open = progress_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_progress_write, + .release = single_release, +}; + +static int clock_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_clock_show, NULL); +} + +struct file_operations ppc_rtas_clock_operations = { + .open = clock_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_clock_write, + .release = single_release, +}; + +static int tone_freq_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_tone_freq_show, NULL); +} + +struct file_operations ppc_rtas_tone_freq_operations = { + .open = tone_freq_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_tone_freq_write, + .release = single_release, +}; + +static int tone_volume_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_tone_volume_show, NULL); +} + +struct file_operations ppc_rtas_tone_volume_operations = { + .open = tone_volume_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_tone_volume_write, + .release = single_release, +}; + +static int rmo_buf_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_rmo_buf_show, NULL); +} + +struct file_operations ppc_rtas_rmo_buf_ops = { + .open = rmo_buf_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ppc_rtas_find_all_sensors(void); +static void ppc_rtas_process_sensor(struct seq_file *m, + struct individual_sensor *s, int state, int error, char *loc); +static char *ppc_rtas_process_error(int error); +static void get_location_code(struct seq_file *m, + struct individual_sensor *s, char *loc); +static void check_location_string(struct seq_file *m, char *c); +static void check_location(struct seq_file *m, char *c); + +static int __init proc_rtas_init(void) +{ + struct proc_dir_entry *entry; + + if (!(systemcfg->platform & PLATFORM_PSERIES)) + return 1; + + rtas_node = of_find_node_by_name(NULL, "rtas"); + if (rtas_node == NULL) + return 1; + + entry = create_proc_entry("ppc64/rtas/progress", S_IRUGO|S_IWUSR, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_progress_operations; + + entry = create_proc_entry("ppc64/rtas/clock", S_IRUGO|S_IWUSR, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_clock_operations; + + entry = create_proc_entry("ppc64/rtas/poweron", S_IWUSR|S_IRUGO, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_poweron_operations; + + entry = create_proc_entry("ppc64/rtas/sensors", S_IRUGO, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_sensors_operations; + + entry = create_proc_entry("ppc64/rtas/frequency", S_IWUSR|S_IRUGO, + NULL); + if (entry) + entry->proc_fops = &ppc_rtas_tone_freq_operations; + + entry = create_proc_entry("ppc64/rtas/volume", S_IWUSR|S_IRUGO, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_tone_volume_operations; + + entry = create_proc_entry("ppc64/rtas/rmo_buffer", S_IRUSR, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_rmo_buf_ops; + + return 0; +} + +__initcall(proc_rtas_init); + +static int parse_number(const char __user *p, size_t count, unsigned long *val) +{ + char buf[40]; + char *end; + + if (count > 39) + return -EINVAL; + + if (copy_from_user(buf, p, count)) + return -EFAULT; + + buf[count] = 0; + + *val = simple_strtoul(buf, &end, 10); + if (*end && *end != '\n') + return -EINVAL; + + return 0; +} + +/* ****************************************************************** */ +/* POWER-ON-TIME */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_poweron_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct rtc_time tm; + unsigned long nowtime; + int error = parse_number(buf, count, &nowtime); + if (error) + return error; + + power_on_time = nowtime; /* save the time */ + + to_tm(nowtime, &tm); + + error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL, + tm.tm_year, tm.tm_mon, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); + if (error) + printk(KERN_WARNING "error: setting poweron time returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_poweron_show(struct seq_file *m, void *v) +{ + if (power_on_time == 0) + seq_printf(m, "Power on time not set\n"); + else + seq_printf(m, "%lu\n",power_on_time); + return 0; +} + +/* ****************************************************************** */ +/* PROGRESS */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_progress_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long hex; + + if (count >= MAX_LINELENGTH) + count = MAX_LINELENGTH -1; + if (copy_from_user(progress_led, buf, count)) { /* save the string */ + return -EFAULT; + } + progress_led[count] = 0; + + /* Lets see if the user passed hexdigits */ + hex = simple_strtoul(progress_led, NULL, 10); + + ppc_md.progress ((char *)progress_led, hex); + return count; + + /* clear the line */ + /* ppc_md.progress(" ", 0xffff);*/ +} +/* ****************************************************************** */ +static int ppc_rtas_progress_show(struct seq_file *m, void *v) +{ + if (progress_led) + seq_printf(m, "%s\n", progress_led); + return 0; +} + +/* ****************************************************************** */ +/* CLOCK */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_clock_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct rtc_time tm; + unsigned long nowtime; + int error = parse_number(buf, count, &nowtime); + if (error) + return error; + + to_tm(nowtime, &tm); + error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + tm.tm_year, tm.tm_mon, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0); + if (error) + printk(KERN_WARNING "error: setting the clock returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_clock_show(struct seq_file *m, void *v) +{ + int ret[8]; + int error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + + if (error) { + printk(KERN_WARNING "error: reading the clock returned: %s\n", + ppc_rtas_process_error(error)); + seq_printf(m, "0"); + } else { + unsigned int year, mon, day, hour, min, sec; + year = ret[0]; mon = ret[1]; day = ret[2]; + hour = ret[3]; min = ret[4]; sec = ret[5]; + seq_printf(m, "%lu\n", + mktime(year, mon, day, hour, min, sec)); + } + return 0; +} + +/* ****************************************************************** */ +/* SENSOR STUFF */ +/* ****************************************************************** */ +static int ppc_rtas_sensors_show(struct seq_file *m, void *v) +{ + int i,j; + int state, error; + int get_sensor_state = rtas_token("get-sensor-state"); + + seq_printf(m, "RTAS (RunTime Abstraction Services) Sensor Information\n"); + seq_printf(m, "Sensor\t\tValue\t\tCondition\tLocation\n"); + seq_printf(m, "********************************************************\n"); + + if (ppc_rtas_find_all_sensors() != 0) { + seq_printf(m, "\nNo sensors are available\n"); + return 0; + } + + for (i=0; itoken); + loc = (char *) get_property(rtas_node, rstr, &llen); + + /* A sensor may have multiple instances */ + for (j = 0, offs = 0; j <= p->quant; j++) { + error = rtas_call(get_sensor_state, 2, 2, &state, + p->token, j); + + ppc_rtas_process_sensor(m, p, state, error, loc); + seq_putc(m, '\n'); + if (loc) { + offs += strlen(loc) + 1; + loc += strlen(loc) + 1; + if (offs >= llen) + loc = NULL; + } + } + } + return 0; +} + +/* ****************************************************************** */ + +static int ppc_rtas_find_all_sensors(void) +{ + unsigned int *utmp; + int len, i; + + utmp = (unsigned int *) get_property(rtas_node, "rtas-sensors", &len); + if (utmp == NULL) { + printk (KERN_ERR "error: could not get rtas-sensors\n"); + return 1; + } + + sensors.quant = len / 8; /* int + int */ + + for (i=0; itoken) { + case KEY_SWITCH: + seq_printf(m, "Key switch:\t"); + num_states = sizeof(key_switch) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", key_switch[state]); + have_strings = 1; + } + break; + case ENCLOSURE_SWITCH: + seq_printf(m, "Enclosure switch:\t"); + num_states = sizeof(enclosure_switch) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + enclosure_switch[state]); + have_strings = 1; + } + break; + case THERMAL_SENSOR: + seq_printf(m, "Temp. (C/F):\t"); + temperature = 1; + break; + case LID_STATUS: + seq_printf(m, "Lid status:\t"); + num_states = sizeof(lid_status) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", lid_status[state]); + have_strings = 1; + } + break; + case POWER_SOURCE: + seq_printf(m, "Power source:\t"); + num_states = sizeof(power_source) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + power_source[state]); + have_strings = 1; + } + break; + case BATTERY_VOLTAGE: + seq_printf(m, "Battery voltage:\t"); + break; + case BATTERY_REMAINING: + seq_printf(m, "Battery remaining:\t"); + num_states = sizeof(battery_remaining) / sizeof(char *); + if (state < num_states) + { + seq_printf(m, "%s\t", + battery_remaining[state]); + have_strings = 1; + } + break; + case BATTERY_PERCENTAGE: + seq_printf(m, "Battery percentage:\t"); + break; + case EPOW_SENSOR: + seq_printf(m, "EPOW Sensor:\t"); + num_states = sizeof(epow_sensor) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", epow_sensor[state]); + have_strings = 1; + } + break; + case BATTERY_CYCLESTATE: + seq_printf(m, "Battery cyclestate:\t"); + num_states = sizeof(battery_cyclestate) / + sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + battery_cyclestate[state]); + have_strings = 1; + } + break; + case BATTERY_CHARGING: + seq_printf(m, "Battery Charging:\t"); + num_states = sizeof(battery_charging) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + battery_charging[state]); + have_strings = 1; + } + break; + case IBM_SURVEILLANCE: + seq_printf(m, "Surveillance:\t"); + break; + case IBM_FANRPM: + seq_printf(m, "Fan (rpm):\t"); + break; + case IBM_VOLTAGE: + seq_printf(m, "Voltage (mv):\t"); + break; + case IBM_DRCONNECTOR: + seq_printf(m, "DR connector:\t"); + num_states = sizeof(ibm_drconnector) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + ibm_drconnector[state]); + have_strings = 1; + } + break; + case IBM_POWERSUPPLY: + seq_printf(m, "Powersupply:\t"); + break; + default: + seq_printf(m, "Unknown sensor (type %d), ignoring it\n", + s->token); + unknown = 1; + have_strings = 1; + break; + } + if (have_strings == 0) { + if (temperature) { + seq_printf(m, "%4d /%4d\t", state, cel_to_fahr(state)); + } else + seq_printf(m, "%10d\t", state); + } + if (unknown == 0) { + seq_printf(m, "%s\t", ppc_rtas_process_error(error)); + get_location_code(m, s, loc); + } +} + +/* ****************************************************************** */ + +static void check_location(struct seq_file *m, char *c) +{ + switch (c[0]) { + case LOC_PLANAR: + seq_printf(m, "Planar #%c", c[1]); + break; + case LOC_CPU: + seq_printf(m, "CPU #%c", c[1]); + break; + case LOC_FAN: + seq_printf(m, "Fan #%c", c[1]); + break; + case LOC_RACKMOUNTED: + seq_printf(m, "Rack #%c", c[1]); + break; + case LOC_VOLTAGE: + seq_printf(m, "Voltage #%c", c[1]); + break; + case LOC_LCD: + seq_printf(m, "LCD #%c", c[1]); + break; + case '.': + seq_printf(m, "- %c", c[1]); + break; + default: + seq_printf(m, "Unknown location"); + break; + } +} + + +/* ****************************************************************** */ +/* + * Format: + * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ] + * the '.' may be an abbrevation + */ +static void check_location_string(struct seq_file *m, char *c) +{ + while (*c) { + if (isalpha(*c) || *c == '.') + check_location(m, c); + else if (*c == '/' || *c == '-') + seq_printf(m, " at "); + c++; + } +} + + +/* ****************************************************************** */ + +static void get_location_code(struct seq_file *m, struct individual_sensor *s, char *loc) +{ + if (!loc || !*loc) { + seq_printf(m, "---");/* does not have a location */ + } else { + check_location_string(m, loc); + } + seq_putc(m, ' '); +} +/* ****************************************************************** */ +/* INDICATORS - Tone Frequency */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_tone_freq_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long freq; + int error = parse_number(buf, count, &freq); + if (error) + return error; + + rtas_tone_frequency = freq; /* save it for later */ + error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, + TONE_FREQUENCY, 0, freq); + if (error) + printk(KERN_WARNING "error: setting tone frequency returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%lu\n", rtas_tone_frequency); + return 0; +} +/* ****************************************************************** */ +/* INDICATORS - Tone Volume */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_tone_volume_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long volume; + int error = parse_number(buf, count, &volume); + if (error) + return error; + + if (volume > 100) + volume = 100; + + rtas_tone_volume = volume; /* save it for later */ + error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, + TONE_VOLUME, 0, volume); + if (error) + printk(KERN_WARNING "error: setting tone volume returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%lu\n", rtas_tone_volume); + return 0; +} + +#define RMO_READ_BUF_MAX 30 + +/* RTAS Userspace access */ +static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX); + return 0; +} diff --git a/arch/ppc64/kernel/rtas.c b/arch/ppc64/kernel/rtas.c new file mode 100644 index 00000000000..5575603def2 --- /dev/null +++ b/arch/ppc64/kernel/rtas.c @@ -0,0 +1,657 @@ +/* + * + * Procedures for interfacing to the RTAS on CHRP machines. + * + * Peter Bergner, IBM March 2001. + * Copyright (C) 2001 IBM. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct flash_block_list_header rtas_firmware_flash_list = {0, NULL}; + +struct rtas_t rtas = { + .lock = SPIN_LOCK_UNLOCKED +}; + +EXPORT_SYMBOL(rtas); + +char rtas_err_buf[RTAS_ERROR_LOG_MAX]; + +DEFINE_SPINLOCK(rtas_data_buf_lock); +char rtas_data_buf[RTAS_DATA_BUF_SIZE]__page_aligned; +unsigned long rtas_rmo_buf; + +void +call_rtas_display_status(unsigned char c) +{ + struct rtas_args *args = &rtas.args; + unsigned long s; + + if (!rtas.base) + return; + spin_lock_irqsave(&rtas.lock, s); + + args->token = 10; + args->nargs = 1; + args->nret = 1; + args->rets = (rtas_arg_t *)&(args->args[1]); + args->args[0] = (int)c; + + enter_rtas(__pa(args)); + + spin_unlock_irqrestore(&rtas.lock, s); +} + +void +call_rtas_display_status_delay(unsigned char c) +{ + static int pending_newline = 0; /* did last write end with unprinted newline? */ + static int width = 16; + + if (c == '\n') { + while (width-- > 0) + call_rtas_display_status(' '); + width = 16; + udelay(500000); + pending_newline = 1; + } else { + if (pending_newline) { + call_rtas_display_status('\r'); + call_rtas_display_status('\n'); + } + pending_newline = 0; + if (width--) { + call_rtas_display_status(c); + udelay(10000); + } + } +} + +int +rtas_token(const char *service) +{ + int *tokp; + if (rtas.dev == NULL) { + PPCDBG(PPCDBG_RTAS,"\tNo rtas device in device-tree...\n"); + return RTAS_UNKNOWN_SERVICE; + } + tokp = (int *) get_property(rtas.dev, service, NULL); + return tokp ? *tokp : RTAS_UNKNOWN_SERVICE; +} + +/* + * Return the firmware-specified size of the error log buffer + * for all rtas calls that require an error buffer argument. + * This includes 'check-exception' and 'rtas-last-error'. + */ +int rtas_get_error_log_max(void) +{ + static int rtas_error_log_max; + if (rtas_error_log_max) + return rtas_error_log_max; + + rtas_error_log_max = rtas_token ("rtas-error-log-max"); + if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) || + (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) { + printk (KERN_WARNING "RTAS: bad log buffer size %d\n", rtas_error_log_max); + rtas_error_log_max = RTAS_ERROR_LOG_MAX; + } + return rtas_error_log_max; +} + + +/** Return a copy of the detailed error text associated with the + * most recent failed call to rtas. Because the error text + * might go stale if there are any other intervening rtas calls, + * this routine must be called atomically with whatever produced + * the error (i.e. with rtas.lock still held from the previous call). + */ +static int +__fetch_rtas_last_error(void) +{ + struct rtas_args err_args, save_args; + u32 bufsz; + + bufsz = rtas_get_error_log_max(); + + err_args.token = rtas_token("rtas-last-error"); + err_args.nargs = 2; + err_args.nret = 1; + + err_args.args[0] = (rtas_arg_t)__pa(rtas_err_buf); + err_args.args[1] = bufsz; + err_args.args[2] = 0; + + save_args = rtas.args; + rtas.args = err_args; + + enter_rtas(__pa(&rtas.args)); + + err_args = rtas.args; + rtas.args = save_args; + + return err_args.args[2]; +} + +int rtas_call(int token, int nargs, int nret, int *outputs, ...) +{ + va_list list; + int i, logit = 0; + unsigned long s; + struct rtas_args *rtas_args; + char * buff_copy = NULL; + int ret; + + PPCDBG(PPCDBG_RTAS, "Entering rtas_call\n"); + PPCDBG(PPCDBG_RTAS, "\ttoken = 0x%x\n", token); + PPCDBG(PPCDBG_RTAS, "\tnargs = %d\n", nargs); + PPCDBG(PPCDBG_RTAS, "\tnret = %d\n", nret); + PPCDBG(PPCDBG_RTAS, "\t&outputs = 0x%lx\n", outputs); + if (token == RTAS_UNKNOWN_SERVICE) + return -1; + + /* Gotta do something different here, use global lock for now... */ + spin_lock_irqsave(&rtas.lock, s); + rtas_args = &rtas.args; + + rtas_args->token = token; + rtas_args->nargs = nargs; + rtas_args->nret = nret; + rtas_args->rets = (rtas_arg_t *)&(rtas_args->args[nargs]); + va_start(list, outputs); + for (i = 0; i < nargs; ++i) { + rtas_args->args[i] = va_arg(list, rtas_arg_t); + PPCDBG(PPCDBG_RTAS, "\tnarg[%d] = 0x%x\n", i, rtas_args->args[i]); + } + va_end(list); + + for (i = 0; i < nret; ++i) + rtas_args->rets[i] = 0; + + PPCDBG(PPCDBG_RTAS, "\tentering rtas with 0x%lx\n", + __pa(rtas_args)); + enter_rtas(__pa(rtas_args)); + PPCDBG(PPCDBG_RTAS, "\treturned from rtas ...\n"); + + /* A -1 return code indicates that the last command couldn't + be completed due to a hardware error. */ + if (rtas_args->rets[0] == -1) + logit = (__fetch_rtas_last_error() == 0); + + ifppcdebug(PPCDBG_RTAS) { + for(i=0; i < nret ;i++) + udbg_printf("\tnret[%d] = 0x%lx\n", i, (ulong)rtas_args->rets[i]); + } + + if (nret > 1 && outputs != NULL) + for (i = 0; i < nret-1; ++i) + outputs[i] = rtas_args->rets[i+1]; + ret = (nret > 0)? rtas_args->rets[0]: 0; + + /* Log the error in the unlikely case that there was one. */ + if (unlikely(logit)) { + buff_copy = rtas_err_buf; + if (mem_init_done) { + buff_copy = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); + if (buff_copy) + memcpy(buff_copy, rtas_err_buf, + RTAS_ERROR_LOG_MAX); + } + } + + /* Gotta do something different here, use global lock for now... */ + spin_unlock_irqrestore(&rtas.lock, s); + + if (buff_copy) { + log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); + if (mem_init_done) + kfree(buff_copy); + } + return ret; +} + +/* Given an RTAS status code of 990n compute the hinted delay of 10^n + * (last digit) milliseconds. For now we bound at n=5 (100 sec). + */ +unsigned int +rtas_extended_busy_delay_time(int status) +{ + int order = status - 9900; + unsigned long ms; + + if (order < 0) + order = 0; /* RTC depends on this for -2 clock busy */ + else if (order > 5) + order = 5; /* bound */ + + /* Use microseconds for reasonable accuracy */ + for (ms=1; order > 0; order--) + ms *= 10; + + return ms; +} + +int rtas_error_rc(int rtas_rc) +{ + int rc; + + switch (rtas_rc) { + case -1: /* Hardware Error */ + rc = -EIO; + break; + case -3: /* Bad indicator/domain/etc */ + rc = -EINVAL; + break; + case -9000: /* Isolation error */ + rc = -EFAULT; + break; + case -9001: /* Outstanding TCE/PTE */ + rc = -EEXIST; + break; + case -9002: /* No usable slot */ + rc = -ENODEV; + break; + default: + printk(KERN_ERR "%s: unexpected RTAS error %d\n", + __FUNCTION__, rtas_rc); + rc = -ERANGE; + break; + } + return rc; +} + +int rtas_get_power_level(int powerdomain, int *level) +{ + int token = rtas_token("get-power-level"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + while ((rc = rtas_call(token, 1, 2, level, powerdomain)) == RTAS_BUSY) + udelay(1); + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} + +int rtas_set_power_level(int powerdomain, int level, int *setlevel) +{ + int token = rtas_token("set-power-level"); + unsigned int wait_time; + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + while (1) { + rc = rtas_call(token, 2, 2, setlevel, powerdomain, level); + if (rc == RTAS_BUSY) + udelay(1); + else if (rtas_is_extended_busy(rc)) { + wait_time = rtas_extended_busy_delay_time(rc); + udelay(wait_time * 1000); + } else + break; + } + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} + +int rtas_get_sensor(int sensor, int index, int *state) +{ + int token = rtas_token("get-sensor-state"); + unsigned int wait_time; + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + while (1) { + rc = rtas_call(token, 2, 2, state, sensor, index); + if (rc == RTAS_BUSY) + udelay(1); + else if (rtas_is_extended_busy(rc)) { + wait_time = rtas_extended_busy_delay_time(rc); + udelay(wait_time * 1000); + } else + break; + } + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} + +int rtas_set_indicator(int indicator, int index, int new_value) +{ + int token = rtas_token("set-indicator"); + unsigned int wait_time; + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + while (1) { + rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value); + if (rc == RTAS_BUSY) + udelay(1); + else if (rtas_is_extended_busy(rc)) { + wait_time = rtas_extended_busy_delay_time(rc); + udelay(wait_time * 1000); + } + else + break; + } + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} + +#define FLASH_BLOCK_LIST_VERSION (1UL) +static void +rtas_flash_firmware(void) +{ + unsigned long image_size; + struct flash_block_list *f, *next, *flist; + unsigned long rtas_block_list; + int i, status, update_token; + + update_token = rtas_token("ibm,update-flash-64-and-reboot"); + if (update_token == RTAS_UNKNOWN_SERVICE) { + printk(KERN_ALERT "FLASH: ibm,update-flash-64-and-reboot is not available -- not a service partition?\n"); + printk(KERN_ALERT "FLASH: firmware will not be flashed\n"); + return; + } + + /* NOTE: the "first" block list is a global var with no data + * blocks in the kernel data segment. We do this because + * we want to ensure this block_list addr is under 4GB. + */ + rtas_firmware_flash_list.num_blocks = 0; + flist = (struct flash_block_list *)&rtas_firmware_flash_list; + rtas_block_list = virt_to_abs(flist); + if (rtas_block_list >= 4UL*1024*1024*1024) { + printk(KERN_ALERT "FLASH: kernel bug...flash list header addr above 4GB\n"); + return; + } + + printk(KERN_ALERT "FLASH: preparing saved firmware image for flash\n"); + /* Update the block_list in place. */ + image_size = 0; + for (f = flist; f; f = next) { + /* Translate data addrs to absolute */ + for (i = 0; i < f->num_blocks; i++) { + f->blocks[i].data = (char *)virt_to_abs(f->blocks[i].data); + image_size += f->blocks[i].length; + } + next = f->next; + /* Don't translate NULL pointer for last entry */ + if (f->next) + f->next = (struct flash_block_list *)virt_to_abs(f->next); + else + f->next = NULL; + /* make num_blocks into the version/length field */ + f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16); + } + + printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size); + printk(KERN_ALERT "FLASH: performing flash and reboot\n"); + ppc_md.progress("Flashing \n", 0x0); + ppc_md.progress("Please Wait... ", 0x0); + printk(KERN_ALERT "FLASH: this will take several minutes. Do not power off!\n"); + status = rtas_call(update_token, 1, 1, NULL, rtas_block_list); + switch (status) { /* should only get "bad" status */ + case 0: + printk(KERN_ALERT "FLASH: success\n"); + break; + case -1: + printk(KERN_ALERT "FLASH: hardware error. Firmware may not be not flashed\n"); + break; + case -3: + printk(KERN_ALERT "FLASH: image is corrupt or not correct for this platform. Firmware not flashed\n"); + break; + case -4: + printk(KERN_ALERT "FLASH: flash failed when partially complete. System may not reboot\n"); + break; + default: + printk(KERN_ALERT "FLASH: unknown flash return code %d\n", status); + break; + } +} + +void rtas_flash_bypass_warning(void) +{ + printk(KERN_ALERT "FLASH: firmware flash requires a reboot\n"); + printk(KERN_ALERT "FLASH: the firmware image will NOT be flashed\n"); +} + + +void +rtas_restart(char *cmd) +{ + if (rtas_firmware_flash_list.next) + rtas_flash_firmware(); + + printk("RTAS system-reboot returned %d\n", + rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); + for (;;); +} + +void +rtas_power_off(void) +{ + if (rtas_firmware_flash_list.next) + rtas_flash_bypass_warning(); + /* allow power on only with power button press */ + printk("RTAS power-off returned %d\n", + rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + for (;;); +} + +void +rtas_halt(void) +{ + if (rtas_firmware_flash_list.next) + rtas_flash_bypass_warning(); + rtas_power_off(); +} + +/* Must be in the RMO region, so we place it here */ +static char rtas_os_term_buf[2048]; + +void rtas_os_term(char *str) +{ + int status; + + if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term")) + return; + + snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); + + do { + status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL, + __pa(rtas_os_term_buf)); + + if (status == RTAS_BUSY) + udelay(1); + else if (status != 0) + printk(KERN_EMERG "ibm,os-term call failed %d\n", + status); + } while (status == RTAS_BUSY); +} + + +asmlinkage int ppc_rtas(struct rtas_args __user *uargs) +{ + struct rtas_args args; + unsigned long flags; + char * buff_copy; + int nargs; + int err_rc = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0) + return -EFAULT; + + nargs = args.nargs; + if (nargs > ARRAY_SIZE(args.args) + || args.nret > ARRAY_SIZE(args.args) + || nargs + args.nret > ARRAY_SIZE(args.args)) + return -EINVAL; + + /* Copy in args. */ + if (copy_from_user(args.args, uargs->args, + nargs * sizeof(rtas_arg_t)) != 0) + return -EFAULT; + + buff_copy = kmalloc(RTAS_ERROR_LOG_MAX, GFP_KERNEL); + + spin_lock_irqsave(&rtas.lock, flags); + + rtas.args = args; + enter_rtas(__pa(&rtas.args)); + args = rtas.args; + + args.rets = &args.args[nargs]; + + /* A -1 return code indicates that the last command couldn't + be completed due to a hardware error. */ + if (args.rets[0] == -1) { + err_rc = __fetch_rtas_last_error(); + if ((err_rc == 0) && buff_copy) { + memcpy(buff_copy, rtas_err_buf, RTAS_ERROR_LOG_MAX); + } + } + + spin_unlock_irqrestore(&rtas.lock, flags); + + if (buff_copy) { + if ((args.rets[0] == -1) && (err_rc == 0)) { + log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); + } + kfree(buff_copy); + } + + /* Copy out args. */ + if (copy_to_user(uargs->args + nargs, + args.args + nargs, + args.nret * sizeof(rtas_arg_t)) != 0) + return -EFAULT; + + return 0; +} + +/* This version can't take the spinlock, because it never returns */ + +struct rtas_args rtas_stop_self_args = { + /* The token is initialized for real in setup_system() */ + .token = RTAS_UNKNOWN_SERVICE, + .nargs = 0, + .nret = 1, + .rets = &rtas_stop_self_args.args[0], +}; + +void rtas_stop_self(void) +{ + struct rtas_args *rtas_args = &rtas_stop_self_args; + + local_irq_disable(); + + BUG_ON(rtas_args->token == RTAS_UNKNOWN_SERVICE); + + printk("cpu %u (hwid %u) Ready to die...\n", + smp_processor_id(), hard_smp_processor_id()); + enter_rtas(__pa(rtas_args)); + + panic("Alas, I survived.\n"); +} + +/* + * Call early during boot, before mem init or bootmem, to retreive the RTAS + * informations from the device-tree and allocate the RMO buffer for userland + * accesses. + */ +void __init rtas_initialize(void) +{ + /* Get RTAS dev node and fill up our "rtas" structure with infos + * about it. + */ + rtas.dev = of_find_node_by_name(NULL, "rtas"); + if (rtas.dev) { + u32 *basep, *entryp; + u32 *sizep; + + basep = (u32 *)get_property(rtas.dev, "linux,rtas-base", NULL); + sizep = (u32 *)get_property(rtas.dev, "rtas-size", NULL); + if (basep != NULL && sizep != NULL) { + rtas.base = *basep; + rtas.size = *sizep; + entryp = (u32 *)get_property(rtas.dev, "linux,rtas-entry", NULL); + if (entryp == NULL) /* Ugh */ + rtas.entry = rtas.base; + else + rtas.entry = *entryp; + } else + rtas.dev = NULL; + } + /* If RTAS was found, allocate the RMO buffer for it and look for + * the stop-self token if any + */ + if (rtas.dev) { + unsigned long rtas_region = RTAS_INSTANTIATE_MAX; + if (systemcfg->platform == PLATFORM_PSERIES_LPAR) + rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX); + + rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, + rtas_region); + +#ifdef CONFIG_HOTPLUG_CPU + rtas_stop_self_args.token = rtas_token("stop-self"); +#endif /* CONFIG_HOTPLUG_CPU */ + } + +} + + +EXPORT_SYMBOL(rtas_firmware_flash_list); +EXPORT_SYMBOL(rtas_token); +EXPORT_SYMBOL(rtas_call); +EXPORT_SYMBOL(rtas_data_buf); +EXPORT_SYMBOL(rtas_data_buf_lock); +EXPORT_SYMBOL(rtas_extended_busy_delay_time); +EXPORT_SYMBOL(rtas_get_sensor); +EXPORT_SYMBOL(rtas_get_power_level); +EXPORT_SYMBOL(rtas_set_power_level); +EXPORT_SYMBOL(rtas_set_indicator); +EXPORT_SYMBOL(rtas_get_error_log_max); diff --git a/arch/ppc64/kernel/rtas_flash.c b/arch/ppc64/kernel/rtas_flash.c new file mode 100644 index 00000000000..3213837282c --- /dev/null +++ b/arch/ppc64/kernel/rtas_flash.c @@ -0,0 +1,725 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /proc/ppc64/rtas/firmware_flash interface + * + * This file implements a firmware_flash interface to pump a firmware + * image into the kernel. At reboot time rtas_restart() will see the + * firmware image and flash it as it reboots (see rtas.c). + */ + +#include +#include +#include +#include +#include +#include + +#define MODULE_VERS "1.0" +#define MODULE_NAME "rtas_flash" + +#define FIRMWARE_FLASH_NAME "firmware_flash" +#define FIRMWARE_UPDATE_NAME "firmware_update" +#define MANAGE_FLASH_NAME "manage_flash" +#define VALIDATE_FLASH_NAME "validate_flash" + +/* General RTAS Status Codes */ +#define RTAS_RC_SUCCESS 0 +#define RTAS_RC_HW_ERR -1 +#define RTAS_RC_BUSY -2 + +/* Flash image status values */ +#define FLASH_AUTH -9002 /* RTAS Not Service Authority Partition */ +#define FLASH_NO_OP -1099 /* No operation initiated by user */ +#define FLASH_IMG_SHORT -1005 /* Flash image shorter than expected */ +#define FLASH_IMG_BAD_LEN -1004 /* Bad length value in flash list block */ +#define FLASH_IMG_NULL_DATA -1003 /* Bad data value in flash list block */ +#define FLASH_IMG_READY 0 /* Firmware img ready for flash on reboot */ + +/* Manage image status values */ +#define MANAGE_AUTH -9002 /* RTAS Not Service Authority Partition */ +#define MANAGE_ACTIVE_ERR -9001 /* RTAS Cannot Overwrite Active Img */ +#define MANAGE_NO_OP -1099 /* No operation initiated by user */ +#define MANAGE_PARAM_ERR -3 /* RTAS Parameter Error */ +#define MANAGE_HW_ERR -1 /* RTAS Hardware Error */ + +/* Validate image status values */ +#define VALIDATE_AUTH -9002 /* RTAS Not Service Authority Partition */ +#define VALIDATE_NO_OP -1099 /* No operation initiated by the user */ +#define VALIDATE_INCOMPLETE -1002 /* User copied < VALIDATE_BUF_SIZE */ +#define VALIDATE_READY -1001 /* Firmware image ready for validation */ +#define VALIDATE_PARAM_ERR -3 /* RTAS Parameter Error */ +#define VALIDATE_HW_ERR -1 /* RTAS Hardware Error */ +#define VALIDATE_TMP_UPDATE 0 /* Validate Return Status */ +#define VALIDATE_FLASH_AUTH 1 /* Validate Return Status */ +#define VALIDATE_INVALID_IMG 2 /* Validate Return Status */ +#define VALIDATE_CUR_UNKNOWN 3 /* Validate Return Status */ +#define VALIDATE_TMP_COMMIT_DL 4 /* Validate Return Status */ +#define VALIDATE_TMP_COMMIT 5 /* Validate Return Status */ +#define VALIDATE_TMP_UPDATE_DL 6 /* Validate Return Status */ + +/* ibm,manage-flash-image operation tokens */ +#define RTAS_REJECT_TMP_IMG 0 +#define RTAS_COMMIT_TMP_IMG 1 + +/* Array sizes */ +#define VALIDATE_BUF_SIZE 4096 +#define RTAS_MSG_MAXLEN 64 + +/* Local copy of the flash block list. + * We only allow one open of the flash proc file and create this + * list as we go. This list will be put in the kernel's + * rtas_firmware_flash_list global var once it is fully read. + * + * For convenience as we build the list we use virtual addrs, + * we do not fill in the version number, and the length field + * is treated as the number of entries currently in the block + * (i.e. not a byte count). This is all fixed on release. + */ + +/* Status int must be first member of struct */ +struct rtas_update_flash_t +{ + int status; /* Flash update status */ + struct flash_block_list *flist; /* Local copy of flash block list */ +}; + +/* Status int must be first member of struct */ +struct rtas_manage_flash_t +{ + int status; /* Returned status */ + unsigned int op; /* Reject or commit image */ +}; + +/* Status int must be first member of struct */ +struct rtas_validate_flash_t +{ + int status; /* Returned status */ + char buf[VALIDATE_BUF_SIZE]; /* Candidate image buffer */ + unsigned int buf_size; /* Size of image buf */ + unsigned int update_results; /* Update results token */ +}; + +static DEFINE_SPINLOCK(flash_file_open_lock); +static struct proc_dir_entry *firmware_flash_pde; +static struct proc_dir_entry *firmware_update_pde; +static struct proc_dir_entry *validate_pde; +static struct proc_dir_entry *manage_pde; + +/* Do simple sanity checks on the flash image. */ +static int flash_list_valid(struct flash_block_list *flist) +{ + struct flash_block_list *f; + int i; + unsigned long block_size, image_size; + + /* Paranoid self test here. We also collect the image size. */ + image_size = 0; + for (f = flist; f; f = f->next) { + for (i = 0; i < f->num_blocks; i++) { + if (f->blocks[i].data == NULL) { + return FLASH_IMG_NULL_DATA; + } + block_size = f->blocks[i].length; + if (block_size <= 0 || block_size > PAGE_SIZE) { + return FLASH_IMG_BAD_LEN; + } + image_size += block_size; + } + } + + if (image_size < (256 << 10)) { + if (image_size < 2) + return FLASH_NO_OP; + } + + printk(KERN_INFO "FLASH: flash image with %ld bytes stored for hardware flash on reboot\n", image_size); + + return FLASH_IMG_READY; +} + +static void free_flash_list(struct flash_block_list *f) +{ + struct flash_block_list *next; + int i; + + while (f) { + for (i = 0; i < f->num_blocks; i++) + free_page((unsigned long)(f->blocks[i].data)); + next = f->next; + free_page((unsigned long)f); + f = next; + } +} + +static int rtas_flash_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_update_flash_t *uf; + + uf = (struct rtas_update_flash_t *) dp->data; + if (uf->flist) { + /* File was opened in write mode for a new flash attempt */ + /* Clear saved list */ + if (rtas_firmware_flash_list.next) { + free_flash_list(rtas_firmware_flash_list.next); + rtas_firmware_flash_list.next = NULL; + } + + if (uf->status != FLASH_AUTH) + uf->status = flash_list_valid(uf->flist); + + if (uf->status == FLASH_IMG_READY) + rtas_firmware_flash_list.next = uf->flist; + else + free_flash_list(uf->flist); + + uf->flist = NULL; + } + + atomic_dec(&dp->count); + return 0; +} + +static void get_flash_status_msg(int status, char *buf) +{ + char *msg; + + switch (status) { + case FLASH_AUTH: + msg = "error: this partition does not have service authority\n"; + break; + case FLASH_NO_OP: + msg = "info: no firmware image for flash\n"; + break; + case FLASH_IMG_SHORT: + msg = "error: flash image short\n"; + break; + case FLASH_IMG_BAD_LEN: + msg = "error: internal error bad length\n"; + break; + case FLASH_IMG_NULL_DATA: + msg = "error: internal error null data\n"; + break; + case FLASH_IMG_READY: + msg = "ready: firmware image ready for flash on reboot\n"; + break; + default: + sprintf(buf, "error: unexpected status value %d\n", status); + return; + } + + strcpy(buf, msg); +} + +/* Reading the proc file will show status (not the firmware contents) */ +static ssize_t rtas_flash_read(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_update_flash_t *uf; + char msg[RTAS_MSG_MAXLEN]; + int msglen; + + uf = (struct rtas_update_flash_t *) dp->data; + + if (!strcmp(dp->name, FIRMWARE_FLASH_NAME)) { + get_flash_status_msg(uf->status, msg); + } else { /* FIRMWARE_UPDATE_NAME */ + sprintf(msg, "%d\n", uf->status); + } + msglen = strlen(msg); + if (msglen > count) + msglen = count; + + if (ppos && *ppos != 0) + return 0; /* be cheap */ + + if (!access_ok(VERIFY_WRITE, buf, msglen)) + return -EINVAL; + + if (copy_to_user(buf, msg, msglen)) + return -EFAULT; + + if (ppos) + *ppos = msglen; + return msglen; +} + +/* We could be much more efficient here. But to keep this function + * simple we allocate a page to the block list no matter how small the + * count is. If the system is low on memory it will be just as well + * that we fail.... + */ +static ssize_t rtas_flash_write(struct file *file, const char *buffer, + size_t count, loff_t *off) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_update_flash_t *uf; + char *p; + int next_free; + struct flash_block_list *fl; + + uf = (struct rtas_update_flash_t *) dp->data; + + if (uf->status == FLASH_AUTH || count == 0) + return count; /* discard data */ + + /* In the case that the image is not ready for flashing, the memory + * allocated for the block list will be freed upon the release of the + * proc file + */ + if (uf->flist == NULL) { + uf->flist = (struct flash_block_list *) get_zeroed_page(GFP_KERNEL); + if (!uf->flist) + return -ENOMEM; + } + + fl = uf->flist; + while (fl->next) + fl = fl->next; /* seek to last block_list for append */ + next_free = fl->num_blocks; + if (next_free == FLASH_BLOCKS_PER_NODE) { + /* Need to allocate another block_list */ + fl->next = (struct flash_block_list *)get_zeroed_page(GFP_KERNEL); + if (!fl->next) + return -ENOMEM; + fl = fl->next; + next_free = 0; + } + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + p = (char *)get_zeroed_page(GFP_KERNEL); + if (!p) + return -ENOMEM; + + if(copy_from_user(p, buffer, count)) { + free_page((unsigned long)p); + return -EFAULT; + } + fl->blocks[next_free].data = p; + fl->blocks[next_free].length = count; + fl->num_blocks++; + + return count; +} + +static int rtas_excl_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(inode); + + /* Enforce exclusive open with use count of PDE */ + spin_lock(&flash_file_open_lock); + if (atomic_read(&dp->count) > 1) { + spin_unlock(&flash_file_open_lock); + return -EBUSY; + } + + atomic_inc(&dp->count); + spin_unlock(&flash_file_open_lock); + + return 0; +} + +static int rtas_excl_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(inode); + + atomic_dec(&dp->count); + + return 0; +} + +static void manage_flash(struct rtas_manage_flash_t *args_buf) +{ + unsigned int wait_time; + s32 rc; + + while (1) { + rc = rtas_call(rtas_token("ibm,manage-flash-image"), 1, + 1, NULL, args_buf->op); + if (rc == RTAS_RC_BUSY) + udelay(1); + else if (rtas_is_extended_busy(rc)) { + wait_time = rtas_extended_busy_delay_time(rc); + udelay(wait_time * 1000); + } else + break; + } + + args_buf->status = rc; +} + +static ssize_t manage_flash_read(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_manage_flash_t *args_buf; + char msg[RTAS_MSG_MAXLEN]; + int msglen; + + args_buf = (struct rtas_manage_flash_t *) dp->data; + if (args_buf == NULL) + return 0; + + msglen = sprintf(msg, "%d\n", args_buf->status); + if (msglen > count) + msglen = count; + + if (ppos && *ppos != 0) + return 0; /* be cheap */ + + if (!access_ok(VERIFY_WRITE, buf, msglen)) + return -EINVAL; + + if (copy_to_user(buf, msg, msglen)) + return -EFAULT; + + if (ppos) + *ppos = msglen; + return msglen; +} + +static ssize_t manage_flash_write(struct file *file, const char *buf, + size_t count, loff_t *off) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_manage_flash_t *args_buf; + const char reject_str[] = "0"; + const char commit_str[] = "1"; + char stkbuf[10]; + int op; + + args_buf = (struct rtas_manage_flash_t *) dp->data; + if ((args_buf->status == MANAGE_AUTH) || (count == 0)) + return count; + + op = -1; + if (buf) { + if (count > 9) count = 9; + if (copy_from_user (stkbuf, buf, count)) { + return -EFAULT; + } + if (strncmp(stkbuf, reject_str, strlen(reject_str)) == 0) + op = RTAS_REJECT_TMP_IMG; + else if (strncmp(stkbuf, commit_str, strlen(commit_str)) == 0) + op = RTAS_COMMIT_TMP_IMG; + } + + if (op == -1) /* buf is empty, or contains invalid string */ + return -EINVAL; + + args_buf->op = op; + manage_flash(args_buf); + + return count; +} + +static void validate_flash(struct rtas_validate_flash_t *args_buf) +{ + int token = rtas_token("ibm,validate-flash-image"); + unsigned int wait_time; + int update_results; + s32 rc; + + rc = 0; + while(1) { + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, args_buf->buf, VALIDATE_BUF_SIZE); + rc = rtas_call(token, 2, 2, &update_results, + (u32) __pa(rtas_data_buf), args_buf->buf_size); + memcpy(args_buf->buf, rtas_data_buf, VALIDATE_BUF_SIZE); + spin_unlock(&rtas_data_buf_lock); + + if (rc == RTAS_RC_BUSY) + udelay(1); + else if (rtas_is_extended_busy(rc)) { + wait_time = rtas_extended_busy_delay_time(rc); + udelay(wait_time * 1000); + } else + break; + } + + args_buf->status = rc; + args_buf->update_results = update_results; +} + +static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, + char *msg) +{ + int n; + + if (args_buf->status >= VALIDATE_TMP_UPDATE) { + n = sprintf(msg, "%d\n", args_buf->update_results); + if ((args_buf->update_results >= VALIDATE_CUR_UNKNOWN) || + (args_buf->update_results == VALIDATE_TMP_UPDATE)) + n += sprintf(msg + n, "%s\n", args_buf->buf); + } else { + n = sprintf(msg, "%d\n", args_buf->status); + } + return n; +} + +static ssize_t validate_flash_read(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_validate_flash_t *args_buf; + char msg[RTAS_MSG_MAXLEN]; + int msglen; + + args_buf = (struct rtas_validate_flash_t *) dp->data; + + if (ppos && *ppos != 0) + return 0; /* be cheap */ + + msglen = get_validate_flash_msg(args_buf, msg); + if (msglen > count) + msglen = count; + + if (!access_ok(VERIFY_WRITE, buf, msglen)) + return -EINVAL; + + if (copy_to_user(buf, msg, msglen)) + return -EFAULT; + + if (ppos) + *ppos = msglen; + return msglen; +} + +static ssize_t validate_flash_write(struct file *file, const char *buf, + size_t count, loff_t *off) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_validate_flash_t *args_buf; + int rc; + + args_buf = (struct rtas_validate_flash_t *) dp->data; + + if (dp->data == NULL) { + dp->data = kmalloc(sizeof(struct rtas_validate_flash_t), + GFP_KERNEL); + if (dp->data == NULL) + return -ENOMEM; + } + + /* We are only interested in the first 4K of the + * candidate image */ + if ((*off >= VALIDATE_BUF_SIZE) || + (args_buf->status == VALIDATE_AUTH)) { + *off += count; + return count; + } + + if (*off + count >= VALIDATE_BUF_SIZE) { + count = VALIDATE_BUF_SIZE - *off; + args_buf->status = VALIDATE_READY; + } else { + args_buf->status = VALIDATE_INCOMPLETE; + } + + if (!access_ok(VERIFY_READ, buf, count)) { + rc = -EFAULT; + goto done; + } + if (copy_from_user(args_buf->buf + *off, buf, count)) { + rc = -EFAULT; + goto done; + } + + *off += count; + rc = count; +done: + if (rc < 0) { + kfree(dp->data); + dp->data = NULL; + } + return rc; +} + +static int validate_flash_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_validate_flash_t *args_buf; + + args_buf = (struct rtas_validate_flash_t *) dp->data; + + if (args_buf->status == VALIDATE_READY) { + args_buf->buf_size = VALIDATE_BUF_SIZE; + validate_flash(args_buf); + } + + /* The matching atomic_inc was in rtas_excl_open() */ + atomic_dec(&dp->count); + + return 0; +} + +static void remove_flash_pde(struct proc_dir_entry *dp) +{ + if (dp) { + if (dp->data != NULL) + kfree(dp->data); + dp->owner = NULL; + remove_proc_entry(dp->name, dp->parent); + } +} + +static int initialize_flash_pde_data(const char *rtas_call_name, + size_t buf_size, + struct proc_dir_entry *dp) +{ + int *status; + int token; + + dp->data = kmalloc(buf_size, GFP_KERNEL); + if (dp->data == NULL) { + remove_flash_pde(dp); + return -ENOMEM; + } + + memset(dp->data, 0, buf_size); + + /* + * This code assumes that the status int is the first member of the + * struct + */ + status = (int *) dp->data; + token = rtas_token(rtas_call_name); + if (token == RTAS_UNKNOWN_SERVICE) + *status = FLASH_AUTH; + else + *status = FLASH_NO_OP; + + return 0; +} + +static struct proc_dir_entry *create_flash_pde(const char *filename, + struct file_operations *fops) +{ + struct proc_dir_entry *ent = NULL; + + ent = create_proc_entry(filename, S_IRUSR | S_IWUSR, NULL); + if (ent != NULL) { + ent->nlink = 1; + ent->proc_fops = fops; + ent->owner = THIS_MODULE; + } + + return ent; +} + +static struct file_operations rtas_flash_operations = { + .read = rtas_flash_read, + .write = rtas_flash_write, + .open = rtas_excl_open, + .release = rtas_flash_release, +}; + +static struct file_operations manage_flash_operations = { + .read = manage_flash_read, + .write = manage_flash_write, + .open = rtas_excl_open, + .release = rtas_excl_release, +}; + +static struct file_operations validate_flash_operations = { + .read = validate_flash_read, + .write = validate_flash_write, + .open = rtas_excl_open, + .release = validate_flash_release, +}; + +int __init rtas_flash_init(void) +{ + int rc; + + if (rtas_token("ibm,update-flash-64-and-reboot") == + RTAS_UNKNOWN_SERVICE) { + printk(KERN_ERR "rtas_flash: no firmware flash support\n"); + return 1; + } + + firmware_flash_pde = create_flash_pde("ppc64/rtas/" + FIRMWARE_FLASH_NAME, + &rtas_flash_operations); + if (firmware_flash_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,update-flash-64-and-reboot", + sizeof(struct rtas_update_flash_t), + firmware_flash_pde); + if (rc != 0) + goto cleanup; + + firmware_update_pde = create_flash_pde("ppc64/rtas/" + FIRMWARE_UPDATE_NAME, + &rtas_flash_operations); + if (firmware_update_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,update-flash-64-and-reboot", + sizeof(struct rtas_update_flash_t), + firmware_update_pde); + if (rc != 0) + goto cleanup; + + validate_pde = create_flash_pde("ppc64/rtas/" VALIDATE_FLASH_NAME, + &validate_flash_operations); + if (validate_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,validate-flash-image", + sizeof(struct rtas_validate_flash_t), + validate_pde); + if (rc != 0) + goto cleanup; + + manage_pde = create_flash_pde("ppc64/rtas/" MANAGE_FLASH_NAME, + &manage_flash_operations); + if (manage_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,manage-flash-image", + sizeof(struct rtas_manage_flash_t), + manage_pde); + if (rc != 0) + goto cleanup; + + return 0; + +cleanup: + remove_flash_pde(firmware_flash_pde); + remove_flash_pde(firmware_update_pde); + remove_flash_pde(validate_pde); + remove_flash_pde(manage_pde); + + return rc; +} + +void __exit rtas_flash_cleanup(void) +{ + remove_flash_pde(firmware_flash_pde); + remove_flash_pde(firmware_update_pde); + remove_flash_pde(validate_pde); + remove_flash_pde(manage_pde); +} + +module_init(rtas_flash_init); +module_exit(rtas_flash_cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/ppc64/kernel/rtasd.c b/arch/ppc64/kernel/rtasd.c new file mode 100644 index 00000000000..ff65dc33320 --- /dev/null +++ b/arch/ppc64/kernel/rtasd.c @@ -0,0 +1,527 @@ +/* + * Copyright (C) 2001 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Communication to userspace based on kernel/printk.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUG(A...) printk(KERN_ERR A) +#else +#define DEBUG(A...) +#endif + +static DEFINE_SPINLOCK(rtasd_log_lock); + +DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait); + +static char *rtas_log_buf; +static unsigned long rtas_log_start; +static unsigned long rtas_log_size; + +static int surveillance_timeout = -1; +static unsigned int rtas_event_scan_rate; +static unsigned int rtas_error_log_max; +static unsigned int rtas_error_log_buffer_max; + +static int full_rtas_msgs = 0; + +extern int no_logging; + +volatile int error_log_cnt = 0; + +/* + * Since we use 32 bit RTAS, the physical address of this must be below + * 4G or else bad things happen. Allocate this in the kernel data and + * make it big enough. + */ +static unsigned char logdata[RTAS_ERROR_LOG_MAX]; + +static int get_eventscan_parms(void); + +static char *rtas_type[] = { + "Unknown", "Retry", "TCE Error", "Internal Device Failure", + "Timeout", "Data Parity", "Address Parity", "Cache Parity", + "Address Invalid", "ECC Uncorrected", "ECC Corrupted", +}; + +static char *rtas_event_type(int type) +{ + if ((type > 0) && (type < 11)) + return rtas_type[type]; + + switch (type) { + case RTAS_TYPE_EPOW: + return "EPOW"; + case RTAS_TYPE_PLATFORM: + return "Platform Error"; + case RTAS_TYPE_IO: + return "I/O Event"; + case RTAS_TYPE_INFO: + return "Platform Information Event"; + case RTAS_TYPE_DEALLOC: + return "Resource Deallocation Event"; + case RTAS_TYPE_DUMP: + return "Dump Notification Event"; + } + + return rtas_type[0]; +} + +/* To see this info, grep RTAS /var/log/messages and each entry + * will be collected together with obvious begin/end. + * There will be a unique identifier on the begin and end lines. + * This will persist across reboots. + * + * format of error logs returned from RTAS: + * bytes (size) : contents + * -------------------------------------------------------- + * 0-7 (8) : rtas_error_log + * 8-47 (40) : extended info + * 48-51 (4) : vendor id + * 52-1023 (vendor specific) : location code and debug data + */ +static void printk_log_rtas(char *buf, int len) +{ + + int i,j,n = 0; + int perline = 16; + char buffer[64]; + char * str = "RTAS event"; + + if (full_rtas_msgs) { + printk(RTAS_DEBUG "%d -------- %s begin --------\n", + error_log_cnt, str); + + /* + * Print perline bytes on each line, each line will start + * with RTAS and a changing number, so syslogd will + * print lines that are otherwise the same. Separate every + * 4 bytes with a space. + */ + for (i = 0; i < len; i++) { + j = i % perline; + if (j == 0) { + memset(buffer, 0, sizeof(buffer)); + n = sprintf(buffer, "RTAS %d:", i/perline); + } + + if ((i % 4) == 0) + n += sprintf(buffer+n, " "); + + n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]); + + if (j == (perline-1)) + printk(KERN_DEBUG "%s\n", buffer); + } + if ((i % perline) != 0) + printk(KERN_DEBUG "%s\n", buffer); + + printk(RTAS_DEBUG "%d -------- %s end ----------\n", + error_log_cnt, str); + } else { + struct rtas_error_log *errlog = (struct rtas_error_log *)buf; + + printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n", + error_log_cnt, rtas_event_type(errlog->type), + errlog->severity); + } +} + +static int log_rtas_len(char * buf) +{ + int len; + struct rtas_error_log *err; + + /* rtas fixed header */ + len = 8; + err = (struct rtas_error_log *)buf; + if (err->extended_log_length) { + + /* extended header */ + len += err->extended_log_length; + } + + if (rtas_error_log_max == 0) { + get_eventscan_parms(); + } + if (len > rtas_error_log_max) + len = rtas_error_log_max; + + return len; +} + +/* + * First write to nvram, if fatal error, that is the only + * place we log the info. The error will be picked up + * on the next reboot by rtasd. If not fatal, run the + * method for the type of error. Currently, only RTAS + * errors have methods implemented, but in the future + * there might be a need to store data in nvram before a + * call to panic(). + * + * XXX We write to nvram periodically, to indicate error has + * been written and sync'd, but there is a possibility + * that if we don't shutdown correctly, a duplicate error + * record will be created on next reboot. + */ +void pSeries_log_error(char *buf, unsigned int err_type, int fatal) +{ + unsigned long offset; + unsigned long s; + int len = 0; + + DEBUG("logging event\n"); + if (buf == NULL) + return; + + spin_lock_irqsave(&rtasd_log_lock, s); + + /* get length and increase count */ + switch (err_type & ERR_TYPE_MASK) { + case ERR_TYPE_RTAS_LOG: + len = log_rtas_len(buf); + if (!(err_type & ERR_FLAG_BOOT)) + error_log_cnt++; + break; + case ERR_TYPE_KERNEL_PANIC: + default: + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + + /* Write error to NVRAM */ + if (!no_logging && !(err_type & ERR_FLAG_BOOT)) + nvram_write_error_log(buf, len, err_type); + + /* + * rtas errors can occur during boot, and we do want to capture + * those somewhere, even if nvram isn't ready (why not?), and even + * if rtasd isn't ready. Put them into the boot log, at least. + */ + if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG) + printk_log_rtas(buf, len); + + /* Check to see if we need to or have stopped logging */ + if (fatal || no_logging) { + no_logging = 1; + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + + /* call type specific method for error */ + switch (err_type & ERR_TYPE_MASK) { + case ERR_TYPE_RTAS_LOG: + offset = rtas_error_log_buffer_max * + ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK); + + /* First copy over sequence number */ + memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int)); + + /* Second copy over error log data */ + offset += sizeof(int); + memcpy(&rtas_log_buf[offset], buf, len); + + if (rtas_log_size < LOG_NUMBER) + rtas_log_size += 1; + else + rtas_log_start += 1; + + spin_unlock_irqrestore(&rtasd_log_lock, s); + wake_up_interruptible(&rtas_log_wait); + break; + case ERR_TYPE_KERNEL_PANIC: + default: + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + +} + + +static int rtas_log_open(struct inode * inode, struct file * file) +{ + return 0; +} + +static int rtas_log_release(struct inode * inode, struct file * file) +{ + return 0; +} + +/* This will check if all events are logged, if they are then, we + * know that we can safely clear the events in NVRAM. + * Next we'll sit and wait for something else to log. + */ +static ssize_t rtas_log_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + int error; + char *tmp; + unsigned long s; + unsigned long offset; + + if (!buf || count < rtas_error_log_buffer_max) + return -EINVAL; + + count = rtas_error_log_buffer_max; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + + spin_lock_irqsave(&rtasd_log_lock, s); + /* if it's 0, then we know we got the last one (the one in NVRAM) */ + if (rtas_log_size == 0 && !no_logging) + nvram_clear_error_log(); + spin_unlock_irqrestore(&rtasd_log_lock, s); + + + error = wait_event_interruptible(rtas_log_wait, rtas_log_size); + if (error) + goto out; + + spin_lock_irqsave(&rtasd_log_lock, s); + offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK); + memcpy(tmp, &rtas_log_buf[offset], count); + + rtas_log_start += 1; + rtas_log_size -= 1; + spin_unlock_irqrestore(&rtasd_log_lock, s); + + error = copy_to_user(buf, tmp, count) ? -EFAULT : count; +out: + kfree(tmp); + return error; +} + +static unsigned int rtas_log_poll(struct file *file, poll_table * wait) +{ + poll_wait(file, &rtas_log_wait, wait); + if (rtas_log_size) + return POLLIN | POLLRDNORM; + return 0; +} + +struct file_operations proc_rtas_log_operations = { + .read = rtas_log_read, + .poll = rtas_log_poll, + .open = rtas_log_open, + .release = rtas_log_release, +}; + +static int enable_surveillance(int timeout) +{ + int error; + + error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout); + + if (error == 0) + return 0; + + if (error == -EINVAL) { + printk(KERN_INFO "rtasd: surveillance not supported\n"); + return 0; + } + + printk(KERN_ERR "rtasd: could not update surveillance\n"); + return -1; +} + +static int get_eventscan_parms(void) +{ + struct device_node *node; + int *ip; + + node = of_find_node_by_path("/rtas"); + + ip = (int *)get_property(node, "rtas-event-scan-rate", NULL); + if (ip == NULL) { + printk(KERN_ERR "rtasd: no rtas-event-scan-rate\n"); + of_node_put(node); + return -1; + } + rtas_event_scan_rate = *ip; + DEBUG("rtas-event-scan-rate %d\n", rtas_event_scan_rate); + + /* Make room for the sequence number */ + rtas_error_log_max = rtas_get_error_log_max(); + rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int); + + of_node_put(node); + + return 0; +} + +static void do_event_scan(int event_scan) +{ + int error; + do { + memset(logdata, 0, rtas_error_log_max); + error = rtas_call(event_scan, 4, 1, NULL, + RTAS_EVENT_SCAN_ALL_EVENTS, 0, + __pa(logdata), rtas_error_log_max); + if (error == -1) { + printk(KERN_ERR "event-scan failed\n"); + break; + } + + if (error == 0) + pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0); + + } while(error == 0); +} + +static void do_event_scan_all_cpus(long delay) +{ + int cpu; + + lock_cpu_hotplug(); + cpu = first_cpu(cpu_online_map); + for (;;) { + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + do_event_scan(rtas_token("event-scan")); + set_cpus_allowed(current, CPU_MASK_ALL); + + /* Drop hotplug lock, and sleep for the specified delay */ + unlock_cpu_hotplug(); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(delay); + lock_cpu_hotplug(); + + cpu = next_cpu(cpu, cpu_online_map); + if (cpu == NR_CPUS) + break; + } + unlock_cpu_hotplug(); +} + +static int rtasd(void *unused) +{ + unsigned int err_type; + int event_scan = rtas_token("event-scan"); + int rc; + + daemonize("rtasd"); + + if (event_scan == RTAS_UNKNOWN_SERVICE || get_eventscan_parms() == -1) + goto error; + + rtas_log_buf = vmalloc(rtas_error_log_buffer_max*LOG_NUMBER); + if (!rtas_log_buf) { + printk(KERN_ERR "rtasd: no memory\n"); + goto error; + } + + printk(KERN_ERR "RTAS daemon started\n"); + + DEBUG("will sleep for %d jiffies\n", (HZ*60/rtas_event_scan_rate) / 2); + + /* See if we have any error stored in NVRAM */ + memset(logdata, 0, rtas_error_log_max); + + rc = nvram_read_error_log(logdata, rtas_error_log_max, &err_type); + + /* We can use rtas_log_buf now */ + no_logging = 0; + + if (!rc) { + if (err_type != ERR_FLAG_ALREADY_LOGGED) { + pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0); + } + } + + /* First pass. */ + do_event_scan_all_cpus(HZ); + + if (surveillance_timeout != -1) { + DEBUG("enabling surveillance\n"); + enable_surveillance(surveillance_timeout); + DEBUG("surveillance enabled\n"); + } + + /* Delay should be at least one second since some + * machines have problems if we call event-scan too + * quickly. */ + for (;;) + do_event_scan_all_cpus((HZ*60/rtas_event_scan_rate) / 2); + +error: + /* Should delete proc entries */ + return -EINVAL; +} + +static int __init rtas_init(void) +{ + struct proc_dir_entry *entry; + + /* No RTAS, only warn if we are on a pSeries box */ + if (rtas_token("event-scan") == RTAS_UNKNOWN_SERVICE) { + if (systemcfg->platform & PLATFORM_PSERIES) + printk(KERN_ERR "rtasd: no event-scan on system\n"); + return 1; + } + + entry = create_proc_entry("ppc64/rtas/error_log", S_IRUSR, NULL); + if (entry) + entry->proc_fops = &proc_rtas_log_operations; + else + printk(KERN_ERR "Failed to create error_log proc entry\n"); + + if (kernel_thread(rtasd, NULL, CLONE_FS) < 0) + printk(KERN_ERR "Failed to start RTAS daemon\n"); + + return 0; +} + +static int __init surveillance_setup(char *str) +{ + int i; + + if (get_option(&str,&i)) { + if (i >= 0 && i <= 255) + surveillance_timeout = i; + } + + return 1; +} + +static int __init rtasmsgs_setup(char *str) +{ + if (strcmp(str, "on") == 0) + full_rtas_msgs = 1; + else if (strcmp(str, "off") == 0) + full_rtas_msgs = 0; + + return 1; +} +__initcall(rtas_init); +__setup("surveillance=", surveillance_setup); +__setup("rtasmsgs=", rtasmsgs_setup); diff --git a/arch/ppc64/kernel/rtc.c b/arch/ppc64/kernel/rtc.c new file mode 100644 index 00000000000..3e70b91375f --- /dev/null +++ b/arch/ppc64/kernel/rtc.c @@ -0,0 +1,440 @@ +/* + * Real Time Clock interface for PPC64. + * + * Based on rtc.c by Paul Gortmaker + * + * This driver allows use of the real time clock + * from user space. It exports the /dev/rtc + * interface supporting various ioctl() and also the + * /proc/driver/rtc pseudo-file for status information. + * + * Interface does not support RTC interrupts nor an alarm. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 1.0 Mike Corrigan: IBM iSeries rtc support + * 1.1 Dave Engebretsen: IBM pSeries rtc support + */ + +#define RTC_VERSION "1.1" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +extern int piranha_simulator; + +/* + * We sponge a minor off of the misc major. No need slurping + * up another valuable major dev number for this. If you add + * an ioctl, make sure you don't conflict with SPARC's RTC + * ioctls. + */ + +static ssize_t rtc_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos); + +static int rtc_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg); + +static int rtc_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data); + +/* + * If this driver ever becomes modularised, it will be really nice + * to make the epoch retain its value across module reload... + */ + +static unsigned long epoch = 1900; /* year corresponding to 0x00 */ + +static const unsigned char days_in_mo[] = +{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + +/* + * Now all the various file operations that we export. + */ + +static ssize_t rtc_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return -EIO; +} + +static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct rtc_time wtime; + + switch (cmd) { + case RTC_RD_TIME: /* Read the time/date from RTC */ + { + memset(&wtime, 0, sizeof(struct rtc_time)); + ppc_md.get_rtc_time(&wtime); + break; + } + case RTC_SET_TIME: /* Set the RTC */ + { + struct rtc_time rtc_tm; + unsigned char mon, day, hrs, min, sec, leap_yr; + unsigned int yrs; + + if (!capable(CAP_SYS_TIME)) + return -EACCES; + + if (copy_from_user(&rtc_tm, (struct rtc_time __user *)arg, + sizeof(struct rtc_time))) + return -EFAULT; + + yrs = rtc_tm.tm_year; + mon = rtc_tm.tm_mon + 1; /* tm_mon starts at zero */ + day = rtc_tm.tm_mday; + hrs = rtc_tm.tm_hour; + min = rtc_tm.tm_min; + sec = rtc_tm.tm_sec; + + if (yrs < 70) + return -EINVAL; + + leap_yr = ((!(yrs % 4) && (yrs % 100)) || !(yrs % 400)); + + if ((mon > 12) || (day == 0)) + return -EINVAL; + + if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr))) + return -EINVAL; + + if ((hrs >= 24) || (min >= 60) || (sec >= 60)) + return -EINVAL; + + if ( yrs > 169 ) + return -EINVAL; + + ppc_md.set_rtc_time(&rtc_tm); + + return 0; + } + case RTC_EPOCH_READ: /* Read the epoch. */ + { + return put_user (epoch, (unsigned long __user *)arg); + } + case RTC_EPOCH_SET: /* Set the epoch. */ + { + /* + * There were no RTC clocks before 1900. + */ + if (arg < 1900) + return -EINVAL; + + if (!capable(CAP_SYS_TIME)) + return -EACCES; + + epoch = arg; + return 0; + } + default: + return -EINVAL; + } + return copy_to_user((void __user *)arg, &wtime, sizeof wtime) ? -EFAULT : 0; +} + +static int rtc_open(struct inode *inode, struct file *file) +{ + nonseekable_open(inode, file); + return 0; +} + +static int rtc_release(struct inode *inode, struct file *file) +{ + return 0; +} + +/* + * The various file operations we support. + */ +static struct file_operations rtc_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = rtc_read, + .ioctl = rtc_ioctl, + .open = rtc_open, + .release = rtc_release, +}; + +static struct miscdevice rtc_dev = { + .minor = RTC_MINOR, + .name = "rtc", + .fops = &rtc_fops +}; + +static int __init rtc_init(void) +{ + int retval; + + retval = misc_register(&rtc_dev); + if(retval < 0) + return retval; + +#ifdef CONFIG_PROC_FS + if (create_proc_read_entry("driver/rtc", 0, NULL, rtc_read_proc, NULL) + == NULL) { + misc_deregister(&rtc_dev); + return -ENOMEM; + } +#endif + + printk(KERN_INFO "i/pSeries Real Time Clock Driver v" RTC_VERSION "\n"); + + return 0; +} + +static void __exit rtc_exit (void) +{ + remove_proc_entry ("driver/rtc", NULL); + misc_deregister(&rtc_dev); +} + +module_init(rtc_init); +module_exit(rtc_exit); + +/* + * Info exported via "/proc/driver/rtc". + */ + +static int rtc_proc_output (char *buf) +{ + + char *p; + struct rtc_time tm; + + p = buf; + + ppc_md.get_rtc_time(&tm); + + /* + * There is no way to tell if the luser has the RTC set for local + * time or for Universal Standard Time (GMT). Probably local though. + */ + p += sprintf(p, + "rtc_time\t: %02d:%02d:%02d\n" + "rtc_date\t: %04d-%02d-%02d\n" + "rtc_epoch\t: %04lu\n", + tm.tm_hour, tm.tm_min, tm.tm_sec, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, epoch); + + p += sprintf(p, + "DST_enable\t: no\n" + "BCD\t\t: yes\n" + "24hr\t\t: yes\n" ); + + return p - buf; +} + +static int rtc_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = rtc_proc_output (page); + if (len <= off+count) *eof = 1; + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} + +#ifdef CONFIG_PPC_ISERIES +/* + * Get the RTC from the virtual service processor + * This requires flowing LpEvents to the primary partition + */ +void iSeries_get_rtc_time(struct rtc_time *rtc_tm) +{ + if (piranha_simulator) + return; + + mf_get_rtc(rtc_tm); + rtc_tm->tm_mon--; +} + +/* + * Set the RTC in the virtual service processor + * This requires flowing LpEvents to the primary partition + */ +int iSeries_set_rtc_time(struct rtc_time *tm) +{ + mf_set_rtc(tm); + return 0; +} + +void iSeries_get_boot_time(struct rtc_time *tm) +{ + unsigned long time; + static unsigned long lastsec = 1; + + u32 dataWord1 = *((u32 *)(&xSpCommArea.xBcdTimeAtIplStart)); + u32 dataWord2 = *(((u32 *)&(xSpCommArea.xBcdTimeAtIplStart)) + 1); + int year = 1970; + int year1 = ( dataWord1 >> 24 ) & 0x000000FF; + int year2 = ( dataWord1 >> 16 ) & 0x000000FF; + int sec = ( dataWord1 >> 8 ) & 0x000000FF; + int min = dataWord1 & 0x000000FF; + int hour = ( dataWord2 >> 24 ) & 0x000000FF; + int day = ( dataWord2 >> 8 ) & 0x000000FF; + int mon = dataWord2 & 0x000000FF; + + if ( piranha_simulator ) + return; + + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year1); + BCD_TO_BIN(year2); + year = year1 * 100 + year2; + + time = mktime(year, mon, day, hour, min, sec); + time += ( jiffies / HZ ); + + /* Now THIS is a nasty hack! + * It ensures that the first two calls get different answers. + * That way the loop in init_time (time.c) will not think + * the clock is stuck. + */ + if ( lastsec ) { + time -= lastsec; + --lastsec; + } + + to_tm(time, tm); + tm->tm_year -= 1900; + tm->tm_mon -= 1; +} +#endif + +#ifdef CONFIG_PPC_RTAS +#define MAX_RTC_WAIT 5000 /* 5 sec */ +#define RTAS_CLOCK_BUSY (-2) +void pSeries_get_boot_time(struct rtc_time *rtc_tm) +{ + int ret[8]; + int error, wait_time; + unsigned long max_wait_tb; + + max_wait_tb = __get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { + wait_time = rtas_extended_busy_delay_time(error); + /* This is boot time so we spin. */ + udelay(wait_time*1000); + error = RTAS_CLOCK_BUSY; + } + } while (error == RTAS_CLOCK_BUSY && (__get_tb() < max_wait_tb)); + + if (error != 0 && printk_ratelimit()) { + printk(KERN_WARNING "error: reading the clock failed (%d)\n", + error); + return; + } + + rtc_tm->tm_sec = ret[5]; + rtc_tm->tm_min = ret[4]; + rtc_tm->tm_hour = ret[3]; + rtc_tm->tm_mday = ret[2]; + rtc_tm->tm_mon = ret[1] - 1; + rtc_tm->tm_year = ret[0] - 1900; +} + +/* NOTE: get_rtc_time will get an error if executed in interrupt context + * and if a delay is needed to read the clock. In this case we just + * silently return without updating rtc_tm. + */ +void pSeries_get_rtc_time(struct rtc_time *rtc_tm) +{ + int ret[8]; + int error, wait_time; + unsigned long max_wait_tb; + + max_wait_tb = __get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { + if (in_interrupt() && printk_ratelimit()) { + printk(KERN_WARNING "error: reading clock would delay interrupt\n"); + return; /* delay not allowed */ + } + wait_time = rtas_extended_busy_delay_time(error); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(wait_time); + error = RTAS_CLOCK_BUSY; + } + } while (error == RTAS_CLOCK_BUSY && (__get_tb() < max_wait_tb)); + + if (error != 0 && printk_ratelimit()) { + printk(KERN_WARNING "error: reading the clock failed (%d)\n", + error); + return; + } + + rtc_tm->tm_sec = ret[5]; + rtc_tm->tm_min = ret[4]; + rtc_tm->tm_hour = ret[3]; + rtc_tm->tm_mday = ret[2]; + rtc_tm->tm_mon = ret[1] - 1; + rtc_tm->tm_year = ret[0] - 1900; +} + +int pSeries_set_rtc_time(struct rtc_time *tm) +{ + int error, wait_time; + unsigned long max_wait_tb; + + max_wait_tb = __get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + tm->tm_year + 1900, tm->tm_mon + 1, + tm->tm_mday, tm->tm_hour, tm->tm_min, + tm->tm_sec, 0); + if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { + if (in_interrupt()) + return 1; /* probably decrementer */ + wait_time = rtas_extended_busy_delay_time(error); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(wait_time); + error = RTAS_CLOCK_BUSY; + } + } while (error == RTAS_CLOCK_BUSY && (__get_tb() < max_wait_tb)); + + if (error != 0 && printk_ratelimit()) + printk(KERN_WARNING "error: setting the clock failed (%d)\n", + error); + + return 0; +} +#endif diff --git a/arch/ppc64/kernel/scanlog.c b/arch/ppc64/kernel/scanlog.c new file mode 100644 index 00000000000..189b81a4198 --- /dev/null +++ b/arch/ppc64/kernel/scanlog.c @@ -0,0 +1,245 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * scan-log-data driver for PPC64 Todd Inglett + * + * When ppc64 hardware fails the service processor dumps internal state + * of the system. After a reboot the operating system can access a dump + * of this data using this driver. A dump exists if the device-tree + * /chosen/ibm,scan-log-data property exists. + * + * This driver exports /proc/ppc64/scan-log-dump which can be read. + * The driver supports only sequential reads. + * + * The driver looks at a write to the driver for the single word "reset". + * If given, the driver will reset the scanlog so the platform can free it. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MODULE_VERS "1.0" +#define MODULE_NAME "scanlog" + +/* Status returns from ibm,scan-log-dump */ +#define SCANLOG_COMPLETE 0 +#define SCANLOG_HWERROR -1 +#define SCANLOG_CONTINUE 1 + +#define DEBUG(A...) do { if (scanlog_debug) printk(KERN_ERR "scanlog: " A); } while (0) + +static int scanlog_debug; +static unsigned int ibm_scan_log_dump; /* RTAS token */ +static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */ + +static ssize_t scanlog_read(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + struct proc_dir_entry *dp; + unsigned int *data; + int status; + unsigned long len, off; + unsigned int wait_time; + + dp = PDE(inode); + data = (unsigned int *)dp->data; + + if (!data) { + printk(KERN_ERR "scanlog: read failed no data\n"); + return -EIO; + } + + if (count > RTAS_DATA_BUF_SIZE) + count = RTAS_DATA_BUF_SIZE; + + if (count < 1024) { + /* This is the min supported by this RTAS call. Rather + * than do all the buffering we insist the user code handle + * larger reads. As long as cp works... :) + */ + printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count); + return -EINVAL; + } + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + for (;;) { + wait_time = HZ/2; /* default wait if no data */ + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE); + status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, + (u32) __pa(rtas_data_buf), (u32) count); + memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE); + spin_unlock(&rtas_data_buf_lock); + + DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n", + status, data[0], data[1], data[2]); + switch (status) { + case SCANLOG_COMPLETE: + DEBUG("hit eof\n"); + return 0; + case SCANLOG_HWERROR: + DEBUG("hardware error reading scan log data\n"); + return -EIO; + case SCANLOG_CONTINUE: + /* We may or may not have data yet */ + len = data[1]; + off = data[2]; + if (len > 0) { + if (copy_to_user(buf, ((char *)data)+off, len)) + return -EFAULT; + return len; + } + /* Break to sleep default time */ + break; + default: + if (status > 9900 && status <= 9905) { + /* No data. RTAS is hinting at a delay required + * between 1-100000 milliseconds + */ + int ms = 1; + for (; status > 9900; status--) + ms = ms * 10; + /* Use microseconds for reasonable accuracy */ + ms *= 1000; + wait_time = ms / (1000000/HZ); /* round down is fine */ + /* Fall through to sleep */ + } else { + printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status); + return -EIO; + } + } + /* Apparently no data yet. Wait and try again. */ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(wait_time); + } + /*NOTREACHED*/ +} + +static ssize_t scanlog_write(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + char stkbuf[20]; + int status; + + if (count > 19) count = 19; + if (copy_from_user (stkbuf, buf, count)) { + return -EFAULT; + } + stkbuf[count] = 0; + + if (buf) { + if (strncmp(stkbuf, "reset", 5) == 0) { + DEBUG("reset scanlog\n"); + status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0); + DEBUG("rtas returns %d\n", status); + } else if (strncmp(stkbuf, "debugon", 7) == 0) { + printk(KERN_ERR "scanlog: debug on\n"); + scanlog_debug = 1; + } else if (strncmp(stkbuf, "debugoff", 8) == 0) { + printk(KERN_ERR "scanlog: debug off\n"); + scanlog_debug = 0; + } + } + return count; +} + +static int scanlog_open(struct inode * inode, struct file * file) +{ + struct proc_dir_entry *dp = PDE(inode); + unsigned int *data = (unsigned int *)dp->data; + + if (!data) { + printk(KERN_ERR "scanlog: open failed no data\n"); + return -EIO; + } + + if (data[0] != 0) { + /* This imperfect test stops a second copy of the + * data (or a reset while data is being copied) + */ + return -EBUSY; + } + + data[0] = 0; /* re-init so we restart the scan */ + + return 0; +} + +static int scanlog_release(struct inode * inode, struct file * file) +{ + struct proc_dir_entry *dp = PDE(inode); + unsigned int *data = (unsigned int *)dp->data; + + if (!data) { + printk(KERN_ERR "scanlog: release failed no data\n"); + return -EIO; + } + data[0] = 0; + + return 0; +} + +struct file_operations scanlog_fops = { + .owner = THIS_MODULE, + .read = scanlog_read, + .write = scanlog_write, + .open = scanlog_open, + .release = scanlog_release, +}; + +int __init scanlog_init(void) +{ + struct proc_dir_entry *ent; + + ibm_scan_log_dump = rtas_token("ibm,scan-log-dump"); + if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) { + printk(KERN_ERR "scan-log-dump not implemented on this system\n"); + return -EIO; + } + + ent = create_proc_entry("ppc64/rtas/scan-log-dump", S_IRUSR, NULL); + if (ent) { + ent->proc_fops = &scanlog_fops; + /* Ideally we could allocate a buffer < 4G */ + ent->data = kmalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!ent->data) { + printk(KERN_ERR "Failed to allocate a buffer\n"); + remove_proc_entry("scan-log-dump", ent->parent); + return -ENOMEM; + } + ((unsigned int *)ent->data)[0] = 0; + } else { + printk(KERN_ERR "Failed to create ppc64/scan-log-dump proc entry\n"); + return -EIO; + } + proc_ppc64_scan_log_dump = ent; + + return 0; +} + +void __exit scanlog_cleanup(void) +{ + if (proc_ppc64_scan_log_dump) { + if (proc_ppc64_scan_log_dump->data) + kfree(proc_ppc64_scan_log_dump->data); + remove_proc_entry("scan-log-dump", proc_ppc64_scan_log_dump->parent); + } +} + +module_init(scanlog_init); +module_exit(scanlog_cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/ppc64/kernel/semaphore.c b/arch/ppc64/kernel/semaphore.c new file mode 100644 index 00000000000..a1c1db573e9 --- /dev/null +++ b/arch/ppc64/kernel/semaphore.c @@ -0,0 +1,136 @@ +/* + * + * + * PowerPC-specific semaphore code. + * + * Copyright (C) 1999 Cort Dougan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * April 2001 - Reworked by Paul Mackerras + * to eliminate the SMP races in the old version between the updates + * of `count' and `waking'. Now we use negative `count' values to + * indicate that some process(es) are waiting for the semaphore. + */ + +#include +#include +#include + +#include +#include +#include + +/* + * Atomically update sem->count. + * This does the equivalent of the following: + * + * old_count = sem->count; + * tmp = MAX(old_count, 0) + incr; + * sem->count = tmp; + * return old_count; + */ +static inline int __sem_update_count(struct semaphore *sem, int incr) +{ + int old_count, tmp; + + __asm__ __volatile__("\n" +"1: lwarx %0,0,%3\n" +" srawi %1,%0,31\n" +" andc %1,%0,%1\n" +" add %1,%1,%4\n" +" stwcx. %1,0,%3\n" +" bne 1b" + : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) + : "r" (&sem->count), "r" (incr), "m" (sem->count) + : "cc"); + + return old_count; +} + +void __up(struct semaphore *sem) +{ + /* + * Note that we incremented count in up() before we came here, + * but that was ineffective since the result was <= 0, and + * any negative value of count is equivalent to 0. + * This ends up setting count to 1, unless count is now > 0 + * (i.e. because some other cpu has called up() in the meantime), + * in which case we just increment count. + */ + __sem_update_count(sem, 1); + wake_up(&sem->wait); +} +EXPORT_SYMBOL(__up); + +/* + * Note that when we come in to __down or __down_interruptible, + * we have already decremented count, but that decrement was + * ineffective since the result was < 0, and any negative value + * of count is equivalent to 0. + * Thus it is only when we decrement count from some value > 0 + * that we have actually got the semaphore. + */ +void __sched __down(struct semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + __set_task_state(tsk, TASK_UNINTERRUPTIBLE); + add_wait_queue_exclusive(&sem->wait, &wait); + + /* + * Try to get the semaphore. If the count is > 0, then we've + * got the semaphore; we decrement count and exit the loop. + * If the count is 0 or negative, we set it to -1, indicating + * that we are asleep, and then sleep. + */ + while (__sem_update_count(sem, -1) <= 0) { + schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + } + remove_wait_queue(&sem->wait, &wait); + __set_task_state(tsk, TASK_RUNNING); + + /* + * If there are any more sleepers, wake one of them up so + * that it can either get the semaphore, or set count to -1 + * indicating that there are still processes sleeping. + */ + wake_up(&sem->wait); +} +EXPORT_SYMBOL(__down); + +int __sched __down_interruptible(struct semaphore * sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + __set_task_state(tsk, TASK_INTERRUPTIBLE); + add_wait_queue_exclusive(&sem->wait, &wait); + + while (__sem_update_count(sem, -1) <= 0) { + if (signal_pending(current)) { + /* + * A signal is pending - give up trying. + * Set sem->count to 0 if it is negative, + * since we are no longer sleeping. + */ + __sem_update_count(sem, 0); + retval = -EINTR; + break; + } + schedule(); + set_task_state(tsk, TASK_INTERRUPTIBLE); + } + remove_wait_queue(&sem->wait, &wait); + __set_task_state(tsk, TASK_RUNNING); + + wake_up(&sem->wait); + return retval; +} +EXPORT_SYMBOL(__down_interruptible); diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c new file mode 100644 index 00000000000..21c57f539c2 --- /dev/null +++ b/arch/ppc64/kernel/setup.c @@ -0,0 +1,1392 @@ +/* + * + * Common boot and setup code. + * + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * Here are some early debugging facilities. You can enable one + * but your kernel will not boot on anything else if you do so + */ + +/* This one is for use on LPAR machines that support an HVC console + * on vterm 0 + */ +extern void udbg_init_debug_lpar(void); +/* This one is for use on Apple G5 machines + */ +extern void udbg_init_pmac_realmode(void); +/* That's RTAS panel debug */ +extern void call_rtas_display_status_delay(unsigned char c); +/* Here's maple real mode debug */ +extern void udbg_init_maple_realmode(void); + +#define EARLY_DEBUG_INIT() do {} while(0) + +#if 0 +#define EARLY_DEBUG_INIT() udbg_init_debug_lpar() +#define EARLY_DEBUG_INIT() udbg_init_maple_realmode() +#define EARLY_DEBUG_INIT() udbg_init_pmac_realmode() +#define EARLY_DEBUG_INIT() \ + do { ppc_md.udbg_putc = call_rtas_display_status_delay; } while(0) +#endif + +/* extern void *stab; */ +extern unsigned long klimit; + +extern void mm_init_ppc64(void); +extern int idle_setup(void); +extern void stab_initialize(unsigned long stab); +extern void htab_initialize(void); +extern void early_init_devtree(void *flat_dt); +extern void unflatten_device_tree(void); + +extern void smp_release_cpus(void); + +unsigned long decr_overclock = 1; +unsigned long decr_overclock_proc0 = 1; +unsigned long decr_overclock_set = 0; +unsigned long decr_overclock_proc0_set = 0; + +int have_of = 1; +int boot_cpuid = 0; +int boot_cpuid_phys = 0; +dev_t boot_dev; +u64 ppc64_pft_size; +u64 ppc64_debug_switch; + +struct ppc64_caches ppc64_caches; +EXPORT_SYMBOL_GPL(ppc64_caches); + +/* + * These are used in binfmt_elf.c to put aux entries on the stack + * for each elf executable being started. + */ +int dcache_bsize; +int icache_bsize; +int ucache_bsize; + +/* The main machine-dep calls structure + */ +struct machdep_calls ppc_md; +EXPORT_SYMBOL(ppc_md); + +#ifdef CONFIG_MAGIC_SYSRQ +unsigned long SYSRQ_KEY; +#endif /* CONFIG_MAGIC_SYSRQ */ + + +static int ppc64_panic_event(struct notifier_block *, unsigned long, void *); +static struct notifier_block ppc64_panic_block = { + .notifier_call = ppc64_panic_event, + .priority = INT_MIN /* may not return; must be done last */ +}; + +/* + * Perhaps we can put the pmac screen_info[] here + * on pmac as well so we don't need the ifdef's. + * Until we get multiple-console support in here + * that is. -- Cort + * Maybe tie it to serial consoles, since this is really what + * these processors use on existing boards. -- Dan + */ +struct screen_info screen_info = { + .orig_x = 0, + .orig_y = 25, + .orig_video_cols = 80, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 +}; + +/* + * Initialize the PPCDBG state. Called before relocation has been enabled. + */ +void __init ppcdbg_initialize(void) +{ + ppc64_debug_switch = PPC_DEBUG_DEFAULT; /* | PPCDBG_BUSWALK | */ + /* PPCDBG_PHBINIT | PPCDBG_MM | PPCDBG_MMINIT | PPCDBG_TCEINIT | PPCDBG_TCE */; +} + +/* + * Early boot console based on udbg + */ +static struct console udbg_console = { + .name = "udbg", + .write = udbg_console_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; +static int early_console_initialized; + +void __init disable_early_printk(void) +{ + if (!early_console_initialized) + return; + unregister_console(&udbg_console); + early_console_initialized = 0; +} + +#if defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_SMP) + +static int smt_enabled_cmdline; + +/* Look for ibm,smt-enabled OF option */ +static void check_smt_enabled(void) +{ + struct device_node *dn; + char *smt_option; + + /* Allow the command line to overrule the OF option */ + if (smt_enabled_cmdline) + return; + + dn = of_find_node_by_path("/options"); + + if (dn) { + smt_option = (char *)get_property(dn, "ibm,smt-enabled", NULL); + + if (smt_option) { + if (!strcmp(smt_option, "on")) + smt_enabled_at_boot = 1; + else if (!strcmp(smt_option, "off")) + smt_enabled_at_boot = 0; + } + } +} + +/* Look for smt-enabled= cmdline option */ +static int __init early_smt_enabled(char *p) +{ + smt_enabled_cmdline = 1; + + if (!p) + return 0; + + if (!strcmp(p, "on") || !strcmp(p, "1")) + smt_enabled_at_boot = 1; + else if (!strcmp(p, "off") || !strcmp(p, "0")) + smt_enabled_at_boot = 0; + + return 0; +} +early_param("smt-enabled", early_smt_enabled); + +/** + * setup_cpu_maps - initialize the following cpu maps: + * cpu_possible_map + * cpu_present_map + * cpu_sibling_map + * + * Having the possible map set up early allows us to restrict allocations + * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. + * + * We do not initialize the online map here; cpus set their own bits in + * cpu_online_map as they come up. + * + * This function is valid only for Open Firmware systems. finish_device_tree + * must be called before using this. + * + * While we're here, we may as well set the "physical" cpu ids in the paca. + */ +static void __init setup_cpu_maps(void) +{ + struct device_node *dn = NULL; + int cpu = 0; + int swap_cpuid = 0; + + check_smt_enabled(); + + while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { + u32 *intserv; + int j, len = sizeof(u32), nthreads; + + intserv = (u32 *)get_property(dn, "ibm,ppc-interrupt-server#s", + &len); + if (!intserv) + intserv = (u32 *)get_property(dn, "reg", NULL); + + nthreads = len / sizeof(u32); + + for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { + cpu_set(cpu, cpu_present_map); + set_hard_smp_processor_id(cpu, intserv[j]); + + if (intserv[j] == boot_cpuid_phys) + swap_cpuid = cpu; + cpu_set(cpu, cpu_possible_map); + cpu++; + } + } + + /* Swap CPU id 0 with boot_cpuid_phys, so we can always assume that + * boot cpu is logical 0. + */ + if (boot_cpuid_phys != get_hard_smp_processor_id(0)) { + u32 tmp; + tmp = get_hard_smp_processor_id(0); + set_hard_smp_processor_id(0, boot_cpuid_phys); + set_hard_smp_processor_id(swap_cpuid, tmp); + } + + /* + * On pSeries LPAR, we need to know how many cpus + * could possibly be added to this partition. + */ + if (systemcfg->platform == PLATFORM_PSERIES_LPAR && + (dn = of_find_node_by_path("/rtas"))) { + int num_addr_cell, num_size_cell, maxcpus; + unsigned int *ireg; + + num_addr_cell = prom_n_addr_cells(dn); + num_size_cell = prom_n_size_cells(dn); + + ireg = (unsigned int *) + get_property(dn, "ibm,lrdr-capacity", NULL); + + if (!ireg) + goto out; + + maxcpus = ireg[num_addr_cell + num_size_cell]; + + /* Double maxcpus for processors which have SMT capability */ + if (cpu_has_feature(CPU_FTR_SMT)) + maxcpus *= 2; + + if (maxcpus > NR_CPUS) { + printk(KERN_WARNING + "Partition configured for %d cpus, " + "operating system maximum is %d.\n", + maxcpus, NR_CPUS); + maxcpus = NR_CPUS; + } else + printk(KERN_INFO "Partition configured for %d cpus.\n", + maxcpus); + + for (cpu = 0; cpu < maxcpus; cpu++) + cpu_set(cpu, cpu_possible_map); + out: + of_node_put(dn); + } + + /* + * Do the sibling map; assume only two threads per processor. + */ + for_each_cpu(cpu) { + cpu_set(cpu, cpu_sibling_map[cpu]); + if (cpu_has_feature(CPU_FTR_SMT)) + cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); + } + + systemcfg->processorCount = num_present_cpus(); +} +#endif /* defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_SMP) */ + + +#ifdef CONFIG_PPC_MULTIPLATFORM + +extern struct machdep_calls pSeries_md; +extern struct machdep_calls pmac_md; +extern struct machdep_calls maple_md; + +/* Ultimately, stuff them in an elf section like initcalls... */ +static struct machdep_calls __initdata *machines[] = { +#ifdef CONFIG_PPC_PSERIES + &pSeries_md, +#endif /* CONFIG_PPC_PSERIES */ +#ifdef CONFIG_PPC_PMAC + &pmac_md, +#endif /* CONFIG_PPC_PMAC */ +#ifdef CONFIG_PPC_MAPLE + &maple_md, +#endif /* CONFIG_PPC_MAPLE */ + NULL +}; + +/* + * Early initialization entry point. This is called by head.S + * with MMU translation disabled. We rely on the "feature" of + * the CPU that ignores the top 2 bits of the address in real + * mode so we can access kernel globals normally provided we + * only toy with things in the RMO region. From here, we do + * some early parsing of the device-tree to setup out LMB + * data structures, and allocate & initialize the hash table + * and segment tables so we can start running with translation + * enabled. + * + * It is this function which will call the probe() callback of + * the various platform types and copy the matching one to the + * global ppc_md structure. Your platform can eventually do + * some very early initializations from the probe() routine, but + * this is not recommended, be very careful as, for example, the + * device-tree is not accessible via normal means at this point. + */ + +void __init early_setup(unsigned long dt_ptr) +{ + struct paca_struct *lpaca = get_paca(); + static struct machdep_calls **mach; + + /* + * Enable early debugging if any specified (see top of + * this file) + */ + EARLY_DEBUG_INIT(); + + DBG(" -> early_setup()\n"); + + /* + * Fill the default DBG level (do we want to keep + * that old mecanism around forever ?) + */ + ppcdbg_initialize(); + + /* + * Do early initializations using the flattened device + * tree, like retreiving the physical memory map or + * calculating/retreiving the hash table size + */ + early_init_devtree(__va(dt_ptr)); + + /* + * Iterate all ppc_md structures until we find the proper + * one for the current machine type + */ + DBG("Probing machine type for platform %x...\n", + systemcfg->platform); + + for (mach = machines; *mach; mach++) { + if ((*mach)->probe(systemcfg->platform)) + break; + } + /* What can we do if we didn't find ? */ + if (*mach == NULL) { + DBG("No suitable machine found !\n"); + for (;;); + } + ppc_md = **mach; + + /* our udbg callbacks got overriden by the above, let's put them + * back in. Ultimately, I want those things to be split from the + * main ppc_md + */ + EARLY_DEBUG_INIT(); + + DBG("Found, Initializing memory management...\n"); + + /* + * Initialize stab / SLB management + */ + stab_initialize(lpaca->stab_real); + + /* + * Initialize the MMU Hash table and create the linear mapping + * of memory + */ + htab_initialize(); + + DBG(" <- early_setup()\n"); +} + + +/* + * Initialize some remaining members of the ppc64_caches and systemcfg structures + * (at least until we get rid of them completely). This is mostly some + * cache informations about the CPU that will be used by cache flush + * routines and/or provided to userland + */ +static void __init initialize_cache_info(void) +{ + struct device_node *np; + unsigned long num_cpus = 0; + + DBG(" -> initialize_cache_info()\n"); + + for (np = NULL; (np = of_find_node_by_type(np, "cpu"));) { + num_cpus += 1; + + /* We're assuming *all* of the CPUs have the same + * d-cache and i-cache sizes... -Peter + */ + + if ( num_cpus == 1 ) { + u32 *sizep, *lsizep; + u32 size, lsize; + const char *dc, *ic; + + /* Then read cache informations */ + if (systemcfg->platform == PLATFORM_POWERMAC) { + dc = "d-cache-block-size"; + ic = "i-cache-block-size"; + } else { + dc = "d-cache-line-size"; + ic = "i-cache-line-size"; + } + + size = 0; + lsize = cur_cpu_spec->dcache_bsize; + sizep = (u32 *)get_property(np, "d-cache-size", NULL); + if (sizep != NULL) + size = *sizep; + lsizep = (u32 *) get_property(np, dc, NULL); + if (lsizep != NULL) + lsize = *lsizep; + if (sizep == 0 || lsizep == 0) + DBG("Argh, can't find dcache properties ! " + "sizep: %p, lsizep: %p\n", sizep, lsizep); + + systemcfg->dcache_size = ppc64_caches.dsize = size; + systemcfg->dcache_line_size = + ppc64_caches.dline_size = lsize; + ppc64_caches.log_dline_size = __ilog2(lsize); + ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; + + size = 0; + lsize = cur_cpu_spec->icache_bsize; + sizep = (u32 *)get_property(np, "i-cache-size", NULL); + if (sizep != NULL) + size = *sizep; + lsizep = (u32 *)get_property(np, ic, NULL); + if (lsizep != NULL) + lsize = *lsizep; + if (sizep == 0 || lsizep == 0) + DBG("Argh, can't find icache properties ! " + "sizep: %p, lsizep: %p\n", sizep, lsizep); + + systemcfg->icache_size = ppc64_caches.isize = size; + systemcfg->icache_line_size = + ppc64_caches.iline_size = lsize; + ppc64_caches.log_iline_size = __ilog2(lsize); + ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; + } + } + + /* Add an eye catcher and the systemcfg layout version number */ + strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); + systemcfg->version.major = SYSTEMCFG_MAJOR; + systemcfg->version.minor = SYSTEMCFG_MINOR; + systemcfg->processor = mfspr(SPRN_PVR); + + DBG(" <- initialize_cache_info()\n"); +} + +static void __init check_for_initrd(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + u64 *prop; + + DBG(" -> check_for_initrd()\n"); + + prop = (u64 *)get_property(of_chosen, "linux,initrd-start", NULL); + if (prop != NULL) { + initrd_start = (unsigned long)__va(*prop); + prop = (u64 *)get_property(of_chosen, "linux,initrd-end", NULL); + if (prop != NULL) { + initrd_end = (unsigned long)__va(*prop); + initrd_below_start_ok = 1; + } else + initrd_start = 0; + } + + /* If we were passed an initrd, set the ROOT_DEV properly if the values + * look sensible. If not, clear initrd reference. + */ + if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && + initrd_end > initrd_start) + ROOT_DEV = Root_RAM0; + else + initrd_start = initrd_end = 0; + + if (initrd_start) + printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); + + DBG(" <- check_for_initrd()\n"); +#endif /* CONFIG_BLK_DEV_INITRD */ +} + +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +/* + * Do some initial setup of the system. The parameters are those which + * were passed in from the bootloader. + */ +void __init setup_system(void) +{ + DBG(" -> setup_system()\n"); + +#ifdef CONFIG_PPC_ISERIES + /* pSeries systems are identified in prom.c via OF. */ + if (itLpNaca.xLparInstalled == 1) + systemcfg->platform = PLATFORM_ISERIES_LPAR; + + ppc_md.init_early(); +#else /* CONFIG_PPC_ISERIES */ + + /* + * Unflatten the device-tree passed by prom_init or kexec + */ + unflatten_device_tree(); + + /* + * Fill the ppc64_caches & systemcfg structures with informations + * retreived from the device-tree. Need to be called before + * finish_device_tree() since the later requires some of the + * informations filled up here to properly parse the interrupt + * tree. + * It also sets up the cache line sizes which allows to call + * routines like flush_icache_range (used by the hash init + * later on). + */ + initialize_cache_info(); + +#ifdef CONFIG_PPC_RTAS + /* + * Initialize RTAS if available + */ + rtas_initialize(); +#endif /* CONFIG_PPC_RTAS */ + + /* + * Check if we have an initrd provided via the device-tree + */ + check_for_initrd(); + + /* + * Do some platform specific early initializations, that includes + * setting up the hash table pointers. It also sets up some interrupt-mapping + * related options that will be used by finish_device_tree() + */ + ppc_md.init_early(); + + /* + * "Finish" the device-tree, that is do the actual parsing of + * some of the properties like the interrupt map + */ + finish_device_tree(); + + /* + * Initialize xmon + */ +#ifdef CONFIG_XMON_DEFAULT + xmon_init(); +#endif + /* + * Register early console + */ + early_console_initialized = 1; + register_console(&udbg_console); + + /* Save unparsed command line copy for /proc/cmdline */ + strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); + + parse_early_param(); +#endif /* !CONFIG_PPC_ISERIES */ + +#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES) + /* + * iSeries has already initialized the cpu maps at this point. + */ + setup_cpu_maps(); + + /* Release secondary cpus out of their spinloops at 0x60 now that + * we can map physical -> logical CPU ids + */ + smp_release_cpus(); +#endif /* defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES) */ + + printk("Starting Linux PPC64 %s\n", UTS_RELEASE); + + printk("-----------------------------------------------------\n"); + printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); + printk("ppc64_debug_switch = 0x%lx\n", ppc64_debug_switch); + printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller); + printk("systemcfg = 0x%p\n", systemcfg); + printk("systemcfg->platform = 0x%x\n", systemcfg->platform); + printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount); + printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize); + printk("ppc64_caches.dcache_line_size = 0x%x\n", + ppc64_caches.dline_size); + printk("ppc64_caches.icache_line_size = 0x%x\n", + ppc64_caches.iline_size); + printk("htab_address = 0x%p\n", htab_address); + printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); + printk("-----------------------------------------------------\n"); + + mm_init_ppc64(); + + DBG(" <- setup_system()\n"); +} + + +void machine_restart(char *cmd) +{ + if (ppc_md.nvram_sync) + ppc_md.nvram_sync(); + ppc_md.restart(cmd); +} + +EXPORT_SYMBOL(machine_restart); + +void machine_power_off(void) +{ + if (ppc_md.nvram_sync) + ppc_md.nvram_sync(); + ppc_md.power_off(); +} + +EXPORT_SYMBOL(machine_power_off); + +void machine_halt(void) +{ + if (ppc_md.nvram_sync) + ppc_md.nvram_sync(); + ppc_md.halt(); +} + +EXPORT_SYMBOL(machine_halt); + +unsigned long ppc_proc_freq; +unsigned long ppc_tb_freq; + +static int ppc64_panic_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + ppc_md.panic((char *)ptr); /* May not return */ + return NOTIFY_DONE; +} + + +#ifdef CONFIG_SMP +DEFINE_PER_CPU(unsigned int, pvr); +#endif + +static int show_cpuinfo(struct seq_file *m, void *v) +{ + unsigned long cpu_id = (unsigned long)v - 1; + unsigned int pvr; + unsigned short maj; + unsigned short min; + + if (cpu_id == NR_CPUS) { + seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); + + if (ppc_md.get_cpuinfo != NULL) + ppc_md.get_cpuinfo(m); + + return 0; + } + + /* We only show online cpus: disable preempt (overzealous, I + * knew) to prevent cpu going down. */ + preempt_disable(); + if (!cpu_online(cpu_id)) { + preempt_enable(); + return 0; + } + +#ifdef CONFIG_SMP + pvr = per_cpu(pvr, cpu_id); +#else + pvr = mfspr(SPRN_PVR); +#endif + maj = (pvr >> 8) & 0xFF; + min = pvr & 0xFF; + + seq_printf(m, "processor\t: %lu\n", cpu_id); + seq_printf(m, "cpu\t\t: "); + + if (cur_cpu_spec->pvr_mask) + seq_printf(m, "%s", cur_cpu_spec->cpu_name); + else + seq_printf(m, "unknown (%08x)", pvr); + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + seq_printf(m, ", altivec supported"); +#endif /* CONFIG_ALTIVEC */ + + seq_printf(m, "\n"); + + /* + * Assume here that all clock rates are the same in a + * smp system. -- Cort + */ + seq_printf(m, "clock\t\t: %lu.%06luMHz\n", ppc_proc_freq / 1000000, + ppc_proc_freq % 1000000); + + seq_printf(m, "revision\t: %hd.%hd\n\n", maj, min); + + preempt_enable(); + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos <= NR_CPUS ? (void *)((*pos)+1) : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +struct seq_operations cpuinfo_op = { + .start =c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +/* + * These three variables are used to save values passed to us by prom_init() + * via the device tree. The TCE variables are needed because with a memory_limit + * in force we may need to explicitly map the TCE are at the top of RAM. + */ +unsigned long memory_limit; +unsigned long tce_alloc_start; +unsigned long tce_alloc_end; + +#ifdef CONFIG_PPC_ISERIES +/* + * On iSeries we just parse the mem=X option from the command line. + * On pSeries it's a bit more complicated, see prom_init_mem() + */ +static int __init early_parsemem(char *p) +{ + if (!p) + return 0; + + memory_limit = ALIGN(memparse(p, &p), PAGE_SIZE); + + return 0; +} +early_param("mem", early_parsemem); +#endif /* CONFIG_PPC_ISERIES */ + +#ifdef CONFIG_PPC_MULTIPLATFORM +static int __init set_preferred_console(void) +{ + struct device_node *prom_stdout = NULL; + char *name; + u32 *spd; + int offset = 0; + + DBG(" -> set_preferred_console()\n"); + + /* The user has requested a console so this is already set up. */ + if (strstr(saved_command_line, "console=")) { + DBG(" console was specified !\n"); + return -EBUSY; + } + + if (!of_chosen) { + DBG(" of_chosen is NULL !\n"); + return -ENODEV; + } + /* We are getting a weird phandle from OF ... */ + /* ... So use the full path instead */ + name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + if (name == NULL) { + DBG(" no linux,stdout-path !\n"); + return -ENODEV; + } + prom_stdout = of_find_node_by_path(name); + if (!prom_stdout) { + DBG(" can't find stdout package %s !\n", name); + return -ENODEV; + } + DBG("stdout is %s\n", prom_stdout->full_name); + + name = (char *)get_property(prom_stdout, "name", NULL); + if (!name) { + DBG(" stdout package has no name !\n"); + goto not_found; + } + spd = (u32 *)get_property(prom_stdout, "current-speed", NULL); + + if (0) + ; +#ifdef CONFIG_SERIAL_8250_CONSOLE + else if (strcmp(name, "serial") == 0) { + int i; + u32 *reg = (u32 *)get_property(prom_stdout, "reg", &i); + if (i > 8) { + switch (reg[1]) { + case 0x3f8: + offset = 0; + break; + case 0x2f8: + offset = 1; + break; + case 0x898: + offset = 2; + break; + case 0x890: + offset = 3; + break; + default: + /* We dont recognise the serial port */ + goto not_found; + } + } + } +#endif /* CONFIG_SERIAL_8250_CONSOLE */ +#ifdef CONFIG_PPC_PSERIES + else if (strcmp(name, "vty") == 0) { + u32 *reg = (u32 *)get_property(prom_stdout, "reg", NULL); + char *compat = (char *)get_property(prom_stdout, "compatible", NULL); + + if (reg && compat && (strcmp(compat, "hvterm-protocol") == 0)) { + /* Host Virtual Serial Interface */ + int offset; + switch (reg[0]) { + case 0x30000000: + offset = 0; + break; + case 0x30000001: + offset = 1; + break; + default: + goto not_found; + } + of_node_put(prom_stdout); + DBG("Found hvsi console at offset %d\n", offset); + return add_preferred_console("hvsi", offset, NULL); + } else { + /* pSeries LPAR virtual console */ + of_node_put(prom_stdout); + DBG("Found hvc console\n"); + return add_preferred_console("hvc", 0, NULL); + } + } +#endif /* CONFIG_PPC_PSERIES */ +#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE + else if (strcmp(name, "ch-a") == 0) + offset = 0; + else if (strcmp(name, "ch-b") == 0) + offset = 1; +#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */ + else + goto not_found; + of_node_put(prom_stdout); + + DBG("Found serial console at ttyS%d\n", offset); + + if (spd) { + static char __initdata opt[16]; + sprintf(opt, "%d", *spd); + return add_preferred_console("ttyS", offset, opt); + } else + return add_preferred_console("ttyS", offset, NULL); + + not_found: + DBG("No preferred console found !\n"); + of_node_put(prom_stdout); + return -ENODEV; +} +console_initcall(set_preferred_console); +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +#ifdef CONFIG_IRQSTACKS +static void __init irqstack_early_init(void) +{ + unsigned int i; + + /* + * interrupt stacks must be under 256MB, we cannot afford to take + * SLB misses on them. + */ + for_each_cpu(i) { + softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, + THREAD_SIZE, 0x10000000)); + hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, + THREAD_SIZE, 0x10000000)); + } +} +#else +#define irqstack_early_init() +#endif + +/* + * Stack space used when we detect a bad kernel stack pointer, and + * early in SMP boots before relocation is enabled. + */ +static void __init emergency_stack_init(void) +{ + unsigned long limit; + unsigned int i; + + /* + * Emergency stacks must be under 256MB, we cannot afford to take + * SLB misses on them. The ABI also requires them to be 128-byte + * aligned. + * + * Since we use these as temporary stacks during secondary CPU + * bringup, we need to get at them in real mode. This means they + * must also be within the RMO region. + */ + limit = min(0x10000000UL, lmb.rmo_size); + + for_each_cpu(i) + paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128, + limit)) + PAGE_SIZE; +} + +/* + * Called from setup_arch to initialize the bitmap of available + * syscalls in the systemcfg page + */ +void __init setup_syscall_map(void) +{ + unsigned int i, count64 = 0, count32 = 0; + extern unsigned long *sys_call_table; + extern unsigned long *sys_call_table32; + extern unsigned long sys_ni_syscall; + + + for (i = 0; i < __NR_syscalls; i++) { + if (sys_call_table[i] == sys_ni_syscall) + continue; + count64++; + systemcfg->syscall_map_64[i >> 5] |= 0x80000000UL >> (i & 0x1f); + } + for (i = 0; i < __NR_syscalls; i++) { + if (sys_call_table32[i] == sys_ni_syscall) + continue; + count32++; + systemcfg->syscall_map_32[i >> 5] |= 0x80000000UL >> (i & 0x1f); + } + printk(KERN_INFO "Syscall map setup, %d 32 bits and %d 64 bits syscalls\n", + count32, count64); +} + +/* + * Called into from start_kernel, after lock_kernel has been called. + * Initializes bootmem, which is unsed to manage page allocation until + * mem_init is called. + */ +void __init setup_arch(char **cmdline_p) +{ + extern void do_init_bootmem(void); + + ppc64_boot_msg(0x12, "Setup Arch"); + + *cmdline_p = cmd_line; + + /* + * Set cache line size based on type of cpu as a default. + * Systems with OF can look in the properties on the cpu node(s) + * for a possibly more accurate value. + */ + dcache_bsize = ppc64_caches.dline_size; + icache_bsize = ppc64_caches.iline_size; + + /* reboot on panic */ + panic_timeout = 180; + + if (ppc_md.panic) + notifier_chain_register(&panic_notifier_list, &ppc64_panic_block); + + init_mm.start_code = PAGE_OFFSET; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = klimit; + + irqstack_early_init(); + emergency_stack_init(); + + /* set up the bootmem stuff with available memory */ + do_init_bootmem(); + + /* initialize the syscall map in systemcfg */ + setup_syscall_map(); + + ppc_md.setup_arch(); + + /* Select the correct idle loop for the platform. */ + idle_setup(); + + paging_init(); + ppc64_boot_msg(0x15, "Setup Done"); +} + + +/* ToDo: do something useful if ppc_md is not yet setup. */ +#define PPC64_LINUX_FUNCTION 0x0f000000 +#define PPC64_IPL_MESSAGE 0xc0000000 +#define PPC64_TERM_MESSAGE 0xb0000000 +#define PPC64_ATTN_MESSAGE 0xa0000000 +#define PPC64_DUMP_MESSAGE 0xd0000000 + +static void ppc64_do_msg(unsigned int src, const char *msg) +{ + if (ppc_md.progress) { + char buf[32]; + + sprintf(buf, "%08x \n", src); + ppc_md.progress(buf, 0); + sprintf(buf, "%-16s", msg); + ppc_md.progress(buf, 0); + } +} + +/* Print a boot progress message. */ +void ppc64_boot_msg(unsigned int src, const char *msg) +{ + ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg); + printk("[boot]%04x %s\n", src, msg); +} + +/* Print a termination message (print only -- does not stop the kernel) */ +void ppc64_terminate_msg(unsigned int src, const char *msg) +{ + ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_TERM_MESSAGE|src, msg); + printk("[terminate]%04x %s\n", src, msg); +} + +/* Print something that needs attention (device error, etc) */ +void ppc64_attention_msg(unsigned int src, const char *msg) +{ + ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_ATTN_MESSAGE|src, msg); + printk("[attention]%04x %s\n", src, msg); +} + +/* Print a dump progress message. */ +void ppc64_dump_msg(unsigned int src, const char *msg) +{ + ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_DUMP_MESSAGE|src, msg); + printk("[dump]%04x %s\n", src, msg); +} + +int set_spread_lpevents( char * str ) +{ + /* The parameter is the number of processors to share in processing lp events */ + unsigned long i; + unsigned long val = simple_strtoul( str, NULL, 0 ); + if ( ( val > 0 ) && ( val <= NR_CPUS ) ) { + for ( i=1; idefault_decr = tb_ticks_per_jiffy / decr_overclock_proc0; + lpaca->next_jiffy_update_tb = get_tb() + tb_ticks_per_jiffy; +} + +int set_decr_overclock_proc0( char * str ) +{ + unsigned long val = simple_strtoul( str, NULL, 0 ); + if ( ( val >= 1 ) && ( val <= 48 ) ) { + decr_overclock_proc0_set = 1; + decr_overclock_proc0 = val; + printk("proc 0 decrementer overclock factor of %ld\n", val); + } + else + printk("invalid proc 0 decrementer overclock factor of %ld\n", val); + return 1; +} + +int set_decr_overclock( char * str ) +{ + unsigned long val = simple_strtoul( str, NULL, 0 ); + if ( ( val >= 1 ) && ( val <= 48 ) ) { + decr_overclock_set = 1; + decr_overclock = val; + printk("decrementer overclock factor of %ld\n", val); + } + else + printk("invalid decrementer overclock factor of %ld\n", val); + return 1; + +} + +__setup("spread_lpevents=", set_spread_lpevents ); +__setup("decr_overclock_proc0=", set_decr_overclock_proc0 ); +__setup("decr_overclock=", set_decr_overclock ); + +#ifndef CONFIG_PPC_ISERIES +/* + * This function can be used by platforms to "find" legacy serial ports. + * It works for "serial" nodes under an "isa" node, and will try to + * respect the "ibm,aix-loc" property if any. It works with up to 8 + * ports. + */ + +#define MAX_LEGACY_SERIAL_PORTS 8 +static struct plat_serial8250_port serial_ports[MAX_LEGACY_SERIAL_PORTS+1]; +static unsigned int old_serial_count; + +void __init generic_find_legacy_serial_ports(u64 *physport, + unsigned int *default_speed) +{ + struct device_node *np; + u32 *sizeprop; + + struct isa_reg_property { + u32 space; + u32 address; + u32 size; + }; + struct pci_reg_property { + struct pci_address addr; + u32 size_hi; + u32 size_lo; + }; + + DBG(" -> generic_find_legacy_serial_port()\n"); + + *physport = 0; + if (default_speed) + *default_speed = 0; + + np = of_find_node_by_path("/"); + if (!np) + return; + + /* First fill our array */ + for (np = NULL; (np = of_find_node_by_type(np, "serial"));) { + struct device_node *isa, *pci; + struct isa_reg_property *reg; + unsigned long phys_size, addr_size, io_base; + u32 *rangesp; + u32 *interrupts, *clk, *spd; + char *typep; + int index, rlen, rentsize; + + /* Ok, first check if it's under an "isa" parent */ + isa = of_get_parent(np); + if (!isa || strcmp(isa->name, "isa")) { + DBG("%s: no isa parent found\n", np->full_name); + continue; + } + + /* Now look for an "ibm,aix-loc" property that gives us ordering + * if any... + */ + typep = (char *)get_property(np, "ibm,aix-loc", NULL); + + /* Get the ISA port number */ + reg = (struct isa_reg_property *)get_property(np, "reg", NULL); + if (reg == NULL) + goto next_port; + /* We assume the interrupt number isn't translated ... */ + interrupts = (u32 *)get_property(np, "interrupts", NULL); + /* get clock freq. if present */ + clk = (u32 *)get_property(np, "clock-frequency", NULL); + /* get default speed if present */ + spd = (u32 *)get_property(np, "current-speed", NULL); + /* Default to locate at end of array */ + index = old_serial_count; /* end of the array by default */ + + /* If we have a location index, then use it */ + if (typep && *typep == 'S') { + index = simple_strtol(typep+1, NULL, 0) - 1; + /* if index is out of range, use end of array instead */ + if (index >= MAX_LEGACY_SERIAL_PORTS) + index = old_serial_count; + /* if our index is still out of range, that mean that + * array is full, we could scan for a free slot but that + * make little sense to bother, just skip the port + */ + if (index >= MAX_LEGACY_SERIAL_PORTS) + goto next_port; + if (index >= old_serial_count) + old_serial_count = index + 1; + /* Check if there is a port who already claimed our slot */ + if (serial_ports[index].iobase != 0) { + /* if we still have some room, move it, else override */ + if (old_serial_count < MAX_LEGACY_SERIAL_PORTS) { + DBG("Moved legacy port %d -> %d\n", index, + old_serial_count); + serial_ports[old_serial_count++] = + serial_ports[index]; + } else { + DBG("Replacing legacy port %d\n", index); + } + } + } + if (index >= MAX_LEGACY_SERIAL_PORTS) + goto next_port; + if (index >= old_serial_count) + old_serial_count = index + 1; + + /* Now fill the entry */ + memset(&serial_ports[index], 0, sizeof(struct plat_serial8250_port)); + serial_ports[index].uartclk = clk ? *clk : BASE_BAUD * 16; + serial_ports[index].iobase = reg->address; + serial_ports[index].irq = interrupts ? interrupts[0] : 0; + serial_ports[index].flags = ASYNC_BOOT_AUTOCONF; + + DBG("Added legacy port, index: %d, port: %x, irq: %d, clk: %d\n", + index, + serial_ports[index].iobase, + serial_ports[index].irq, + serial_ports[index].uartclk); + + /* Get phys address of IO reg for port 1 */ + if (index != 0) + goto next_port; + + pci = of_get_parent(isa); + if (!pci) { + DBG("%s: no pci parent found\n", np->full_name); + goto next_port; + } + + rangesp = (u32 *)get_property(pci, "ranges", &rlen); + if (rangesp == NULL) { + of_node_put(pci); + goto next_port; + } + rlen /= 4; + + /* we need the #size-cells of the PCI bridge node itself */ + phys_size = 1; + sizeprop = (u32 *)get_property(pci, "#size-cells", NULL); + if (sizeprop != NULL) + phys_size = *sizeprop; + /* we need the parent #addr-cells */ + addr_size = prom_n_addr_cells(pci); + rentsize = 3 + addr_size + phys_size; + io_base = 0; + for (;rlen >= rentsize; rlen -= rentsize,rangesp += rentsize) { + if (((rangesp[0] >> 24) & 0x3) != 1) + continue; /* not IO space */ + io_base = rangesp[3]; + if (addr_size == 2) + io_base = (io_base << 32) | rangesp[4]; + } + if (io_base != 0) { + *physport = io_base + reg->address; + if (default_speed && spd) + *default_speed = *spd; + } + of_node_put(pci); + next_port: + of_node_put(isa); + } + + DBG(" <- generic_find_legacy_serial_port()\n"); +} + +static struct platform_device serial_device = { + .name = "serial8250", + .id = 0, + .dev = { + .platform_data = serial_ports, + }, +}; + +static int __init serial_dev_init(void) +{ + return platform_device_register(&serial_device); +} +arch_initcall(serial_dev_init); + +#endif /* CONFIG_PPC_ISERIES */ + +int check_legacy_ioport(unsigned long base_port) +{ + if (ppc_md.check_legacy_ioport == NULL) + return 0; + return ppc_md.check_legacy_ioport(base_port); +} +EXPORT_SYMBOL(check_legacy_ioport); + +#ifdef CONFIG_XMON +static int __init early_xmon(char *p) +{ + /* ensure xmon is enabled */ + if (p) { + if (strncmp(p, "on", 2) == 0) + xmon_init(); + if (strncmp(p, "early", 5) != 0) + return 0; + } + xmon_init(); + debugger(NULL); + + return 0; +} +early_param("xmon", early_xmon); +#endif + +void cpu_die(void) +{ + if (ppc_md.cpu_die) + ppc_md.cpu_die(); +} diff --git a/arch/ppc64/kernel/signal.c b/arch/ppc64/kernel/signal.c new file mode 100644 index 00000000000..a95a2b49a1d --- /dev/null +++ b/arch/ppc64/kernel/signal.c @@ -0,0 +1,575 @@ +/* + * linux/arch/ppc64/kernel/signal.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/kernel/signal.c" + * Copyright (C) 1991, 1992 Linus Torvalds + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#ifndef MIN +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +#define GP_REGS_SIZE MIN(sizeof(elf_gregset_t), sizeof(struct pt_regs)) +#define FP_REGS_SIZE sizeof(elf_fpregset_t) + +#define TRAMP_TRACEBACK 3 +#define TRAMP_SIZE 6 + +/* + * When we have signals to deliver, we set up on the user stack, + * going down from the original stack pointer: + * 1) a rt_sigframe struct which contains the ucontext + * 2) a gap of __SIGNAL_FRAMESIZE bytes which acts as a dummy caller + * frame for the signal handler. + */ + +struct rt_sigframe { + /* sys_rt_sigreturn requires the ucontext be the first field */ + struct ucontext uc; + unsigned long _unused[2]; + unsigned int tramp[TRAMP_SIZE]; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + /* 64 bit ABI allows for 288 bytes below sp before decrementing it. */ + char abigap[288]; +} __attribute__ ((aligned (16))); + + +/* + * Atomically swap in the new signal mask, and wait for a signal. + */ +long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, int p3, int p4, + int p6, int p7, struct pt_regs *regs) +{ + sigset_t saveset, newset; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (copy_from_user(&newset, unewset, sizeof(newset))) + return -EFAULT; + sigdelsetmask(&newset, ~_BLOCKABLE); + + spin_lock_irq(¤t->sighand->siglock); + saveset = current->blocked; + current->blocked = newset; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(&saveset, regs)) + return 0; + } +} + +long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r5, + unsigned long r6, unsigned long r7, unsigned long r8, + struct pt_regs *regs) +{ + return do_sigaltstack(uss, uoss, regs->gpr[1]); +} + + +/* + * Set up the sigcontext for the signal frame. + */ + +static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, + int signr, sigset_t *set, unsigned long handler) +{ + /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the + * process never used altivec yet (MSR_VEC is zero in pt_regs of + * the context). This is very important because we must ensure we + * don't lose the VRSAVE content that may have been set prior to + * the process doing its first vector operation + * Userland shall check AT_HWCAP to know wether it can rely on the + * v_regs pointer or not + */ +#ifdef CONFIG_ALTIVEC + elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)(((unsigned long)sc->vmx_reserve + 15) & ~0xful); +#endif + long err = 0; + + flush_fp_to_thread(current); + + /* Make sure signal doesn't get spurrious FP exceptions */ + current->thread.fpscr = 0; + +#ifdef CONFIG_ALTIVEC + err |= __put_user(v_regs, &sc->v_regs); + + /* save altivec registers */ + if (current->thread.used_vr) { + flush_altivec_to_thread(current); + /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ + err |= __copy_to_user(v_regs, current->thread.vr, 33 * sizeof(vector128)); + /* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg) + * contains valid data. + */ + regs->msr |= MSR_VEC; + } + /* We always copy to/from vrsave, it's 0 if we don't have or don't + * use altivec. + */ + err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); +#else /* CONFIG_ALTIVEC */ + err |= __put_user(0, &sc->v_regs); +#endif /* CONFIG_ALTIVEC */ + err |= __put_user(&sc->gp_regs, &sc->regs); + err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); + err |= __copy_to_user(&sc->fp_regs, ¤t->thread.fpr, FP_REGS_SIZE); + err |= __put_user(signr, &sc->signal); + err |= __put_user(handler, &sc->handler); + if (set != NULL) + err |= __put_user(set->sig[0], &sc->oldmask); + + return err; +} + +/* + * Restore the sigcontext from the signal frame. + */ + +static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, + struct sigcontext __user *sc) +{ +#ifdef CONFIG_ALTIVEC + elf_vrreg_t __user *v_regs; +#endif + unsigned long err = 0; + unsigned long save_r13 = 0; + elf_greg_t *gregs = (elf_greg_t *)regs; +#ifdef CONFIG_ALTIVEC + unsigned long msr; +#endif + int i; + + /* If this is not a signal return, we preserve the TLS in r13 */ + if (!sig) + save_r13 = regs->gpr[13]; + + /* copy everything before MSR */ + err |= __copy_from_user(regs, &sc->gp_regs, + PT_MSR*sizeof(unsigned long)); + + /* skip MSR and SOFTE */ + for (i = PT_MSR+1; i <= PT_RESULT; i++) { + if (i == PT_SOFTE) + continue; + err |= __get_user(gregs[i], &sc->gp_regs[i]); + } + + if (!sig) + regs->gpr[13] = save_r13; + err |= __copy_from_user(¤t->thread.fpr, &sc->fp_regs, FP_REGS_SIZE); + if (set != NULL) + err |= __get_user(set->sig[0], &sc->oldmask); + +#ifdef CONFIG_ALTIVEC + err |= __get_user(v_regs, &sc->v_regs); + err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + if (err) + return err; + /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ + if (v_regs != 0 && (msr & MSR_VEC) != 0) + err |= __copy_from_user(current->thread.vr, v_regs, + 33 * sizeof(vector128)); + else if (current->thread.used_vr) + memset(current->thread.vr, 0, 33 * sizeof(vector128)); + /* Always get VRSAVE back */ + if (v_regs != 0) + err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); + else + current->thread.vrsave = 0; +#endif /* CONFIG_ALTIVEC */ + +#ifndef CONFIG_SMP + preempt_disable(); + if (last_task_used_math == current) + last_task_used_math = NULL; + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; + preempt_enable(); +#endif + /* Force reload of FP/VEC */ + regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC); + + return err; +} + +/* + * Allocate space for the signal frame + */ +static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + size_t frame_size) +{ + unsigned long newsp; + + /* Default to using normal stack */ + newsp = regs->gpr[1]; + + if (ka->sa.sa_flags & SA_ONSTACK) { + if (! on_sig_stack(regs->gpr[1])) + newsp = (current->sas_ss_sp + current->sas_ss_size); + } + + return (void __user *)((newsp - frame_size) & -16ul); +} + +/* + * Setup the trampoline code on the stack + */ +static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) +{ + int i; + long err = 0; + + /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */ + err |= __put_user(0x38210000UL | (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]); + /* li r0, __NR_[rt_]sigreturn| */ + err |= __put_user(0x38000000UL | (syscall & 0xffff), &tramp[1]); + /* sc */ + err |= __put_user(0x44000002UL, &tramp[2]); + + /* Minimal traceback info */ + for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++) + err |= __put_user(0, &tramp[i]); + + if (!err) + flush_icache_range((unsigned long) &tramp[0], + (unsigned long) &tramp[TRAMP_SIZE]); + + return err; +} + +/* + * Restore the user process's signal mask (also used by signal32.c) + */ +void restore_sigmask(sigset_t *set) +{ + sigdelsetmask(set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = *set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +} + + +/* + * Handle {get,set,swap}_context operations + */ +int sys_swapcontext(struct ucontext __user *old_ctx, + struct ucontext __user *new_ctx, + long ctx_size, long r6, long r7, long r8, struct pt_regs *regs) +{ + unsigned char tmp; + sigset_t set; + + /* Context size is for future use. Right now, we only make sure + * we are passed something we understand + */ + if (ctx_size < sizeof(struct ucontext)) + return -EINVAL; + + if (old_ctx != NULL) { + if (!access_ok(VERIFY_WRITE, old_ctx, sizeof(*old_ctx)) + || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0) + || __copy_to_user(&old_ctx->uc_sigmask, + ¤t->blocked, sizeof(sigset_t))) + return -EFAULT; + } + if (new_ctx == NULL) + return 0; + if (!access_ok(VERIFY_READ, new_ctx, sizeof(*new_ctx)) + || __get_user(tmp, (u8 __user *) new_ctx) + || __get_user(tmp, (u8 __user *) (new_ctx + 1) - 1)) + return -EFAULT; + + /* + * If we get a fault copying the context into the kernel's + * image of the user's registers, we can't just return -EFAULT + * because the user's registers will be corrupted. For instance + * the NIP value may have been updated but not some of the + * other registers. Given that we have done the access_ok + * and successfully read the first and last bytes of the region + * above, this should only happen in an out-of-memory situation + * or if another thread unmaps the region containing the context. + * We kill the task with a SIGSEGV in this situation. + */ + + if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set))) + do_exit(SIGSEGV); + restore_sigmask(&set); + if (restore_sigcontext(regs, NULL, 0, &new_ctx->uc_mcontext)) + do_exit(SIGSEGV); + + /* This returns like rt_sigreturn */ + return 0; +} + + +/* + * Do a signal return; undo the signal stack. + */ + +int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7, unsigned long r8, + struct pt_regs *regs) +{ + struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1]; + sigset_t set; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + if (!access_ok(VERIFY_READ, uc, sizeof(*uc))) + goto badframe; + + if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) + goto badframe; + restore_sigmask(&set); + if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext)) + goto badframe; + + /* do_sigaltstack expects a __user pointer and won't modify + * what's in there anyway + */ + do_sigaltstack(&uc->uc_stack, NULL, regs->gpr[1]); + + return regs->result; + +badframe: +#if DEBUG_SIG + printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n", + regs, uc, &uc->uc_mcontext); +#endif + force_sig(SIGSEGV, current); + return 0; +} + +static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + /* Handler is *really* a pointer to the function descriptor for + * the signal routine. The first entry in the function + * descriptor is the entry address of signal and the second + * entry is the TOC value we need to use. + */ + func_descr_t __user *funct_desc_ptr; + struct rt_sigframe __user *frame; + unsigned long newsp = 0; + long err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto badframe; + + err |= __put_user(&frame->info, &frame->pinfo); + err |= __put_user(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); + if (err) + goto badframe; + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->gpr[1]), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, NULL, + (unsigned long)ka->sa.sa_handler); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto badframe; + + /* Set up to return from userspace. */ + if (vdso64_rt_sigtramp && current->thread.vdso_base) { + regs->link = current->thread.vdso_base + vdso64_rt_sigtramp; + } else { + err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); + if (err) + goto badframe; + regs->link = (unsigned long) &frame->tramp[0]; + } + funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler; + + /* Allocate a dummy caller frame for the signal handler. */ + newsp = (unsigned long)frame - __SIGNAL_FRAMESIZE; + err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); + + /* Set up "regs" so we "return" to the signal handler. */ + err |= get_user(regs->nip, &funct_desc_ptr->entry); + regs->gpr[1] = newsp; + err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); + regs->gpr[3] = signr; + regs->result = 0; + if (ka->sa.sa_flags & SA_SIGINFO) { + err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo); + err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc); + regs->gpr[6] = (unsigned long) frame; + } else { + regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext; + } + if (err) + goto badframe; + + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + + return 1; + +badframe: +#if DEBUG_SIG + printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n", + regs, frame, newsp); +#endif + force_sigsegv(signr, current); + return 0; +} + + +/* + * OK, we're invoking a handler + */ +static int handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) +{ + int ret; + + /* Set up Signal Frame */ + ret = setup_rt_frame(sig, ka, info, oldset, regs); + + if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(¤t->sighand->siglock); + sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); + sigaddset(¤t->blocked,sig); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + } + + return ret; +} + +static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) +{ + switch ((int)regs->result) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + /* ERESTARTNOHAND means that the syscall should only be + * restarted if there was no handler for the signal, and since + * we only get here if there is a handler, we dont restart. + */ + regs->result = -EINTR; + break; + case -ERESTARTSYS: + /* ERESTARTSYS means to restart the syscall if there is no + * handler or the handler was registered with SA_RESTART + */ + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->result = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + /* ERESTARTNOINTR means that the syscall should be + * called again after the signal handler returns. + */ + regs->gpr[3] = regs->orig_gpr3; + regs->nip -= 4; + regs->result = 0; + break; + } +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ +int do_signal(sigset_t *oldset, struct pt_regs *regs) +{ + siginfo_t info; + int signr; + struct k_sigaction ka; + + /* + * If the current thread is 32 bit - invoke the + * 32 bit signal handling code + */ + if (test_thread_flag(TIF_32BIT)) + return do_signal32(oldset, regs); + + if (!oldset) + oldset = ¤t->blocked; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + if (signr > 0) { + /* Whee! Actually deliver the signal. */ + if (TRAP(regs) == 0x0C00) + syscall_restart(regs, &ka); + return handle_signal(signr, &ka, &info, oldset, regs); + } + + if (TRAP(regs) == 0x0C00) { /* System Call! */ + if ((int)regs->result == -ERESTARTNOHAND || + (int)regs->result == -ERESTARTSYS || + (int)regs->result == -ERESTARTNOINTR) { + regs->gpr[3] = regs->orig_gpr3; + regs->nip -= 4; /* Back up & retry system call */ + regs->result = 0; + } else if ((int)regs->result == -ERESTART_RESTARTBLOCK) { + regs->gpr[0] = __NR_restart_syscall; + regs->nip -= 4; + regs->result = 0; + } + } + + return 0; +} +EXPORT_SYMBOL(do_signal); diff --git a/arch/ppc64/kernel/signal32.c b/arch/ppc64/kernel/signal32.c new file mode 100644 index 00000000000..b0e167db6af --- /dev/null +++ b/arch/ppc64/kernel/signal32.c @@ -0,0 +1,989 @@ +/* + * signal32.c: Support 32bit signal syscalls. + * + * Copyright (C) 2001 IBM + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#define GP_REGS_SIZE32 min(sizeof(elf_gregset_t32), sizeof(struct pt_regs32)) + +/* + * When we have signals to deliver, we set up on the + * user stack, going down from the original stack pointer: + * a sigregs32 struct + * a sigcontext32 struct + * a gap of __SIGNAL_FRAMESIZE32 bytes + * + * Each of these things must be a multiple of 16 bytes in size. + * + */ +struct sigregs32 { + struct mcontext32 mctx; /* all the register values */ + /* + * Programs using the rs6000/xcoff abi can save up to 19 gp + * regs and 18 fp regs below sp before decrementing it. + */ + int abigap[56]; +}; + +/* We use the mc_pad field for the signal return trampoline. */ +#define tramp mc_pad + +/* + * When we have rt signals to deliver, we set up on the + * user stack, going down from the original stack pointer: + * one rt_sigframe32 struct (siginfo + ucontext + ABI gap) + * a gap of __SIGNAL_FRAMESIZE32+16 bytes + * (the +16 is to get the siginfo and ucontext32 in the same + * positions as in older kernels). + * + * Each of these things must be a multiple of 16 bytes in size. + * + */ +struct rt_sigframe32 { + compat_siginfo_t info; + struct ucontext32 uc; + /* + * Programs using the rs6000/xcoff abi can save up to 19 gp + * regs and 18 fp regs below sp before decrementing it. + */ + int abigap[56]; +}; + + +/* + * Common utility functions used by signal and context support + * + */ + +/* + * Restore the user process's signal mask + * (implemented in signal.c) + */ +extern void restore_sigmask(sigset_t *set); + +/* + * Functions for flipping sigsets (thanks to brain dead generic + * implementation that makes things simple for little endian only + */ +static inline void compat_from_sigset(compat_sigset_t *compat, sigset_t *set) +{ + switch (_NSIG_WORDS) { + case 4: compat->sig[5] = set->sig[3] & 0xffffffffull ; + compat->sig[7] = set->sig[3] >> 32; + case 3: compat->sig[4] = set->sig[2] & 0xffffffffull ; + compat->sig[5] = set->sig[2] >> 32; + case 2: compat->sig[2] = set->sig[1] & 0xffffffffull ; + compat->sig[3] = set->sig[1] >> 32; + case 1: compat->sig[0] = set->sig[0] & 0xffffffffull ; + compat->sig[1] = set->sig[0] >> 32; + } +} + +static inline void sigset_from_compat(sigset_t *set, compat_sigset_t *compat) +{ + switch (_NSIG_WORDS) { + case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32); + case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32); + case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32); + case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32); + } +} + + +/* + * Save the current user registers on the user stack. + * We only save the altivec registers if the process has used + * altivec instructions at some point. + */ +static int save_user_regs(struct pt_regs *regs, struct mcontext32 __user *frame, int sigret) +{ + elf_greg_t64 *gregs = (elf_greg_t64 *)regs; + int i, err = 0; + + /* Make sure floating point registers are stored in regs */ + flush_fp_to_thread(current); + + /* save general and floating-point registers */ + for (i = 0; i <= PT_RESULT; i ++) + err |= __put_user((unsigned int)gregs[i], &frame->mc_gregs[i]); + err |= __copy_to_user(&frame->mc_fregs, current->thread.fpr, + ELF_NFPREG * sizeof(double)); + if (err) + return 1; + + current->thread.fpscr = 0; /* turn off all fp exceptions */ + +#ifdef CONFIG_ALTIVEC + /* save altivec registers */ + if (current->thread.used_vr) { + flush_altivec_to_thread(current); + if (__copy_to_user(&frame->mc_vregs, current->thread.vr, + ELF_NVRREG32 * sizeof(vector128))) + return 1; + /* set MSR_VEC in the saved MSR value to indicate that + frame->mc_vregs contains valid data */ + if (__put_user(regs->msr | MSR_VEC, &frame->mc_gregs[PT_MSR])) + return 1; + } + /* else assert((regs->msr & MSR_VEC) == 0) */ + + /* We always copy to/from vrsave, it's 0 if we don't have or don't + * use altivec. Since VSCR only contains 32 bits saved in the least + * significant bits of a vector, we "cheat" and stuff VRSAVE in the + * most significant bits of that same vector. --BenH + */ + if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32])) + return 1; +#endif /* CONFIG_ALTIVEC */ + + if (sigret) { + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) + || __put_user(0x44000002UL, &frame->tramp[1])) + return 1; + flush_icache_range((unsigned long) &frame->tramp[0], + (unsigned long) &frame->tramp[2]); + } + + return 0; +} + +/* + * Restore the current user register values from the user stack, + * (except for MSR). + */ +static long restore_user_regs(struct pt_regs *regs, + struct mcontext32 __user *sr, int sig) +{ + elf_greg_t64 *gregs = (elf_greg_t64 *)regs; + int i; + long err = 0; + unsigned int save_r2 = 0; +#ifdef CONFIG_ALTIVEC + unsigned long msr; +#endif + + /* + * restore general registers but not including MSR or SOFTE. Also + * take care of keeping r2 (TLS) intact if not a signal + */ + if (!sig) + save_r2 = (unsigned int)regs->gpr[2]; + for (i = 0; i <= PT_RESULT; i++) { + if ((i == PT_MSR) || (i == PT_SOFTE)) + continue; + err |= __get_user(gregs[i], &sr->mc_gregs[i]); + } + if (!sig) + regs->gpr[2] = (unsigned long) save_r2; + if (err) + return 1; + + /* force the process to reload the FP registers from + current->thread when it next does FP instructions */ + regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); + if (__copy_from_user(current->thread.fpr, &sr->mc_fregs, + sizeof(sr->mc_fregs))) + return 1; + +#ifdef CONFIG_ALTIVEC + /* force the process to reload the altivec registers from + current->thread when it next does altivec instructions */ + regs->msr &= ~MSR_VEC; + if (!__get_user(msr, &sr->mc_gregs[PT_MSR]) && (msr & MSR_VEC) != 0) { + /* restore altivec registers from the stack */ + if (__copy_from_user(current->thread.vr, &sr->mc_vregs, + sizeof(sr->mc_vregs))) + return 1; + } else if (current->thread.used_vr) + memset(current->thread.vr, 0, ELF_NVRREG32 * sizeof(vector128)); + + /* Always get VRSAVE back */ + if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32])) + return 1; +#endif /* CONFIG_ALTIVEC */ + +#ifndef CONFIG_SMP + preempt_disable(); + if (last_task_used_math == current) + last_task_used_math = NULL; + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; + preempt_enable(); +#endif + return 0; +} + + +/* + * Start of nonRT signal support + * + * sigset_t is 32 bits for non-rt signals + * + * System Calls + * sigaction sys32_sigaction + * sigreturn sys32_sigreturn + * + * Note sigsuspend has no special 32 bit routine - uses the 64 bit routine + * + * Other routines + * setup_frame32 + */ + +/* + * Atomically swap in the new signal mask, and wait for a signal. + */ +long sys32_sigsuspend(old_sigset_t mask, int p2, int p3, int p4, int p6, int p7, + struct pt_regs *regs) +{ + sigset_t saveset; + + mask &= _BLOCKABLE; + spin_lock_irq(¤t->sighand->siglock); + saveset = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal32(&saveset, regs)) + /* + * Returning 0 means we return to userspace via + * ret_from_except and thus restore all user + * registers from *regs. This is what we need + * to do when a signal has been delivered. + */ + return 0; + } +} + +long sys32_sigaction(int sig, struct old_sigaction32 __user *act, + struct old_sigaction32 __user *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (sig < 0) + sig = -sig; + + if (act) { + compat_old_sigset_t mask; + compat_uptr_t handler, restorer; + + if (get_user(handler, &act->sa_handler) || + __get_user(restorer, &act->sa_restorer) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) + return -EFAULT; + new_ka.sa.sa_handler = compat_ptr(handler); + new_ka.sa.sa_restorer = compat_ptr(restorer); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + if (!ret && oact) { + if (put_user((long)old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user((long)old_ka.sa.sa_restorer, &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) + return -EFAULT; + } + + return ret; +} + + + +/* + * Start of RT signal support + * + * sigset_t is 64 bits for rt signals + * + * System Calls + * sigaction sys32_rt_sigaction + * sigpending sys32_rt_sigpending + * sigprocmask sys32_rt_sigprocmask + * sigreturn sys32_rt_sigreturn + * sigqueueinfo sys32_rt_sigqueueinfo + * sigsuspend sys32_rt_sigsuspend + * + * Other routines + * setup_rt_frame32 + * copy_siginfo_to_user32 + * siginfo32to64 + */ + + +long sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, + struct sigaction32 __user *oact, size_t sigsetsize) +{ + struct k_sigaction new_ka, old_ka; + int ret; + compat_sigset_t set32; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + + if (act) { + compat_uptr_t handler; + + ret = get_user(handler, &act->sa_handler); + new_ka.sa.sa_handler = compat_ptr(handler); + ret |= __copy_from_user(&set32, &act->sa_mask, + sizeof(compat_sigset_t)); + sigset_from_compat(&new_ka.sa.sa_mask, &set32); + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + if (ret) + return -EFAULT; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + if (!ret && oact) { + compat_from_sigset(&set32, &old_ka.sa.sa_mask); + ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler); + ret |= __copy_to_user(&oact->sa_mask, &set32, + sizeof(compat_sigset_t)); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + } + return ret; +} + +/* + * Note: it is necessary to treat how as an unsigned int, with the + * corresponding cast to a signed int to insure that the proper + * conversion (sign extension) between the register representation + * of a signed int (msr in 32-bit mode) and the register representation + * of a signed int (msr in 64-bit mode) is performed. + */ +long sys32_rt_sigprocmask(u32 how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, size_t sigsetsize) +{ + sigset_t s; + sigset_t __user *up; + compat_sigset_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + if (set) { + if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) + return -EFAULT; + sigset_from_compat(&s, &s32); + } + + set_fs(KERNEL_DS); + /* This is valid because of the set_fs() */ + up = (sigset_t __user *) &s; + ret = sys_rt_sigprocmask((int)how, set ? up : NULL, oset ? up : NULL, + sigsetsize); + set_fs(old_fs); + if (ret) + return ret; + if (oset) { + compat_from_sigset(&s32, &s); + if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) + return -EFAULT; + } + return 0; +} + +long sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) +{ + sigset_t s; + compat_sigset_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + /* The __user pointer cast is valid because of the set_fs() */ + ret = sys_rt_sigpending((sigset_t __user *) &s, sigsetsize); + set_fs(old_fs); + if (!ret) { + compat_from_sigset(&s32, &s); + if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) + return -EFAULT; + } + return ret; +} + + +int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s) +{ + int err; + + if (!access_ok (VERIFY_WRITE, d, sizeof(*d))) + return -EFAULT; + + /* If you change siginfo_t structure, please be sure + * this code is fixed accordingly. + * It should never copy any pad contained in the structure + * to avoid security leaks, but must copy the generic + * 3 ints plus the relevant union member. + * This routine must convert siginfo from 64bit to 32bit as well + * at the same time. + */ + err = __put_user(s->si_signo, &d->si_signo); + err |= __put_user(s->si_errno, &d->si_errno); + err |= __put_user((short)s->si_code, &d->si_code); + if (s->si_code < 0) + err |= __copy_to_user(&d->_sifields._pad, &s->_sifields._pad, + SI_PAD_SIZE32); + else switch(s->si_code >> 16) { + case __SI_CHLD >> 16: + err |= __put_user(s->si_pid, &d->si_pid); + err |= __put_user(s->si_uid, &d->si_uid); + err |= __put_user(s->si_utime, &d->si_utime); + err |= __put_user(s->si_stime, &d->si_stime); + err |= __put_user(s->si_status, &d->si_status); + break; + case __SI_FAULT >> 16: + err |= __put_user((unsigned int)(unsigned long)s->si_addr, + &d->si_addr); + break; + case __SI_POLL >> 16: + err |= __put_user(s->si_band, &d->si_band); + err |= __put_user(s->si_fd, &d->si_fd); + break; + case __SI_TIMER >> 16: + err |= __put_user(s->si_tid, &d->si_tid); + err |= __put_user(s->si_overrun, &d->si_overrun); + err |= __put_user(s->si_int, &d->si_int); + break; + case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ + case __SI_MESGQ >> 16: + err |= __put_user(s->si_int, &d->si_int); + /* fallthrough */ + case __SI_KILL >> 16: + default: + err |= __put_user(s->si_pid, &d->si_pid); + err |= __put_user(s->si_uid, &d->si_uid); + break; + } + return err; +} + +/* + * Note: it is necessary to treat pid and sig as unsigned ints, with the + * corresponding cast to a signed int to insure that the proper conversion + * (sign extension) between the register representation of a signed int + * (msr in 32-bit mode) and the register representation of a signed int + * (msr in 64-bit mode) is performed. + */ +long sys32_rt_sigqueueinfo(u32 pid, u32 sig, compat_siginfo_t __user *uinfo) +{ + siginfo_t info; + int ret; + mm_segment_t old_fs = get_fs(); + + if (copy_from_user (&info, uinfo, 3*sizeof(int)) || + copy_from_user (info._sifields._pad, uinfo->_sifields._pad, SI_PAD_SIZE32)) + return -EFAULT; + set_fs (KERNEL_DS); + /* The __user pointer cast is valid becasuse of the set_fs() */ + ret = sys_rt_sigqueueinfo((int)pid, (int)sig, (siginfo_t __user *) &info); + set_fs (old_fs); + return ret; +} + +int sys32_rt_sigsuspend(compat_sigset_t __user * unewset, size_t sigsetsize, int p3, + int p4, int p6, int p7, struct pt_regs *regs) +{ + sigset_t saveset, newset; + compat_sigset_t s32; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (copy_from_user(&s32, unewset, sizeof(s32))) + return -EFAULT; + + /* + * Swap the 2 words of the 64-bit sigset_t (they are stored + * in the "wrong" endian in 32-bit user storage). + */ + sigset_from_compat(&newset, &s32); + + sigdelsetmask(&newset, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + saveset = current->blocked; + current->blocked = newset; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal32(&saveset, regs)) + /* + * Returning 0 means we return to userspace via + * ret_from_except and thus restore all user + * registers from *regs. This is what we need + * to do when a signal has been delivered. + */ + return 0; + } +} + +/* + * Start Alternate signal stack support + * + * System Calls + * sigaltatck sys32_sigaltstack + */ + +int sys32_sigaltstack(u32 __new, u32 __old, int r5, + int r6, int r7, int r8, struct pt_regs *regs) +{ + stack_32_t __user * newstack = (stack_32_t __user *)(long) __new; + stack_32_t __user * oldstack = (stack_32_t __user *)(long) __old; + stack_t uss, uoss; + int ret; + mm_segment_t old_fs; + unsigned long sp; + compat_uptr_t ss_sp; + + /* + * set sp to the user stack on entry to the system call + * the system call router sets R9 to the saved registers + */ + sp = regs->gpr[1]; + + /* Put new stack info in local 64 bit stack struct */ + if (newstack) { + if (get_user(ss_sp, &newstack->ss_sp) || + __get_user(uss.ss_flags, &newstack->ss_flags) || + __get_user(uss.ss_size, &newstack->ss_size)) + return -EFAULT; + uss.ss_sp = compat_ptr(ss_sp); + } + + old_fs = get_fs(); + set_fs(KERNEL_DS); + /* The __user pointer casts are valid because of the set_fs() */ + ret = do_sigaltstack( + newstack ? (stack_t __user *) &uss : NULL, + oldstack ? (stack_t __user *) &uoss : NULL, + sp); + set_fs(old_fs); + /* Copy the stack information to the user output buffer */ + if (!ret && oldstack && + (put_user((long)uoss.ss_sp, &oldstack->ss_sp) || + __put_user(uoss.ss_flags, &oldstack->ss_flags) || + __put_user(uoss.ss_size, &oldstack->ss_size))) + return -EFAULT; + return ret; +} + + +/* + * Set up a signal frame for a "real-time" signal handler + * (one which gets siginfo). + */ +static int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs * regs, unsigned long newsp) +{ + struct rt_sigframe32 __user *rt_sf; + struct mcontext32 __user *frame; + unsigned long origsp = newsp; + compat_sigset_t c_oldset; + + /* Set up Signal Frame */ + /* Put a Real Time Context onto stack */ + newsp -= sizeof(*rt_sf); + rt_sf = (struct rt_sigframe32 __user *)newsp; + + /* create a stack frame for the caller of the handler */ + newsp -= __SIGNAL_FRAMESIZE32 + 16; + + if (!access_ok(VERIFY_WRITE, (void __user *)newsp, origsp - newsp)) + goto badframe; + + compat_from_sigset(&c_oldset, oldset); + + /* Put the siginfo & fill in most of the ucontext */ + if (copy_siginfo_to_user32(&rt_sf->info, info) + || __put_user(0, &rt_sf->uc.uc_flags) + || __put_user(0, &rt_sf->uc.uc_link) + || __put_user(current->sas_ss_sp, &rt_sf->uc.uc_stack.ss_sp) + || __put_user(sas_ss_flags(regs->gpr[1]), + &rt_sf->uc.uc_stack.ss_flags) + || __put_user(current->sas_ss_size, &rt_sf->uc.uc_stack.ss_size) + || __put_user((u32)(u64)&rt_sf->uc.uc_mcontext, &rt_sf->uc.uc_regs) + || __copy_to_user(&rt_sf->uc.uc_sigmask, &c_oldset, sizeof(c_oldset))) + goto badframe; + + /* Save user registers on the stack */ + frame = &rt_sf->uc.uc_mcontext; + if (put_user(regs->gpr[1], (unsigned long __user *)newsp)) + goto badframe; + + if (vdso32_rt_sigtramp && current->thread.vdso_base) { + if (save_user_regs(regs, frame, 0)) + goto badframe; + regs->link = current->thread.vdso_base + vdso32_rt_sigtramp; + } else { + if (save_user_regs(regs, frame, __NR_rt_sigreturn)) + goto badframe; + regs->link = (unsigned long) frame->tramp; + } + regs->gpr[1] = (unsigned long) newsp; + regs->gpr[3] = sig; + regs->gpr[4] = (unsigned long) &rt_sf->info; + regs->gpr[5] = (unsigned long) &rt_sf->uc; + regs->gpr[6] = (unsigned long) rt_sf; + regs->nip = (unsigned long) ka->sa.sa_handler; + regs->trap = 0; + regs->result = 0; + + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + + return 1; + +badframe: +#if DEBUG_SIG + printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n", + regs, frame, newsp); +#endif + force_sigsegv(sig, current); + return 0; +} + +static long do_setcontext32(struct ucontext32 __user *ucp, struct pt_regs *regs, int sig) +{ + compat_sigset_t c_set; + sigset_t set; + u32 mcp; + + if (__copy_from_user(&c_set, &ucp->uc_sigmask, sizeof(c_set)) + || __get_user(mcp, &ucp->uc_regs)) + return -EFAULT; + sigset_from_compat(&set, &c_set); + restore_sigmask(&set); + if (restore_user_regs(regs, (struct mcontext32 __user *)(u64)mcp, sig)) + return -EFAULT; + + return 0; +} + +/* + * Handle {get,set,swap}_context operations for 32 bits processes + */ + +long sys32_swapcontext(struct ucontext32 __user *old_ctx, + struct ucontext32 __user *new_ctx, + int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) +{ + unsigned char tmp; + compat_sigset_t c_set; + + /* Context size is for future use. Right now, we only make sure + * we are passed something we understand + */ + if (ctx_size < sizeof(struct ucontext32)) + return -EINVAL; + + if (old_ctx != NULL) { + compat_from_sigset(&c_set, ¤t->blocked); + if (!access_ok(VERIFY_WRITE, old_ctx, sizeof(*old_ctx)) + || save_user_regs(regs, &old_ctx->uc_mcontext, 0) + || __copy_to_user(&old_ctx->uc_sigmask, &c_set, sizeof(c_set)) + || __put_user((u32)(u64)&old_ctx->uc_mcontext, &old_ctx->uc_regs)) + return -EFAULT; + } + if (new_ctx == NULL) + return 0; + if (!access_ok(VERIFY_READ, new_ctx, sizeof(*new_ctx)) + || __get_user(tmp, (u8 __user *) new_ctx) + || __get_user(tmp, (u8 __user *) (new_ctx + 1) - 1)) + return -EFAULT; + + /* + * If we get a fault copying the context into the kernel's + * image of the user's registers, we can't just return -EFAULT + * because the user's registers will be corrupted. For instance + * the NIP value may have been updated but not some of the + * other registers. Given that we have done the access_ok + * and successfully read the first and last bytes of the region + * above, this should only happen in an out-of-memory situation + * or if another thread unmaps the region containing the context. + * We kill the task with a SIGSEGV in this situation. + */ + if (do_setcontext32(new_ctx, regs, 0)) + do_exit(SIGSEGV); + + return 0; +} + +long sys32_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, + struct pt_regs *regs) +{ + struct rt_sigframe32 __user *rt_sf; + int ret; + + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + rt_sf = (struct rt_sigframe32 __user *) + (regs->gpr[1] + __SIGNAL_FRAMESIZE32 + 16); + if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf))) + goto bad; + if (do_setcontext32(&rt_sf->uc, regs, 1)) + goto bad; + + /* + * It's not clear whether or why it is desirable to save the + * sigaltstack setting on signal delivery and restore it on + * signal return. But other architectures do this and we have + * always done it up until now so it is probably better not to + * change it. -- paulus + * We use the sys32_ version that does the 32/64 bits conversion + * and takes userland pointer directly. What about error checking ? + * nobody does any... + */ + sys32_sigaltstack((u32)(u64)&rt_sf->uc.uc_stack, 0, 0, 0, 0, 0, regs); + + ret = regs->result; + + return ret; + + bad: + force_sig(SIGSEGV, current); + return 0; +} + + +/* + * OK, we're invoking a handler + */ +static int handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs * regs, unsigned long newsp) +{ + struct sigcontext32 __user *sc; + struct sigregs32 __user *frame; + unsigned long origsp = newsp; + + /* Set up Signal Frame */ + newsp -= sizeof(struct sigregs32); + frame = (struct sigregs32 __user *) newsp; + + /* Put a sigcontext on the stack */ + newsp -= sizeof(*sc); + sc = (struct sigcontext32 __user *) newsp; + + /* create a stack frame for the caller of the handler */ + newsp -= __SIGNAL_FRAMESIZE32; + + if (!access_ok(VERIFY_WRITE, (void __user *) newsp, origsp - newsp)) + goto badframe; + +#if _NSIG != 64 +#error "Please adjust handle_signal32()" +#endif + if (__put_user((u32)(u64)ka->sa.sa_handler, &sc->handler) + || __put_user(oldset->sig[0], &sc->oldmask) + || __put_user((oldset->sig[0] >> 32), &sc->_unused[3]) + || __put_user((u32)(u64)frame, &sc->regs) + || __put_user(sig, &sc->signal)) + goto badframe; + + if (vdso32_sigtramp && current->thread.vdso_base) { + if (save_user_regs(regs, &frame->mctx, 0)) + goto badframe; + regs->link = current->thread.vdso_base + vdso32_sigtramp; + } else { + if (save_user_regs(regs, &frame->mctx, __NR_sigreturn)) + goto badframe; + regs->link = (unsigned long) frame->mctx.tramp; + } + + if (put_user(regs->gpr[1], (unsigned long __user *)newsp)) + goto badframe; + regs->gpr[1] = (unsigned long) newsp; + regs->gpr[3] = sig; + regs->gpr[4] = (unsigned long) sc; + regs->nip = (unsigned long) ka->sa.sa_handler; + regs->trap = 0; + regs->result = 0; + + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + + return 1; + +badframe: +#if DEBUG_SIG + printk("badframe in handle_signal, regs=%p frame=%x newsp=%x\n", + regs, frame, *newspp); +#endif + force_sigsegv(sig, current); + return 0; +} + +/* + * Do a signal return; undo the signal stack. + */ +long sys32_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, + struct pt_regs *regs) +{ + struct sigcontext32 __user *sc; + struct sigcontext32 sigctx; + struct mcontext32 __user *sr; + sigset_t set; + int ret; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + sc = (struct sigcontext32 __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE32); + if (copy_from_user(&sigctx, sc, sizeof(sigctx))) + goto badframe; + + /* + * Note that PPC32 puts the upper 32 bits of the sigmask in the + * unused part of the signal stackframe + */ + set.sig[0] = sigctx.oldmask + ((long)(sigctx._unused[3]) << 32); + restore_sigmask(&set); + + sr = (struct mcontext32 __user *)(u64)sigctx.regs; + if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) + || restore_user_regs(regs, sr, 1)) + goto badframe; + + ret = regs->result; + return ret; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + + + +/* + * Start of do_signal32 routine + * + * This routine gets control when a pending signal needs to be processed + * in the 32 bit target thread - + * + * It handles both rt and non-rt signals + */ + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ + +int do_signal32(sigset_t *oldset, struct pt_regs *regs) +{ + siginfo_t info; + unsigned int frame, newsp; + int signr, ret; + struct k_sigaction ka; + + if (!oldset) + oldset = ¤t->blocked; + + newsp = frame = 0; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + + if (TRAP(regs) == 0x0C00 /* System Call! */ + && regs->ccr & 0x10000000 /* error signalled */ + && ((ret = regs->gpr[3]) == ERESTARTSYS + || ret == ERESTARTNOHAND || ret == ERESTARTNOINTR + || ret == ERESTART_RESTARTBLOCK)) { + + if (signr > 0 + && (ret == ERESTARTNOHAND || ret == ERESTART_RESTARTBLOCK + || (ret == ERESTARTSYS + && !(ka.sa.sa_flags & SA_RESTART)))) { + /* make the system call return an EINTR error */ + regs->result = -EINTR; + regs->gpr[3] = EINTR; + /* note that the cr0.SO bit is already set */ + } else { + regs->nip -= 4; /* Back up & retry system call */ + regs->result = 0; + regs->trap = 0; + if (ret == ERESTART_RESTARTBLOCK) + regs->gpr[0] = __NR_restart_syscall; + else + regs->gpr[3] = regs->orig_gpr3; + } + } + + if (signr == 0) + return 0; /* no signals delivered */ + + if ((ka.sa.sa_flags & SA_ONSTACK) && current->sas_ss_size + && (!on_sig_stack(regs->gpr[1]))) + newsp = (current->sas_ss_sp + current->sas_ss_size); + else + newsp = regs->gpr[1]; + newsp &= ~0xfUL; + + /* Whee! Actually deliver the signal. */ + if (ka.sa.sa_flags & SA_SIGINFO) + ret = handle_rt_signal32(signr, &ka, &info, oldset, regs, newsp); + else + ret = handle_signal32(signr, &ka, &info, oldset, regs, newsp); + + if (ret && !(ka.sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(¤t->sighand->siglock); + sigorsets(¤t->blocked, ¤t->blocked, + &ka.sa.sa_mask); + sigaddset(¤t->blocked, signr); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + } + + return ret; +} diff --git a/arch/ppc64/kernel/smp-tbsync.c b/arch/ppc64/kernel/smp-tbsync.c new file mode 100644 index 00000000000..7d8ec9996b3 --- /dev/null +++ b/arch/ppc64/kernel/smp-tbsync.c @@ -0,0 +1,179 @@ +/* + * Smp timebase synchronization for ppc. + * + * Copyright (C) 2003 Samuel Rydh (samuel@ibrium.se) + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUM_ITER 300 + +enum { + kExit=0, kSetAndTest, kTest +}; + +static struct { + volatile long tb; + volatile long mark; + volatile int cmd; + volatile int handshake; + int filler[3]; + + volatile int ack; + int filler2[7]; + + volatile int race_result; +} *tbsync; + +static volatile int running; + +static void __devinit +enter_contest( long mark, long add ) +{ + while( (long)(mftb() - mark) < 0 ) + tbsync->race_result = add; +} + +void __devinit +smp_generic_take_timebase( void ) +{ + int cmd; + long tb; + + local_irq_disable(); + while( !running ) + ; + rmb(); + + for( ;; ) { + tbsync->ack = 1; + while( !tbsync->handshake ) + ; + rmb(); + + cmd = tbsync->cmd; + tb = tbsync->tb; + tbsync->ack = 0; + if( cmd == kExit ) + return; + + if( cmd == kSetAndTest ) { + while( tbsync->handshake ) + ; + asm volatile ("mttbl %0" :: "r" (tb & 0xfffffffful) ); + asm volatile ("mttbu %0" :: "r" (tb >> 32) ); + } else { + while( tbsync->handshake ) + ; + } + enter_contest( tbsync->mark, -1 ); + } + local_irq_enable(); +} + +static int __devinit +start_contest( int cmd, long offset, long num ) +{ + int i, score=0; + long tb, mark; + + tbsync->cmd = cmd; + + local_irq_disable(); + for( i=-3; itb = tb + offset; + tbsync->mark = mark = tb + 400; + + wmb(); + + tbsync->handshake = 1; + while( tbsync->ack ) + ; + + while( (long)(mftb() - tb) <= 0 ) + ; + tbsync->handshake = 0; + enter_contest( mark, 1 ); + + while( !tbsync->ack ) + ; + + if ((tbsync->tb ^ (long)mftb()) & 0x8000000000000000ul) + continue; + if( i++ > 0 ) + score += tbsync->race_result; + } + local_irq_enable(); + return score; +} + +void __devinit +smp_generic_give_timebase( void ) +{ + int i, score, score2, old, min=0, max=5000, offset=1000; + + printk("Synchronizing timebase\n"); + + /* if this fails then this kernel won't work anyway... */ + tbsync = kmalloc( sizeof(*tbsync), GFP_KERNEL ); + memset( tbsync, 0, sizeof(*tbsync) ); + mb(); + running = 1; + + while( !tbsync->ack ) + ; + + printk("Got ack\n"); + + /* binary search */ + for( old=-1 ; old != offset ; offset=(min+max)/2 ) { + score = start_contest( kSetAndTest, offset, NUM_ITER ); + + printk("score %d, offset %d\n", score, offset ); + + if( score > 0 ) + max = offset; + else + min = offset; + old = offset; + } + score = start_contest( kSetAndTest, min, NUM_ITER ); + score2 = start_contest( kSetAndTest, max, NUM_ITER ); + + printk( "Min %d (score %d), Max %d (score %d)\n", min, score, max, score2 ); + score = abs( score ); + score2 = abs( score2 ); + offset = (score < score2) ? min : max; + + /* guard against inaccurate mttb */ + for( i=0; i<10; i++ ) { + start_contest( kSetAndTest, offset, NUM_ITER/10 ); + + if( (score2=start_contest(kTest, offset, NUM_ITER)) < 0 ) + score2 = -score2; + if( score2 <= score || score2 < 20 ) + break; + } + printk("Final offset: %d (%d/%d)\n", offset, score2, NUM_ITER ); + + /* exiting */ + tbsync->cmd = kExit; + wmb(); + tbsync->handshake = 1; + while( tbsync->ack ) + ; + tbsync->handshake = 0; + kfree( tbsync ); + tbsync = NULL; + running = 0; +} diff --git a/arch/ppc64/kernel/smp.c b/arch/ppc64/kernel/smp.c new file mode 100644 index 00000000000..1c92da3e452 --- /dev/null +++ b/arch/ppc64/kernel/smp.c @@ -0,0 +1,622 @@ +/* + * SMP support for ppc. + * + * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great + * deal of code from the sparc and intel versions. + * + * Copyright (C) 1999 Cort Dougan + * + * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpic.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +cpumask_t cpu_possible_map = CPU_MASK_NONE; +cpumask_t cpu_online_map = CPU_MASK_NONE; +cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; + +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(cpu_possible_map); + +struct smp_ops_t *smp_ops; + +static volatile unsigned int cpu_callin_map[NR_CPUS]; + +extern unsigned char stab_array[]; + +void smp_call_function_interrupt(void); + +int smt_enabled_at_boot = 1; + +#ifdef CONFIG_PPC_MULTIPLATFORM +void smp_mpic_message_pass(int target, int msg) +{ + /* make sure we're sending something that translates to an IPI */ + if ( msg > 0x3 ){ + printk("SMP %d: smp_message_pass: unknown msg %d\n", + smp_processor_id(), msg); + return; + } + switch ( target ) + { + case MSG_ALL: + mpic_send_ipi(msg, 0xffffffff); + break; + case MSG_ALL_BUT_SELF: + mpic_send_ipi(msg, 0xffffffff & ~(1 << smp_processor_id())); + break; + default: + mpic_send_ipi(msg, 1 << target); + break; + } +} + +int __init smp_mpic_probe(void) +{ + int nr_cpus; + + DBG("smp_mpic_probe()...\n"); + + nr_cpus = cpus_weight(cpu_possible_map); + + DBG("nr_cpus: %d\n", nr_cpus); + + if (nr_cpus > 1) + mpic_request_ipis(); + + return nr_cpus; +} + +void __devinit smp_mpic_setup_cpu(int cpu) +{ + mpic_setup_this_cpu(); +} + +void __devinit smp_generic_kick_cpu(int nr) +{ + BUG_ON(nr < 0 || nr >= NR_CPUS); + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + paca[nr].cpu_start = 1; + mb(); +} + +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +static void __init smp_space_timers(unsigned int max_cpus) +{ + int i; + unsigned long offset = tb_ticks_per_jiffy / max_cpus; + unsigned long previous_tb = paca[boot_cpuid].next_jiffy_update_tb; + + for_each_cpu(i) { + if (i != boot_cpuid) { + paca[i].next_jiffy_update_tb = + previous_tb + offset; + previous_tb = paca[i].next_jiffy_update_tb; + } + } +} + +void smp_message_recv(int msg, struct pt_regs *regs) +{ + switch(msg) { + case PPC_MSG_CALL_FUNCTION: + smp_call_function_interrupt(); + break; + case PPC_MSG_RESCHEDULE: + /* XXX Do we have to do this? */ + set_need_resched(); + break; +#if 0 + case PPC_MSG_MIGRATE_TASK: + /* spare */ + break; +#endif +#ifdef CONFIG_DEBUGGER + case PPC_MSG_DEBUGGER_BREAK: + debugger_ipi(regs); + break; +#endif + default: + printk("SMP %d: smp_message_recv(): unknown msg %d\n", + smp_processor_id(), msg); + break; + } +} + +void smp_send_reschedule(int cpu) +{ + smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE); +} + +#ifdef CONFIG_DEBUGGER +void smp_send_debugger_break(int cpu) +{ + smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); +} +#endif + +static void stop_this_cpu(void *dummy) +{ + local_irq_disable(); + while (1) + ; +} + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 1, 0); +} + +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. + * Stolen from the i386 version. + */ +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); + +static struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +} *call_data; + +/* delay of at least 8 seconds on 1GHz cpu */ +#define SMP_CALL_TIMEOUT (1UL << (30 + 3)) + +/* + * This function sends a 'generic call function' IPI to all other CPUs + * in the system. + * + * [SUMMARY] Run a function on all other CPUs. + * The function to run. This must be fast and non-blocking. + * An arbitrary pointer to pass to the function. + * currently unused. + * If true, wait (atomically) until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. Does not return until + * remote CPUs are nearly ready to execute <> or are or have executed. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +int smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait) +{ + struct call_data_struct data; + int ret = -1, cpus; + unsigned long timeout; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + spin_lock(&call_lock); + /* Must grab online cpu count with preempt disabled, otherwise + * it can change. */ + cpus = num_online_cpus() - 1; + if (!cpus) { + ret = 0; + goto out; + } + + call_data = &data; + wmb(); + /* Send a message to all other CPUs and wait for them to respond */ + smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION); + + /* Wait for response */ + timeout = SMP_CALL_TIMEOUT; + while (atomic_read(&data.started) != cpus) { + HMT_low(); + if (--timeout == 0) { + printk("smp_call_function on cpu %d: other cpus not " + "responding (%d)\n", smp_processor_id(), + atomic_read(&data.started)); + debugger(NULL); + goto out; + } + } + + if (wait) { + timeout = SMP_CALL_TIMEOUT; + while (atomic_read(&data.finished) != cpus) { + HMT_low(); + if (--timeout == 0) { + printk("smp_call_function on cpu %d: other " + "cpus not finishing (%d/%d)\n", + smp_processor_id(), + atomic_read(&data.finished), + atomic_read(&data.started)); + debugger(NULL); + goto out; + } + } + } + + ret = 0; + +out: + call_data = NULL; + HMT_medium(); + spin_unlock(&call_lock); + return ret; +} + +EXPORT_SYMBOL(smp_call_function); + +void smp_call_function_interrupt(void) +{ + void (*func) (void *info); + void *info; + int wait; + + /* call_data will be NULL if the sender timed out while + * waiting on us to receive the call. + */ + if (!call_data) + return; + + func = call_data->func; + info = call_data->info; + wait = call_data->wait; + + if (!wait) + smp_mb__before_atomic_inc(); + + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + atomic_inc(&call_data->started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + (*func)(info); + if (wait) { + smp_mb__before_atomic_inc(); + atomic_inc(&call_data->finished); + } +} + +extern unsigned long decr_overclock; +extern struct gettimeofday_struct do_gtod; + +struct thread_info *current_set[NR_CPUS]; + +DECLARE_PER_CPU(unsigned int, pvr); + +static void __devinit smp_store_cpu_info(int id) +{ + per_cpu(pvr, id) = mfspr(SPRN_PVR); +} + +static void __init smp_create_idle(unsigned int cpu) +{ + struct task_struct *p; + + /* create a process for the processor */ + p = fork_idle(cpu); + if (IS_ERR(p)) + panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p)); + paca[cpu].__current = p; + current_set[cpu] = p->thread_info; +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + unsigned int cpu; + + DBG("smp_prepare_cpus\n"); + + /* + * setup_cpu may need to be called on the boot cpu. We havent + * spun any cpus up but lets be paranoid. + */ + BUG_ON(boot_cpuid != smp_processor_id()); + + /* Fixup boot cpu */ + smp_store_cpu_info(boot_cpuid); + cpu_callin_map[boot_cpuid] = 1; + +#ifndef CONFIG_PPC_ISERIES + paca[boot_cpuid].next_jiffy_update_tb = tb_last_stamp = get_tb(); + + /* + * Should update do_gtod.stamp_xsec. + * For now we leave it which means the time can be some + * number of msecs off until someone does a settimeofday() + */ + do_gtod.varp->tb_orig_stamp = tb_last_stamp; + systemcfg->tb_orig_stamp = tb_last_stamp; +#endif + + max_cpus = smp_ops->probe(); + + smp_space_timers(max_cpus); + + for_each_cpu(cpu) + if (cpu != boot_cpuid) + smp_create_idle(cpu); +} + +void __devinit smp_prepare_boot_cpu(void) +{ + BUG_ON(smp_processor_id() != boot_cpuid); + + cpu_set(boot_cpuid, cpu_online_map); + + paca[boot_cpuid].__current = current; + current_set[boot_cpuid] = current->thread_info; +} + +#ifdef CONFIG_HOTPLUG_CPU +/* State of each CPU during hotplug phases */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + +int generic_cpu_disable(void) +{ + unsigned int cpu = smp_processor_id(); + + if (cpu == boot_cpuid) + return -EBUSY; + + systemcfg->processorCount--; + cpu_clear(cpu, cpu_online_map); + fixup_irqs(cpu_online_map); + return 0; +} + +int generic_cpu_enable(unsigned int cpu) +{ + /* Do the normal bootup if we haven't + * already bootstrapped. */ + if (system_state != SYSTEM_RUNNING) + return -ENOSYS; + + /* get the target out of it's holding state */ + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + wmb(); + + while (!cpu_online(cpu)) + cpu_relax(); + + fixup_irqs(cpu_online_map); + /* counter the irq disable in fixup_irqs */ + local_irq_enable(); + return 0; +} + +void generic_cpu_die(unsigned int cpu) +{ + int i; + + for (i = 0; i < 100; i++) { + rmb(); + if (per_cpu(cpu_state, cpu) == CPU_DEAD) + return; + msleep(100); + } + printk(KERN_ERR "CPU%d didn't die...\n", cpu); +} + +void generic_mach_cpu_die(void) +{ + unsigned int cpu; + + local_irq_disable(); + cpu = smp_processor_id(); + printk(KERN_DEBUG "CPU%d offline\n", cpu); + __get_cpu_var(cpu_state) = CPU_DEAD; + wmb(); + while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + cpu_relax(); + + flush_tlb_pending(); + cpu_set(cpu, cpu_online_map); + local_irq_enable(); +} +#endif + +static int __devinit cpu_enable(unsigned int cpu) +{ + if (smp_ops->cpu_enable) + return smp_ops->cpu_enable(cpu); + + return -ENOSYS; +} + +int __devinit __cpu_up(unsigned int cpu) +{ + int c; + + if (!cpu_enable(cpu)) + return 0; + + if (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)) + return -EINVAL; + + paca[cpu].default_decr = tb_ticks_per_jiffy / decr_overclock; + + if (!cpu_has_feature(CPU_FTR_SLB)) { + void *tmp; + + /* maximum of 48 CPUs on machines with a segment table */ + if (cpu >= 48) + BUG(); + + tmp = &stab_array[PAGE_SIZE * cpu]; + memset(tmp, 0, PAGE_SIZE); + paca[cpu].stab_addr = (unsigned long)tmp; + paca[cpu].stab_real = virt_to_abs(tmp); + } + + /* Make sure callin-map entry is 0 (can be leftover a CPU + * hotplug + */ + cpu_callin_map[cpu] = 0; + + /* The information for processor bringup must + * be written out to main store before we release + * the processor. + */ + mb(); + + /* wake up cpus */ + DBG("smp: kicking cpu %d\n", cpu); + smp_ops->kick_cpu(cpu); + + /* + * wait to see if the cpu made a callin (is actually up). + * use this value that I found through experimentation. + * -- Cort + */ + if (system_state < SYSTEM_RUNNING) + for (c = 5000; c && !cpu_callin_map[cpu]; c--) + udelay(100); +#ifdef CONFIG_HOTPLUG_CPU + else + /* + * CPUs can take much longer to come up in the + * hotplug case. Wait five seconds. + */ + for (c = 25; c && !cpu_callin_map[cpu]; c--) { + msleep(200); + } +#endif + + if (!cpu_callin_map[cpu]) { + printk("Processor %u is stuck.\n", cpu); + return -ENOENT; + } + + printk("Processor %u found.\n", cpu); + + if (smp_ops->give_timebase) + smp_ops->give_timebase(); + + /* Wait until cpu puts itself in the online map */ + while (!cpu_online(cpu)) + cpu_relax(); + + return 0; +} + + +/* Activate a secondary processor. */ +int __devinit start_secondary(void *unused) +{ + unsigned int cpu = smp_processor_id(); + + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + + smp_store_cpu_info(cpu); + set_dec(paca[cpu].default_decr); + cpu_callin_map[cpu] = 1; + + smp_ops->setup_cpu(cpu); + if (smp_ops->take_timebase) + smp_ops->take_timebase(); + + spin_lock(&call_lock); + cpu_set(cpu, cpu_online_map); + spin_unlock(&call_lock); + + local_irq_enable(); + + cpu_idle(); + return 0; +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return 0; +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ + cpumask_t old_mask; + + /* We want the setup_cpu() here to be called from CPU 0, but our + * init thread may have been "borrowed" by another CPU in the meantime + * se we pin us down to CPU 0 for a short while + */ + old_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid)); + + smp_ops->setup_cpu(boot_cpuid); + + set_cpus_allowed(current, old_mask); +} + +#ifdef CONFIG_HOTPLUG_CPU +int __cpu_disable(void) +{ + if (smp_ops->cpu_disable) + return smp_ops->cpu_disable(); + + return -ENOSYS; +} + +void __cpu_die(unsigned int cpu) +{ + if (smp_ops->cpu_die) + smp_ops->cpu_die(cpu); +} +#endif diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c new file mode 100644 index 00000000000..7cf7a960002 --- /dev/null +++ b/arch/ppc64/kernel/sys_ppc32.c @@ -0,0 +1,1329 @@ +/* + * sys_ppc32.c: Conversion between 32bit and 64bit native syscalls. + * + * Copyright (C) 2001 IBM + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +/* readdir & getdents */ +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) +#define ROUND_UP(x) (((x)+sizeof(u32)-1) & ~(sizeof(u32)-1)) + +struct old_linux_dirent32 { + u32 d_ino; + u32 d_offset; + unsigned short d_namlen; + char d_name[1]; +}; + +struct readdir_callback32 { + struct old_linux_dirent32 __user * dirent; + int count; +}; + +static int fillonedir(void * __buf, const char * name, int namlen, + off_t offset, ino_t ino, unsigned int d_type) +{ + struct readdir_callback32 * buf = (struct readdir_callback32 *) __buf; + struct old_linux_dirent32 __user * dirent; + + if (buf->count) + return -EINVAL; + buf->count++; + dirent = buf->dirent; + put_user(ino, &dirent->d_ino); + put_user(offset, &dirent->d_offset); + put_user(namlen, &dirent->d_namlen); + copy_to_user(dirent->d_name, name, namlen); + put_user(0, dirent->d_name + namlen); + return 0; +} + +asmlinkage int old32_readdir(unsigned int fd, struct old_linux_dirent32 __user *dirent, unsigned int count) +{ + int error = -EBADF; + struct file * file; + struct readdir_callback32 buf; + + file = fget(fd); + if (!file) + goto out; + + buf.count = 0; + buf.dirent = dirent; + + error = vfs_readdir(file, (filldir_t)fillonedir, &buf); + if (error < 0) + goto out_putf; + error = buf.count; + +out_putf: + fput(file); +out: + return error; +} + +struct linux_dirent32 { + u32 d_ino; + u32 d_off; + unsigned short d_reclen; + char d_name[1]; +}; + +struct getdents_callback32 { + struct linux_dirent32 __user * current_dir; + struct linux_dirent32 __user * previous; + int count; + int error; +}; + +static int filldir(void * __buf, const char * name, int namlen, off_t offset, + ino_t ino, unsigned int d_type) +{ + struct linux_dirent32 __user * dirent; + struct getdents_callback32 * buf = (struct getdents_callback32 *) __buf; + int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 2); + + buf->error = -EINVAL; /* only used if we fail.. */ + if (reclen > buf->count) + return -EINVAL; + dirent = buf->previous; + if (dirent) { + if (__put_user(offset, &dirent->d_off)) + goto efault; + } + dirent = buf->current_dir; + if (__put_user(ino, &dirent->d_ino)) + goto efault; + if (__put_user(reclen, &dirent->d_reclen)) + goto efault; + if (copy_to_user(dirent->d_name, name, namlen)) + goto efault; + if (__put_user(0, dirent->d_name + namlen)) + goto efault; + if (__put_user(d_type, (char __user *) dirent + reclen - 1)) + goto efault; + buf->previous = dirent; + dirent = (void __user *)dirent + reclen; + buf->current_dir = dirent; + buf->count -= reclen; + return 0; +efault: + buf->error = -EFAULT; + return -EFAULT; +} + +asmlinkage long sys32_getdents(unsigned int fd, struct linux_dirent32 __user *dirent, + unsigned int count) +{ + struct file * file; + struct linux_dirent32 __user * lastdirent; + struct getdents_callback32 buf; + int error; + + error = -EFAULT; + if (!access_ok(VERIFY_WRITE, dirent, count)) + goto out; + + error = -EBADF; + file = fget(fd); + if (!file) + goto out; + + buf.current_dir = dirent; + buf.previous = NULL; + buf.count = count; + buf.error = 0; + + error = vfs_readdir(file, (filldir_t)filldir, &buf); + if (error < 0) + goto out_putf; + error = buf.error; + lastdirent = buf.previous; + if (lastdirent) { + if (put_user(file->f_pos, &lastdirent->d_off)) + error = -EFAULT; + else + error = count - buf.count; + } + +out_putf: + fput(file); +out: + return error; +} + +asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + compat_uptr_t tvp_x) +{ + /* sign extend n */ + return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x)); +} + +int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf) +{ + long err; + + if (stat->size > MAX_NON_LFS || !new_valid_dev(stat->dev) || + !new_valid_dev(stat->rdev)) + return -EOVERFLOW; + + err = access_ok(VERIFY_WRITE, statbuf, sizeof(*statbuf)) ? 0 : -EFAULT; + err |= __put_user(new_encode_dev(stat->dev), &statbuf->st_dev); + err |= __put_user(stat->ino, &statbuf->st_ino); + err |= __put_user(stat->mode, &statbuf->st_mode); + err |= __put_user(stat->nlink, &statbuf->st_nlink); + err |= __put_user(stat->uid, &statbuf->st_uid); + err |= __put_user(stat->gid, &statbuf->st_gid); + err |= __put_user(new_encode_dev(stat->rdev), &statbuf->st_rdev); + err |= __put_user(stat->size, &statbuf->st_size); + err |= __put_user(stat->atime.tv_sec, &statbuf->st_atime); + err |= __put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec); + err |= __put_user(stat->mtime.tv_sec, &statbuf->st_mtime); + err |= __put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec); + err |= __put_user(stat->ctime.tv_sec, &statbuf->st_ctime); + err |= __put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec); + err |= __put_user(stat->blksize, &statbuf->st_blksize); + err |= __put_user(stat->blocks, &statbuf->st_blocks); + err |= __put_user(0, &statbuf->__unused4[0]); + err |= __put_user(0, &statbuf->__unused4[1]); + + return err; +} + +/* Note: it is necessary to treat option as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sysfs(u32 option, u32 arg1, u32 arg2) +{ + return sys_sysfs((int)option, arg1, arg2); +} + +/* Handle adjtimex compatibility. */ +struct timex32 { + u32 modes; + s32 offset, freq, maxerror, esterror; + s32 status, constant, precision, tolerance; + struct compat_timeval time; + s32 tick; + s32 ppsfreq, jitter, shift, stabil; + s32 jitcnt, calcnt, errcnt, stbcnt; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; +}; + +extern int do_adjtimex(struct timex *); +extern void ppc_adjtimex(void); + +asmlinkage long sys32_adjtimex(struct timex32 __user *utp) +{ + struct timex txc; + int ret; + + memset(&txc, 0, sizeof(struct timex)); + + if(get_user(txc.modes, &utp->modes) || + __get_user(txc.offset, &utp->offset) || + __get_user(txc.freq, &utp->freq) || + __get_user(txc.maxerror, &utp->maxerror) || + __get_user(txc.esterror, &utp->esterror) || + __get_user(txc.status, &utp->status) || + __get_user(txc.constant, &utp->constant) || + __get_user(txc.precision, &utp->precision) || + __get_user(txc.tolerance, &utp->tolerance) || + __get_user(txc.time.tv_sec, &utp->time.tv_sec) || + __get_user(txc.time.tv_usec, &utp->time.tv_usec) || + __get_user(txc.tick, &utp->tick) || + __get_user(txc.ppsfreq, &utp->ppsfreq) || + __get_user(txc.jitter, &utp->jitter) || + __get_user(txc.shift, &utp->shift) || + __get_user(txc.stabil, &utp->stabil) || + __get_user(txc.jitcnt, &utp->jitcnt) || + __get_user(txc.calcnt, &utp->calcnt) || + __get_user(txc.errcnt, &utp->errcnt) || + __get_user(txc.stbcnt, &utp->stbcnt)) + return -EFAULT; + + ret = do_adjtimex(&txc); + + /* adjust the conversion of TB to time of day to track adjtimex */ + ppc_adjtimex(); + + if(put_user(txc.modes, &utp->modes) || + __put_user(txc.offset, &utp->offset) || + __put_user(txc.freq, &utp->freq) || + __put_user(txc.maxerror, &utp->maxerror) || + __put_user(txc.esterror, &utp->esterror) || + __put_user(txc.status, &utp->status) || + __put_user(txc.constant, &utp->constant) || + __put_user(txc.precision, &utp->precision) || + __put_user(txc.tolerance, &utp->tolerance) || + __put_user(txc.time.tv_sec, &utp->time.tv_sec) || + __put_user(txc.time.tv_usec, &utp->time.tv_usec) || + __put_user(txc.tick, &utp->tick) || + __put_user(txc.ppsfreq, &utp->ppsfreq) || + __put_user(txc.jitter, &utp->jitter) || + __put_user(txc.shift, &utp->shift) || + __put_user(txc.stabil, &utp->stabil) || + __put_user(txc.jitcnt, &utp->jitcnt) || + __put_user(txc.calcnt, &utp->calcnt) || + __put_user(txc.errcnt, &utp->errcnt) || + __put_user(txc.stbcnt, &utp->stbcnt)) + ret = -EFAULT; + + return ret; +} + + +/* These are here just in case some old sparc32 binary calls it. */ +asmlinkage long sys32_pause(void) +{ + current->state = TASK_INTERRUPTIBLE; + schedule(); + + return -ERESTARTNOHAND; +} + + + +static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) +{ + long usec; + + if (!access_ok(VERIFY_READ, i, sizeof(*i))) + return -EFAULT; + if (__get_user(o->tv_sec, &i->tv_sec)) + return -EFAULT; + if (__get_user(usec, &i->tv_usec)) + return -EFAULT; + o->tv_nsec = usec * 1000; + return 0; +} + +static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) +{ + return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || + (__put_user(i->tv_sec, &o->tv_sec) | + __put_user(i->tv_usec, &o->tv_usec))); +} + +struct sysinfo32 { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + unsigned short procs; + unsigned short pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(int)-sizeof(int)]; +}; + +asmlinkage long sys32_sysinfo(struct sysinfo32 __user *info) +{ + struct sysinfo s; + int ret, err; + int bitcount=0; + mm_segment_t old_fs = get_fs (); + + /* The __user cast is valid due to set_fs() */ + set_fs (KERNEL_DS); + ret = sys_sysinfo((struct sysinfo __user *)&s); + set_fs (old_fs); + + /* Check to see if any memory value is too large for 32-bit and + * scale down if needed. + */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + s.totalram >>=bitcount; + s.freeram >>= bitcount; + s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + + err = put_user (s.uptime, &info->uptime); + err |= __put_user (s.loads[0], &info->loads[0]); + err |= __put_user (s.loads[1], &info->loads[1]); + err |= __put_user (s.loads[2], &info->loads[2]); + err |= __put_user (s.totalram, &info->totalram); + err |= __put_user (s.freeram, &info->freeram); + err |= __put_user (s.sharedram, &info->sharedram); + err |= __put_user (s.bufferram, &info->bufferram); + err |= __put_user (s.totalswap, &info->totalswap); + err |= __put_user (s.freeswap, &info->freeswap); + err |= __put_user (s.procs, &info->procs); + err |= __put_user (s.totalhigh, &info->totalhigh); + err |= __put_user (s.freehigh, &info->freehigh); + err |= __put_user (s.mem_unit, &info->mem_unit); + if (err) + return -EFAULT; + + return ret; +} + + + + +/* Translations due to time_t size differences. Which affects all + sorts of things, like timeval and itimerval. */ +extern struct timezone sys_tz; + +asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +{ + if (tv) { + struct timeval ktv; + do_gettimeofday(&ktv); + if (put_tv32(tv, &ktv)) + return -EFAULT; + } + if (tz) { + if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) + return -EFAULT; + } + + return 0; +} + + + +asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +{ + struct timespec kts; + struct timezone ktz; + + if (tv) { + if (get_ts32(&kts, tv)) + return -EFAULT; + } + if (tz) { + if (copy_from_user(&ktz, tz, sizeof(ktz))) + return -EFAULT; + } + + return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); +} + +#ifdef CONFIG_SYSVIPC +long sys32_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr, + u32 fifth) +{ + int version; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + + case SEMTIMEDOP: + if (fifth) + /* sign extend semid */ + return compat_sys_semtimedop((int)first, + compat_ptr(ptr), second, + compat_ptr(fifth)); + /* else fall through for normal semop() */ + case SEMOP: + /* struct sembuf is the same on 32 and 64bit :)) */ + /* sign extend semid */ + return sys_semtimedop((int)first, compat_ptr(ptr), second, + NULL); + case SEMGET: + /* sign extend key, nsems */ + return sys_semget((int)first, (int)second, third); + case SEMCTL: + /* sign extend semid, semnum */ + return compat_sys_semctl((int)first, (int)second, third, + compat_ptr(ptr)); + + case MSGSND: + /* sign extend msqid */ + return compat_sys_msgsnd((int)first, (int)second, third, + compat_ptr(ptr)); + case MSGRCV: + /* sign extend msqid, msgtyp */ + return compat_sys_msgrcv((int)first, second, (int)fifth, + third, version, compat_ptr(ptr)); + case MSGGET: + /* sign extend key */ + return sys_msgget((int)first, second); + case MSGCTL: + /* sign extend msqid */ + return compat_sys_msgctl((int)first, second, compat_ptr(ptr)); + + case SHMAT: + /* sign extend shmid */ + return compat_sys_shmat((int)first, second, third, version, + compat_ptr(ptr)); + case SHMDT: + return sys_shmdt(compat_ptr(ptr)); + case SHMGET: + /* sign extend key_t */ + return sys_shmget((int)first, second, third); + case SHMCTL: + /* sign extend shmid */ + return compat_sys_shmctl((int)first, second, compat_ptr(ptr)); + + default: + return -ENOSYS; + } + + return -ENOSYS; +} +#endif + +/* Note: it is necessary to treat out_fd and in_fd as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sendfile(u32 out_fd, u32 in_fd, compat_off_t __user * offset, u32 count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + off_t of; + off_t __user *up; + + if (offset && get_user(of, offset)) + return -EFAULT; + + /* The __user pointer cast is valid because of the set_fs() */ + set_fs(KERNEL_DS); + up = offset ? (off_t __user *) &of : NULL; + ret = sys_sendfile((int)out_fd, (int)in_fd, up, count); + set_fs(old_fs); + + if (offset && put_user(of, offset)) + return -EFAULT; + + return ret; +} + +asmlinkage int sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + loff_t lof; + loff_t __user *up; + + if (offset && get_user(lof, offset)) + return -EFAULT; + + /* The __user pointer cast is valid because of the set_fs() */ + set_fs(KERNEL_DS); + up = offset ? (loff_t __user *) &lof : NULL; + ret = sys_sendfile64(out_fd, in_fd, up, count); + set_fs(old_fs); + + if (offset && put_user(lof, offset)) + return -EFAULT; + + return ret; +} + +long sys32_execve(unsigned long a0, unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, unsigned long a5, + struct pt_regs *regs) +{ + int error; + char * filename; + + filename = getname((char __user *) a0); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + + error = compat_do_execve(filename, compat_ptr(a1), compat_ptr(a2), regs); + + if (error == 0) { + task_lock(current); + current->ptrace &= ~PT_DTRACE; + task_unlock(current); + } + putname(filename); + +out: + return error; +} + +/* Set up a thread for executing a new program. */ +void start_thread32(struct pt_regs* regs, unsigned long nip, unsigned long sp) +{ + set_fs(USER_DS); + + /* + * If we exec out of a kernel thread then thread.regs will not be + * set. Do it now. + */ + if (!current->thread.regs) { + unsigned long childregs = (unsigned long)current->thread_info + + THREAD_SIZE; + childregs -= sizeof(struct pt_regs); + current->thread.regs = (struct pt_regs *)childregs; + } + + /* + * ELF_PLAT_INIT already clears all registers but it also sets r2. + * So just clear r2 here. + */ + regs->gpr[2] = 0; + + regs->nip = nip; + regs->gpr[1] = sp; + regs->msr = MSR_USER32; +#ifndef CONFIG_SMP + if (last_task_used_math == current) + last_task_used_math = 0; +#endif /* CONFIG_SMP */ + current->thread.fpscr = 0; + memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); +#ifdef CONFIG_ALTIVEC +#ifndef CONFIG_SMP + if (last_task_used_altivec == current) + last_task_used_altivec = 0; +#endif /* CONFIG_SMP */ + memset(current->thread.vr, 0, sizeof(current->thread.vr)); + current->thread.vscr.u[0] = 0; + current->thread.vscr.u[1] = 0; + current->thread.vscr.u[2] = 0; + current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */ + current->thread.vrsave = 0; + current->thread.used_vr = 0; +#endif /* CONFIG_ALTIVEC */ +} + +/* Note: it is necessary to treat option as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_prctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 arg5) +{ + return sys_prctl((int)option, + (unsigned long) arg2, + (unsigned long) arg3, + (unsigned long) arg4, + (unsigned long) arg5); +} + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_rr_get_interval(u32 pid, struct compat_timespec __user *interval) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs (); + + /* The __user pointer cast is valid because of the set_fs() */ + set_fs (KERNEL_DS); + ret = sys_sched_rr_get_interval((int)pid, (struct timespec __user *) &t); + set_fs (old_fs); + if (put_compat_timespec(&t, interval)) + return -EFAULT; + return ret; +} + +asmlinkage int sys32_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) +{ + return sys_pciconfig_read((unsigned long) bus, + (unsigned long) dfn, + (unsigned long) off, + (unsigned long) len, + compat_ptr(ubuf)); +} + +asmlinkage int sys32_pciconfig_write(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) +{ + return sys_pciconfig_write((unsigned long) bus, + (unsigned long) dfn, + (unsigned long) off, + (unsigned long) len, + compat_ptr(ubuf)); +} + +#define IOBASE_BRIDGE_NUMBER 0 +#define IOBASE_MEMORY 1 +#define IOBASE_IO 2 +#define IOBASE_ISA_IO 3 +#define IOBASE_ISA_MEM 4 + +asmlinkage int sys32_pciconfig_iobase(u32 which, u32 in_bus, u32 in_devfn) +{ + struct pci_controller* hose; + struct list_head *ln; + struct pci_bus *bus = NULL; + struct device_node *hose_node; + + /* Argh ! Please forgive me for that hack, but that's the + * simplest way to get existing XFree to not lockup on some + * G5 machines... So when something asks for bus 0 io base + * (bus 0 is HT root), we return the AGP one instead. + */ +#ifdef CONFIG_PPC_PMAC + if (systemcfg->platform == PLATFORM_POWERMAC && + machine_is_compatible("MacRISC4")) + if (in_bus == 0) + in_bus = 0xf0; +#endif /* CONFIG_PPC_PMAC */ + + /* That syscall isn't quite compatible with PCI domains, but it's + * used on pre-domains setup. We return the first match + */ + + for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) { + bus = pci_bus_b(ln); + if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate)) + break; + bus = NULL; + } + if (bus == NULL || bus->sysdata == NULL) + return -ENODEV; + + hose_node = (struct device_node *)bus->sysdata; + hose = hose_node->phb; + + switch (which) { + case IOBASE_BRIDGE_NUMBER: + return (long)hose->first_busno; + case IOBASE_MEMORY: + return (long)hose->pci_mem_offset; + case IOBASE_IO: + return (long)hose->io_base_phys; + case IOBASE_ISA_IO: + return (long)isa_io_base; + case IOBASE_ISA_MEM: + return -EINVAL; + } + + return -EOPNOTSUPP; +} + + +asmlinkage int ppc64_newuname(struct new_utsname __user * name) +{ + int errno = sys_newuname(name); + + if (current->personality == PER_LINUX32 && !errno) { + if(copy_to_user(name->machine, "ppc\0\0", 8)) { + errno = -EFAULT; + } + } + return errno; +} + +asmlinkage int ppc64_personality(unsigned long personality) +{ + int ret; + if (current->personality == PER_LINUX32 && personality == PER_LINUX) + personality = PER_LINUX32; + ret = sys_personality(personality); + if (ret == PER_LINUX32) + ret = PER_LINUX; + return ret; +} + + + +/* Note: it is necessary to treat mode as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_access(const char __user * filename, u32 mode) +{ + return sys_access(filename, (int)mode); +} + + +/* Note: it is necessary to treat mode as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_creat(const char __user * pathname, u32 mode) +{ + return sys_creat(pathname, (int)mode); +} + + +/* Note: it is necessary to treat pid and options as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_waitpid(u32 pid, unsigned int __user * stat_addr, u32 options) +{ + return sys_waitpid((int)pid, stat_addr, (int)options); +} + + +/* Note: it is necessary to treat gidsetsize as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_getgroups(u32 gidsetsize, gid_t __user *grouplist) +{ + return sys_getgroups((int)gidsetsize, grouplist); +} + + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_getpgid(u32 pid) +{ + return sys_getpgid((int)pid); +} + + +/* Note: it is necessary to treat which and who as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_getpriority(u32 which, u32 who) +{ + return sys_getpriority((int)which, (int)who); +} + + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_getsid(u32 pid) +{ + return sys_getsid((int)pid); +} + + +/* Note: it is necessary to treat pid and sig as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_kill(u32 pid, u32 sig) +{ + return sys_kill((int)pid, (int)sig); +} + + +/* Note: it is necessary to treat mode as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_mkdir(const char __user * pathname, u32 mode) +{ + return sys_mkdir(pathname, (int)mode); +} + +long sys32_nice(u32 increment) +{ + /* sign extend increment */ + return sys_nice((int)increment); +} + +off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin) +{ + /* sign extend n */ + return sys_lseek(fd, (int)offset, origin); +} + +/* + * This is just a version for 32-bit applications which does + * not force O_LARGEFILE on. + */ +asmlinkage long sys32_open(const char __user * filename, int flags, int mode) +{ + char * tmp; + int fd, error; + + tmp = getname(filename); + fd = PTR_ERR(tmp); + if (!IS_ERR(tmp)) { + fd = get_unused_fd(); + if (fd >= 0) { + struct file * f = filp_open(tmp, flags, mode); + error = PTR_ERR(f); + if (IS_ERR(f)) + goto out_error; + fd_install(fd, f); + } +out: + putname(tmp); + } + return fd; + +out_error: + put_unused_fd(fd); + fd = error; + goto out; +} + +/* Note: it is necessary to treat bufsiz as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_readlink(const char __user * path, char __user * buf, u32 bufsiz) +{ + return sys_readlink(path, buf, (int)bufsiz); +} + +/* Note: it is necessary to treat option as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_get_priority_max(u32 policy) +{ + return sys_sched_get_priority_max((int)policy); +} + + +/* Note: it is necessary to treat policy as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_get_priority_min(u32 policy) +{ + return sys_sched_get_priority_min((int)policy); +} + + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_getparam(u32 pid, struct sched_param __user *param) +{ + return sys_sched_getparam((int)pid, param); +} + + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_getscheduler(u32 pid) +{ + return sys_sched_getscheduler((int)pid); +} + + +/* Note: it is necessary to treat pid as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_setparam(u32 pid, struct sched_param __user *param) +{ + return sys_sched_setparam((int)pid, param); +} + + +/* Note: it is necessary to treat pid and policy as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_sched_setscheduler(u32 pid, u32 policy, struct sched_param __user *param) +{ + return sys_sched_setscheduler((int)pid, (int)policy, param); +} + + +/* Note: it is necessary to treat len as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_setdomainname(char __user *name, u32 len) +{ + return sys_setdomainname(name, (int)len); +} + + +/* Note: it is necessary to treat gidsetsize as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_setgroups(u32 gidsetsize, gid_t __user *grouplist) +{ + return sys_setgroups((int)gidsetsize, grouplist); +} + + +asmlinkage long sys32_sethostname(char __user *name, u32 len) +{ + /* sign extend len */ + return sys_sethostname(name, (int)len); +} + + +/* Note: it is necessary to treat pid and pgid as unsigned ints, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_setpgid(u32 pid, u32 pgid) +{ + return sys_setpgid((int)pid, (int)pgid); +} + + +long sys32_setpriority(u32 which, u32 who, u32 niceval) +{ + /* sign extend which, who and niceval */ + return sys_setpriority((int)which, (int)who, (int)niceval); +} + +/* Note: it is necessary to treat newmask as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_ssetmask(u32 newmask) +{ + return sys_ssetmask((int) newmask); +} + +asmlinkage long sys32_syslog(u32 type, char __user * buf, u32 len) +{ + /* sign extend len */ + return sys_syslog(type, buf, (int)len); +} + + +/* Note: it is necessary to treat mask as an unsigned int, + * with the corresponding cast to a signed int to insure that the + * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) + * and the register representation of a signed int (msr in 64-bit mode) is performed. + */ +asmlinkage long sys32_umask(u32 mask) +{ + return sys_umask((int)mask); +} + +#ifdef CONFIG_SYSCTL +struct __sysctl_args32 { + u32 name; + int nlen; + u32 oldval; + u32 oldlenp; + u32 newval; + u32 newlen; + u32 __unused[4]; +}; + +asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args) +{ + struct __sysctl_args32 tmp; + int error; + size_t oldlen; + size_t __user *oldlenp = NULL; + unsigned long addr = (((unsigned long)&args->__unused[0]) + 7) & ~7; + + if (copy_from_user(&tmp, args, sizeof(tmp))) + return -EFAULT; + + if (tmp.oldval && tmp.oldlenp) { + /* Duh, this is ugly and might not work if sysctl_args + is in read-only memory, but do_sysctl does indirectly + a lot of uaccess in both directions and we'd have to + basically copy the whole sysctl.c here, and + glibc's __sysctl uses rw memory for the structure + anyway. */ + oldlenp = (size_t __user *)addr; + if (get_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp)) || + put_user(oldlen, oldlenp)) + return -EFAULT; + } + + lock_kernel(); + error = do_sysctl(compat_ptr(tmp.name), tmp.nlen, + compat_ptr(tmp.oldval), oldlenp, + compat_ptr(tmp.newval), tmp.newlen); + unlock_kernel(); + if (oldlenp) { + if (!error) { + if (get_user(oldlen, oldlenp) || + put_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp))) + error = -EFAULT; + } + copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused)); + } + return error; +} +#endif + +asmlinkage int sys32_olduname(struct oldold_utsname __user * name) +{ + int error; + + if (!name) + return -EFAULT; + if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) + return -EFAULT; + + down_read(&uts_sem); + error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + error -= __put_user(0,name->sysname+__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __put_user(0,name->nodename+__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __put_user(0,name->release+__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __put_user(0,name->version+__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error = __put_user(0,name->machine+__OLD_UTS_LEN); + up_read(&uts_sem); + + error = error ? -EFAULT : 0; + + return error; +} + +unsigned long sys32_mmap2(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + /* This should remain 12 even if PAGE_SIZE changes */ + return sys_mmap(addr, len, prot, flags, fd, pgoff << 12); +} + +int get_compat_timeval(struct timeval *tv, struct compat_timeval __user *ctv) +{ + return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) || + __get_user(tv->tv_sec, &ctv->tv_sec) || + __get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; +} + +asmlinkage long sys32_utimes(char __user *filename, struct compat_timeval __user *tvs) +{ + struct timeval ktvs[2], *ptr; + + ptr = NULL; + if (tvs) { + if (get_compat_timeval(&ktvs[0], &tvs[0]) || + get_compat_timeval(&ktvs[1], &tvs[1])) + return -EFAULT; + ptr = ktvs; + } + + return do_utimes(filename, ptr); +} + +long sys32_tgkill(u32 tgid, u32 pid, int sig) +{ + /* sign extend tgid, pid */ + return sys_tgkill((int)tgid, (int)pid, sig); +} + +/* + * long long munging: + * The 32 bit ABI passes long longs in an odd even register pair. + */ + +compat_ssize_t sys32_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, + u32 reg6, u32 poshi, u32 poslo) +{ + return sys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); +} + +compat_ssize_t sys32_pwrite64(unsigned int fd, char __user *ubuf, compat_size_t count, + u32 reg6, u32 poshi, u32 poslo) +{ + return sys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); +} + +compat_ssize_t sys32_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count) +{ + return sys_readahead(fd, ((loff_t)offhi << 32) | offlo, count); +} + +asmlinkage int sys32_truncate64(const char __user * path, u32 reg4, + unsigned long high, unsigned long low) +{ + return sys_truncate(path, (high << 32) | low); +} + +asmlinkage int sys32_ftruncate64(unsigned int fd, u32 reg4, unsigned long high, + unsigned long low) +{ + return sys_ftruncate(fd, (high << 32) | low); +} + +long ppc32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char __user *buf, + size_t len) +{ + return sys_lookup_dcookie((u64)cookie_high << 32 | cookie_low, + buf, len); +} + +long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low, + size_t len, int advice) +{ + return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low, len, + advice); +} + +long ppc32_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, + u32 len_high, u32 len_low) +{ + return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low, + (u64)len_high << 32 | len_low, advice); +} + +extern asmlinkage long sys_timer_create(clockid_t, sigevent_t __user *, timer_t __user *); + +long ppc32_timer_create(clockid_t clock, + struct compat_sigevent __user *ev32, + timer_t __user *timer_id) +{ + sigevent_t event; + timer_t t; + long err; + mm_segment_t savefs; + + if (ev32 == NULL) + return sys_timer_create(clock, NULL, timer_id); + + if (get_compat_sigevent(&event, ev32)) + return -EFAULT; + + if (!access_ok(VERIFY_WRITE, timer_id, sizeof(timer_t))) + return -EFAULT; + + savefs = get_fs(); + set_fs(KERNEL_DS); + /* The __user pointer casts are valid due to the set_fs() */ + err = sys_timer_create(clock, + (sigevent_t __user *) &event, + (timer_t __user *) &t); + set_fs(savefs); + + if (err == 0) + err = __put_user(t, timer_id); + + return err; +} + +asmlinkage long sys32_add_key(const char __user *_type, + const char __user *_description, + const void __user *_payload, + u32 plen, + u32 ringid) +{ + return sys_add_key(_type, _description, _payload, plen, ringid); +} + +asmlinkage long sys32_request_key(const char __user *_type, + const char __user *_description, + const char __user *_callout_info, + u32 destringid) +{ + return sys_request_key(_type, _description, _callout_info, destringid); +} + diff --git a/arch/ppc64/kernel/syscalls.c b/arch/ppc64/kernel/syscalls.c new file mode 100644 index 00000000000..f2865ff8d2f --- /dev/null +++ b/arch/ppc64/kernel/syscalls.c @@ -0,0 +1,258 @@ +/* + * linux/arch/ppc64/kernel/sys_ppc.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/kernel/sys_i386.c" + * Adapted from the i386 version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras (paulus@cs.anu.edu.au). + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/PPC + * platform. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern unsigned long wall_jiffies; + +void +check_bugs(void) +{ +} + +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls.. + * + * This is really horribly ugly. + */ +asmlinkage int +sys_ipc (uint call, int first, unsigned long second, long third, + void __user *ptr, long fifth) +{ + int version, ret; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + ret = -ENOSYS; + switch (call) { + case SEMOP: + ret = sys_semtimedop(first, (struct sembuf __user *)ptr, + (unsigned)second, NULL); + break; + case SEMTIMEDOP: + ret = sys_semtimedop(first, (struct sembuf __user *)ptr, + (unsigned)second, + (const struct timespec __user *) fifth); + break; + case SEMGET: + ret = sys_semget (first, (int)second, third); + break; + case SEMCTL: { + union semun fourth; + + ret = -EINVAL; + if (!ptr) + break; + if ((ret = get_user(fourth.__pad, (void __user * __user *)ptr))) + break; + ret = sys_semctl(first, (int)second, third, fourth); + break; + } + case MSGSND: + ret = sys_msgsnd(first, (struct msgbuf __user *)ptr, + (size_t)second, third); + break; + case MSGRCV: + switch (version) { + case 0: { + struct ipc_kludge tmp; + + ret = -EINVAL; + if (!ptr) + break; + if ((ret = copy_from_user(&tmp, + (struct ipc_kludge __user *) ptr, + sizeof (tmp)) ? -EFAULT : 0)) + break; + ret = sys_msgrcv(first, tmp.msgp, (size_t) second, + tmp.msgtyp, third); + break; + } + default: + ret = sys_msgrcv (first, (struct msgbuf __user *) ptr, + (size_t)second, fifth, third); + break; + } + break; + case MSGGET: + ret = sys_msgget ((key_t)first, (int)second); + break; + case MSGCTL: + ret = sys_msgctl(first, (int)second, + (struct msqid_ds __user *)ptr); + break; + case SHMAT: + switch (version) { + default: { + ulong raddr; + ret = do_shmat(first, (char __user *) ptr, + (int)second, &raddr); + if (ret) + break; + ret = put_user (raddr, (ulong __user *) third); + break; + } + case 1: /* iBCS2 emulator entry point */ + ret = -EINVAL; + if (!segment_eq(get_fs(), get_ds())) + break; + ret = do_shmat(first, (char __user *)ptr, + (int)second, (ulong *)third); + break; + } + break; + case SHMDT: + ret = sys_shmdt ((char __user *)ptr); + break; + case SHMGET: + ret = sys_shmget (first, (size_t)second, third); + break; + case SHMCTL: + ret = sys_shmctl(first, (int)second, + (struct shmid_ds __user *)ptr); + break; + } + + return ret; +} + +/* + * sys_pipe() is the normal C calling standard for creating + * a pipe. It's not the way unix traditionally does this, though. + */ +asmlinkage int sys_pipe(int __user *fildes) +{ + int fd[2]; + int error; + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(fildes, fd, 2*sizeof(int))) + error = -EFAULT; + } + + return error; +} + +unsigned long sys_mmap(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t offset) +{ + struct file * file = NULL; + unsigned long ret = -EBADF; + + if (!(flags & MAP_ANONYMOUS)) { + if (!(file = fget(fd))) + goto out; + } + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + down_write(¤t->mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flags, offset); + up_write(¤t->mm->mmap_sem); + if (file) + fput(file); + +out: + return ret; +} + +static int __init set_fakeppc(char *str) +{ + if (*str) + return 0; + init_task.personality = PER_LINUX32; + return 1; +} +__setup("fakeppc", set_fakeppc); + +asmlinkage int sys_uname(struct old_utsname __user * name) +{ + int err = -EFAULT; + + down_read(&uts_sem); + if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) + err = 0; + up_read(&uts_sem); + + return err; +} + +asmlinkage time_t sys64_time(time_t __user * tloc) +{ + time_t secs; + time_t usecs; + + long tb_delta = tb_ticks_since(tb_last_stamp); + tb_delta += (jiffies - wall_jiffies) * tb_ticks_per_jiffy; + + secs = xtime.tv_sec; + usecs = (xtime.tv_nsec/1000) + tb_delta / tb_ticks_per_usec; + while (usecs >= USEC_PER_SEC) { + ++secs; + usecs -= USEC_PER_SEC; + } + + if (tloc) { + if (put_user(secs,tloc)) + secs = -EFAULT; + } + + return secs; +} + +void do_show_syscall(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7, unsigned long r8, + struct pt_regs *regs) +{ + printk("syscall %ld(%lx, %lx, %lx, %lx, %lx, %lx) regs=%p current=%p" + " cpu=%d\n", regs->gpr[0], r3, r4, r5, r6, r7, r8, regs, + current, smp_processor_id()); +} + +void do_show_syscall_exit(unsigned long r3) +{ + printk(" -> %lx, current=%p cpu=%d\n", r3, current, smp_processor_id()); +} diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c new file mode 100644 index 00000000000..0925694c3ce --- /dev/null +++ b/arch/ppc64/kernel/sysfs.c @@ -0,0 +1,431 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(struct cpu, cpu_devices); + +/* SMT stuff */ + +#ifdef CONFIG_PPC_MULTIPLATFORM +/* default to snooze disabled */ +DEFINE_PER_CPU(unsigned long, smt_snooze_delay); + +static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf, + size_t count) +{ + struct cpu *cpu = container_of(dev, struct cpu, sysdev); + ssize_t ret; + unsigned long snooze; + + ret = sscanf(buf, "%lu", &snooze); + if (ret != 1) + return -EINVAL; + + per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; + + return count; +} + +static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, sysdev); + + return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); +} + +static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, + store_smt_snooze_delay); + +/* Only parse OF options if the matching cmdline option was not specified */ +static int smt_snooze_cmdline; + +static int __init smt_setup(void) +{ + struct device_node *options; + unsigned int *val; + unsigned int cpu; + + if (!cpu_has_feature(CPU_FTR_SMT)) + return 1; + + options = find_path_device("/options"); + if (!options) + return 1; + + val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay", + NULL); + if (!smt_snooze_cmdline && val) { + for_each_cpu(cpu) + per_cpu(smt_snooze_delay, cpu) = *val; + } + + return 1; +} +__initcall(smt_setup); + +static int __init setup_smt_snooze_delay(char *str) +{ + unsigned int cpu; + int snooze; + + if (!cpu_has_feature(CPU_FTR_SMT)) + return 1; + + smt_snooze_cmdline = 1; + + if (get_option(&str, &snooze)) { + for_each_cpu(cpu) + per_cpu(smt_snooze_delay, cpu) = snooze; + } + + return 1; +} +__setup("smt-snooze-delay=", setup_smt_snooze_delay); + +/* + * Enabling PMCs will slow partition context switch times so we only do + * it the first time we write to the PMCs. + */ + +static DEFINE_PER_CPU(char, pmcs_enabled); + +void ppc64_enable_pmcs(void) +{ + unsigned long hid0; +#ifdef CONFIG_PPC_PSERIES + unsigned long set, reset; + int ret; + unsigned int ctrl; +#endif /* CONFIG_PPC_PSERIES */ + + /* Only need to enable them once */ + if (__get_cpu_var(pmcs_enabled)) + return; + + __get_cpu_var(pmcs_enabled) = 1; + + switch (systemcfg->platform) { + case PLATFORM_PSERIES: + case PLATFORM_POWERMAC: + hid0 = mfspr(HID0); + hid0 |= 1UL << (63 - 20); + + /* POWER4 requires the following sequence */ + asm volatile( + "sync\n" + "mtspr %1, %0\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0): + "memory"); + break; + +#ifdef CONFIG_PPC_PSERIES + case PLATFORM_PSERIES_LPAR: + set = 1UL << 63; + reset = 0; + ret = plpar_hcall_norets(H_PERFMON, set, reset); + if (ret) + printk(KERN_ERR "H_PERFMON call on cpu %u " + "returned %d\n", + smp_processor_id(), ret); + break; +#endif /* CONFIG_PPC_PSERIES */ + + default: + break; + } + +#ifdef CONFIG_PPC_PSERIES + /* instruct hypervisor to maintain PMCs */ + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) + get_paca()->lppaca.pmcregs_in_use = 1; + + /* + * On SMT machines we have to set the run latch in the ctrl register + * in order to make PMC6 spin. + */ + if (cpu_has_feature(CPU_FTR_SMT)) { + ctrl = mfspr(CTRLF); + ctrl |= RUNLATCH; + mtspr(CTRLT, ctrl); + } +#endif /* CONFIG_PPC_PSERIES */ +} + +#else + +/* PMC stuff */ +void ppc64_enable_pmcs(void) +{ + /* XXX Implement for iseries */ +} +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +EXPORT_SYMBOL(ppc64_enable_pmcs); + +/* XXX convert to rusty's on_one_cpu */ +static unsigned long run_on_cpu(unsigned long cpu, + unsigned long (*func)(unsigned long), + unsigned long arg) +{ + cpumask_t old_affinity = current->cpus_allowed; + unsigned long ret; + + /* should return -EINVAL to userspace */ + if (set_cpus_allowed(current, cpumask_of_cpu(cpu))) + return 0; + + ret = func(arg); + + set_cpus_allowed(current, old_affinity); + + return ret; +} + +#define SYSFS_PMCSETUP(NAME, ADDRESS) \ +static unsigned long read_##NAME(unsigned long junk) \ +{ \ + return mfspr(ADDRESS); \ +} \ +static unsigned long write_##NAME(unsigned long val) \ +{ \ + ppc64_enable_pmcs(); \ + mtspr(ADDRESS, val); \ + return 0; \ +} \ +static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ +{ \ + struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ + return sprintf(buf, "%lx\n", val); \ +} \ +static ssize_t __attribute_used__ \ + store_##NAME(struct sys_device *dev, const char *buf, size_t count) \ +{ \ + struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + unsigned long val; \ + int ret = sscanf(buf, "%lx", &val); \ + if (ret != 1) \ + return -EINVAL; \ + run_on_cpu(cpu->sysdev.id, write_##NAME, val); \ + return count; \ +} + +SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0); +SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1); +SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); +SYSFS_PMCSETUP(pmc1, SPRN_PMC1); +SYSFS_PMCSETUP(pmc2, SPRN_PMC2); +SYSFS_PMCSETUP(pmc3, SPRN_PMC3); +SYSFS_PMCSETUP(pmc4, SPRN_PMC4); +SYSFS_PMCSETUP(pmc5, SPRN_PMC5); +SYSFS_PMCSETUP(pmc6, SPRN_PMC6); +SYSFS_PMCSETUP(pmc7, SPRN_PMC7); +SYSFS_PMCSETUP(pmc8, SPRN_PMC8); +SYSFS_PMCSETUP(purr, SPRN_PURR); + +static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0); +static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1); +static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1); +static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2); +static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3); +static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4); +static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5); +static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6); +static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7); +static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8); +static SYSDEV_ATTR(purr, 0600, show_purr, NULL); + +static void register_cpu_online(unsigned int cpu) +{ + struct cpu *c = &per_cpu(cpu_devices, cpu); + struct sys_device *s = &c->sysdev; + +#ifndef CONFIG_PPC_ISERIES + if (cpu_has_feature(CPU_FTR_SMT)) + sysdev_create_file(s, &attr_smt_snooze_delay); +#endif + + /* PMC stuff */ + + sysdev_create_file(s, &attr_mmcr0); + sysdev_create_file(s, &attr_mmcr1); + + if (cpu_has_feature(CPU_FTR_MMCRA)) + sysdev_create_file(s, &attr_mmcra); + + sysdev_create_file(s, &attr_pmc1); + sysdev_create_file(s, &attr_pmc2); + sysdev_create_file(s, &attr_pmc3); + sysdev_create_file(s, &attr_pmc4); + sysdev_create_file(s, &attr_pmc5); + sysdev_create_file(s, &attr_pmc6); + + if (cpu_has_feature(CPU_FTR_PMC8)) { + sysdev_create_file(s, &attr_pmc7); + sysdev_create_file(s, &attr_pmc8); + } + + if (cpu_has_feature(CPU_FTR_SMT)) + sysdev_create_file(s, &attr_purr); +} + +#ifdef CONFIG_HOTPLUG_CPU +static void unregister_cpu_online(unsigned int cpu) +{ + struct cpu *c = &per_cpu(cpu_devices, cpu); + struct sys_device *s = &c->sysdev; + + BUG_ON(c->no_control); + +#ifndef CONFIG_PPC_ISERIES + if (cpu_has_feature(CPU_FTR_SMT)) + sysdev_remove_file(s, &attr_smt_snooze_delay); +#endif + + /* PMC stuff */ + + sysdev_remove_file(s, &attr_mmcr0); + sysdev_remove_file(s, &attr_mmcr1); + + if (cpu_has_feature(CPU_FTR_MMCRA)) + sysdev_remove_file(s, &attr_mmcra); + + sysdev_remove_file(s, &attr_pmc1); + sysdev_remove_file(s, &attr_pmc2); + sysdev_remove_file(s, &attr_pmc3); + sysdev_remove_file(s, &attr_pmc4); + sysdev_remove_file(s, &attr_pmc5); + sysdev_remove_file(s, &attr_pmc6); + + if (cpu_has_feature(CPU_FTR_PMC8)) { + sysdev_remove_file(s, &attr_pmc7); + sysdev_remove_file(s, &attr_pmc8); + } + + if (cpu_has_feature(CPU_FTR_SMT)) + sysdev_remove_file(s, &attr_purr); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static int __devinit sysfs_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned int)(long)hcpu; + + switch (action) { + case CPU_ONLINE: + register_cpu_online(cpu); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + unregister_cpu_online(cpu); + break; +#endif + } + return NOTIFY_OK; +} + +static struct notifier_block __devinitdata sysfs_cpu_nb = { + .notifier_call = sysfs_cpu_notify, +}; + +/* NUMA stuff */ + +#ifdef CONFIG_NUMA +static struct node node_devices[MAX_NUMNODES]; + +static void register_nodes(void) +{ + int i; + + for (i = 0; i < MAX_NUMNODES; i++) { + if (node_online(i)) { + int p_node = parent_node(i); + struct node *parent = NULL; + + if (p_node != i) + parent = &node_devices[p_node]; + + register_node(&node_devices[i], i, parent); + } + } +} +#else +static void register_nodes(void) +{ + return; +} +#endif + +/* Only valid if CPU is present. */ +static ssize_t show_physical_id(struct sys_device *dev, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, sysdev); + + return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id)); +} +static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL); + +static int __init topology_init(void) +{ + int cpu; + struct node *parent = NULL; + + register_nodes(); + + register_cpu_notifier(&sysfs_cpu_nb); + + for_each_cpu(cpu) { + struct cpu *c = &per_cpu(cpu_devices, cpu); + +#ifdef CONFIG_NUMA + parent = &node_devices[cpu_to_node(cpu)]; +#endif + /* + * For now, we just see if the system supports making + * the RTAS calls for CPU hotplug. But, there may be a + * more comprehensive way to do this for an individual + * CPU. For instance, the boot cpu might never be valid + * for hotplugging. + */ + if (!ppc_md.cpu_die) + c->no_control = 1; + + if (cpu_online(cpu) || (c->no_control == 0)) { + register_cpu(c, cpu, parent); + + sysdev_create_file(&c->sysdev, &attr_physical_id); + } + + if (cpu_online(cpu)) + register_cpu_online(cpu); + } + + return 0; +} +__initcall(topology_init); diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c new file mode 100644 index 00000000000..77ded5a363b --- /dev/null +++ b/arch/ppc64/kernel/time.c @@ -0,0 +1,827 @@ +/* + * + * Common time routines among all ppc machines. + * + * Written by Cort Dougan (cort@cs.nmt.edu) to merge + * Paul Mackerras' version and mine for PReP and Pmac. + * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net). + * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com) + * + * First round of bugfixes by Gabriel Paubert (paubert@iram.es) + * to make clock more stable (2.4.0-test5). The only thing + * that this code assumes is that the timebases have been synchronized + * by firmware on SMP and are never stopped (never do sleep + * on SMP then, nap and doze are OK). + * + * Speeded up do_gettimeofday by getting rid of references to + * xtime (which required locks for consistency). (mikejc@us.ibm.com) + * + * TODO (not necessarily in this file): + * - improve precision and reproducibility of timebase frequency + * measurement at boot time. (for iSeries, we calibrate the timebase + * against the Titan chip's clock.) + * - for astronomical applications: add a new function to get + * non ambiguous timestamps even around leap seconds. This needs + * a new timestamp format and a good name. + * + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PPC_ISERIES +#include +#include +#endif +#include +#include +#include +#include +#include +#include + +u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; + +EXPORT_SYMBOL(jiffies_64); + +/* keep track of when we need to update the rtc */ +time_t last_rtc_update; +extern int piranha_simulator; +#ifdef CONFIG_PPC_ISERIES +unsigned long iSeries_recal_titan = 0; +unsigned long iSeries_recal_tb = 0; +static unsigned long first_settimeofday = 1; +#endif + +#define XSEC_PER_SEC (1024*1024) + +unsigned long tb_ticks_per_jiffy; +unsigned long tb_ticks_per_usec = 100; /* sane default */ +EXPORT_SYMBOL(tb_ticks_per_usec); +unsigned long tb_ticks_per_sec; +unsigned long tb_to_xs; +unsigned tb_to_us; +unsigned long processor_freq; +DEFINE_SPINLOCK(rtc_lock); + +unsigned long tb_to_ns_scale; +unsigned long tb_to_ns_shift; + +struct gettimeofday_struct do_gtod; + +extern unsigned long wall_jiffies; +extern unsigned long lpevent_count; +extern int smp_tb_synchronized; + +extern struct timezone sys_tz; + +void ppc_adjtimex(void); + +static unsigned adjusting_time = 0; + +static __inline__ void timer_check_rtc(void) +{ + /* + * update the rtc when needed, this should be performed on the + * right fraction of a second. Half or full second ? + * Full second works on mk48t59 clocks, others need testing. + * Note that this update is basically only used through + * the adjtimex system calls. Setting the HW clock in + * any other way is a /dev/rtc and userland business. + * This is still wrong by -0.5/+1.5 jiffies because of the + * timer interrupt resolution and possible delay, but here we + * hit a quantization limit which can only be solved by higher + * resolution timers and decoupling time management from timer + * interrupts. This is also wrong on the clocks + * which require being written at the half second boundary. + * We should have an rtc call that only sets the minutes and + * seconds like on Intel to avoid problems with non UTC clocks. + */ + if ( (time_status & STA_UNSYNC) == 0 && + xtime.tv_sec - last_rtc_update >= 659 && + abs((xtime.tv_nsec/1000) - (1000000-1000000/HZ)) < 500000/HZ && + jiffies - wall_jiffies == 1) { + struct rtc_time tm; + to_tm(xtime.tv_sec+1, &tm); + tm.tm_year -= 1900; + tm.tm_mon -= 1; + if (ppc_md.set_rtc_time(&tm) == 0) + last_rtc_update = xtime.tv_sec+1; + else + /* Try again one minute later */ + last_rtc_update += 60; + } +} + +/* + * This version of gettimeofday has microsecond resolution. + */ +static inline void __do_gettimeofday(struct timeval *tv, unsigned long tb_val) +{ + unsigned long sec, usec, tb_ticks; + unsigned long xsec, tb_xsec; + struct gettimeofday_vars * temp_varp; + unsigned long temp_tb_to_xs, temp_stamp_xsec; + + /* + * These calculations are faster (gets rid of divides) + * if done in units of 1/2^20 rather than microseconds. + * The conversion to microseconds at the end is done + * without a divide (and in fact, without a multiply) + */ + temp_varp = do_gtod.varp; + tb_ticks = tb_val - temp_varp->tb_orig_stamp; + temp_tb_to_xs = temp_varp->tb_to_xs; + temp_stamp_xsec = temp_varp->stamp_xsec; + tb_xsec = mulhdu( tb_ticks, temp_tb_to_xs ); + xsec = temp_stamp_xsec + tb_xsec; + sec = xsec / XSEC_PER_SEC; + xsec -= sec * XSEC_PER_SEC; + usec = (xsec * USEC_PER_SEC)/XSEC_PER_SEC; + + tv->tv_sec = sec; + tv->tv_usec = usec; +} + +void do_gettimeofday(struct timeval *tv) +{ + __do_gettimeofday(tv, get_tb()); +} + +EXPORT_SYMBOL(do_gettimeofday); + +/* Synchronize xtime with do_gettimeofday */ + +static inline void timer_sync_xtime(unsigned long cur_tb) +{ + struct timeval my_tv; + + __do_gettimeofday(&my_tv, cur_tb); + + if (xtime.tv_sec <= my_tv.tv_sec) { + xtime.tv_sec = my_tv.tv_sec; + xtime.tv_nsec = my_tv.tv_usec * 1000; + } +} + +/* + * When the timebase - tb_orig_stamp gets too big, we do a manipulation + * between tb_orig_stamp and stamp_xsec. The goal here is to keep the + * difference tb - tb_orig_stamp small enough to always fit inside a + * 32 bits number. This is a requirement of our fast 32 bits userland + * implementation in the vdso. If we "miss" a call to this function + * (interrupt latency, CPU locked in a spinlock, ...) and we end up + * with a too big difference, then the vdso will fallback to calling + * the syscall + */ +static __inline__ void timer_recalc_offset(unsigned long cur_tb) +{ + struct gettimeofday_vars * temp_varp; + unsigned temp_idx; + unsigned long offset, new_stamp_xsec, new_tb_orig_stamp; + + if (((cur_tb - do_gtod.varp->tb_orig_stamp) & 0x80000000u) == 0) + return; + + temp_idx = (do_gtod.var_idx == 0); + temp_varp = &do_gtod.vars[temp_idx]; + + new_tb_orig_stamp = cur_tb; + offset = new_tb_orig_stamp - do_gtod.varp->tb_orig_stamp; + new_stamp_xsec = do_gtod.varp->stamp_xsec + mulhdu(offset, do_gtod.varp->tb_to_xs); + + temp_varp->tb_to_xs = do_gtod.varp->tb_to_xs; + temp_varp->tb_orig_stamp = new_tb_orig_stamp; + temp_varp->stamp_xsec = new_stamp_xsec; + mb(); + do_gtod.varp = temp_varp; + do_gtod.var_idx = temp_idx; + + ++(systemcfg->tb_update_count); + wmb(); + systemcfg->tb_orig_stamp = new_tb_orig_stamp; + systemcfg->stamp_xsec = new_stamp_xsec; + wmb(); + ++(systemcfg->tb_update_count); +} + +#ifdef CONFIG_SMP +unsigned long profile_pc(struct pt_regs *regs) +{ + unsigned long pc = instruction_pointer(regs); + + if (in_lock_functions(pc)) + return regs->link; + + return pc; +} +EXPORT_SYMBOL(profile_pc); +#endif + +#ifdef CONFIG_PPC_ISERIES + +/* + * This function recalibrates the timebase based on the 49-bit time-of-day + * value in the Titan chip. The Titan is much more accurate than the value + * returned by the service processor for the timebase frequency. + */ + +static void iSeries_tb_recal(void) +{ + struct div_result divres; + unsigned long titan, tb; + tb = get_tb(); + titan = HvCallXm_loadTod(); + if ( iSeries_recal_titan ) { + unsigned long tb_ticks = tb - iSeries_recal_tb; + unsigned long titan_usec = (titan - iSeries_recal_titan) >> 12; + unsigned long new_tb_ticks_per_sec = (tb_ticks * USEC_PER_SEC)/titan_usec; + unsigned long new_tb_ticks_per_jiffy = (new_tb_ticks_per_sec+(HZ/2))/HZ; + long tick_diff = new_tb_ticks_per_jiffy - tb_ticks_per_jiffy; + char sign = '+'; + /* make sure tb_ticks_per_sec and tb_ticks_per_jiffy are consistent */ + new_tb_ticks_per_sec = new_tb_ticks_per_jiffy * HZ; + + if ( tick_diff < 0 ) { + tick_diff = -tick_diff; + sign = '-'; + } + if ( tick_diff ) { + if ( tick_diff < tb_ticks_per_jiffy/25 ) { + printk( "Titan recalibrate: new tb_ticks_per_jiffy = %lu (%c%ld)\n", + new_tb_ticks_per_jiffy, sign, tick_diff ); + tb_ticks_per_jiffy = new_tb_ticks_per_jiffy; + tb_ticks_per_sec = new_tb_ticks_per_sec; + div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres ); + do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; + tb_to_xs = divres.result_low; + do_gtod.varp->tb_to_xs = tb_to_xs; + systemcfg->tb_ticks_per_sec = tb_ticks_per_sec; + systemcfg->tb_to_xs = tb_to_xs; + } + else { + printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" + " new tb_ticks_per_jiffy = %lu\n" + " old tb_ticks_per_jiffy = %lu\n", + new_tb_ticks_per_jiffy, tb_ticks_per_jiffy ); + } + } + } + iSeries_recal_titan = titan; + iSeries_recal_tb = tb; +} +#endif + +/* + * For iSeries shared processors, we have to let the hypervisor + * set the hardware decrementer. We set a virtual decrementer + * in the lppaca and call the hypervisor if the virtual + * decrementer is less than the current value in the hardware + * decrementer. (almost always the new decrementer value will + * be greater than the current hardware decementer so the hypervisor + * call will not be needed) + */ + +unsigned long tb_last_stamp __cacheline_aligned_in_smp; + +/* + * timer_interrupt - gets called when the decrementer overflows, + * with interrupts disabled. + */ +int timer_interrupt(struct pt_regs * regs) +{ + int next_dec; + unsigned long cur_tb; + struct paca_struct *lpaca = get_paca(); + unsigned long cpu = smp_processor_id(); + + irq_enter(); + +#ifndef CONFIG_PPC_ISERIES + profile_tick(CPU_PROFILING, regs); +#endif + + lpaca->lppaca.int_dword.fields.decr_int = 0; + + while (lpaca->next_jiffy_update_tb <= (cur_tb = get_tb())) { + /* + * We cannot disable the decrementer, so in the period + * between this cpu's being marked offline in cpu_online_map + * and calling stop-self, it is taking timer interrupts. + * Avoid calling into the scheduler rebalancing code if this + * is the case. + */ + if (!cpu_is_offline(cpu)) + update_process_times(user_mode(regs)); + /* + * No need to check whether cpu is offline here; boot_cpuid + * should have been fixed up by now. + */ + if (cpu == boot_cpuid) { + write_seqlock(&xtime_lock); + tb_last_stamp = lpaca->next_jiffy_update_tb; + timer_recalc_offset(lpaca->next_jiffy_update_tb); + do_timer(regs); + timer_sync_xtime(lpaca->next_jiffy_update_tb); + timer_check_rtc(); + write_sequnlock(&xtime_lock); + if ( adjusting_time && (time_adjust == 0) ) + ppc_adjtimex(); + } + lpaca->next_jiffy_update_tb += tb_ticks_per_jiffy; + } + + next_dec = lpaca->next_jiffy_update_tb - cur_tb; + if (next_dec > lpaca->default_decr) + next_dec = lpaca->default_decr; + set_dec(next_dec); + +#ifdef CONFIG_PPC_ISERIES + { + struct ItLpQueue *lpq = lpaca->lpqueue_ptr; + if (lpq && ItLpQueue_isLpIntPending(lpq)) + lpevent_count += ItLpQueue_process(lpq, regs); + } +#endif + +/* collect purr register values often, for accurate calculations */ +#if defined(CONFIG_PPC_PSERIES) + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + cu->current_tb = mfspr(SPRN_PURR); + } +#endif + + irq_exit(); + + return 1; +} + +/* + * Scheduler clock - returns current time in nanosec units. + * + * Note: mulhdu(a, b) (multiply high double unsigned) returns + * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b + * are 64-bit unsigned numbers. + */ +unsigned long long sched_clock(void) +{ + return mulhdu(get_tb(), tb_to_ns_scale) << tb_to_ns_shift; +} + +int do_settimeofday(struct timespec *tv) +{ + time_t wtm_sec, new_sec = tv->tv_sec; + long wtm_nsec, new_nsec = tv->tv_nsec; + unsigned long flags; + unsigned long delta_xsec; + long int tb_delta; + unsigned long new_xsec; + + if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) + return -EINVAL; + + write_seqlock_irqsave(&xtime_lock, flags); + /* Updating the RTC is not the job of this code. If the time is + * stepped under NTP, the RTC will be update after STA_UNSYNC + * is cleared. Tool like clock/hwclock either copy the RTC + * to the system time, in which case there is no point in writing + * to the RTC again, or write to the RTC but then they don't call + * settimeofday to perform this operation. + */ +#ifdef CONFIG_PPC_ISERIES + if ( first_settimeofday ) { + iSeries_tb_recal(); + first_settimeofday = 0; + } +#endif + tb_delta = tb_ticks_since(tb_last_stamp); + tb_delta += (jiffies - wall_jiffies) * tb_ticks_per_jiffy; + + new_nsec -= tb_delta / tb_ticks_per_usec / 1000; + + wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - new_sec); + wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - new_nsec); + + set_normalized_timespec(&xtime, new_sec, new_nsec); + set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); + + /* In case of a large backwards jump in time with NTP, we want the + * clock to be updated as soon as the PLL is again in lock. + */ + last_rtc_update = new_sec - 658; + + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; + + delta_xsec = mulhdu( (tb_last_stamp-do_gtod.varp->tb_orig_stamp), + do_gtod.varp->tb_to_xs ); + + new_xsec = (new_nsec * XSEC_PER_SEC) / NSEC_PER_SEC; + new_xsec += new_sec * XSEC_PER_SEC; + if ( new_xsec > delta_xsec ) { + do_gtod.varp->stamp_xsec = new_xsec - delta_xsec; + systemcfg->stamp_xsec = new_xsec - delta_xsec; + } + else { + /* This is only for the case where the user is setting the time + * way back to a time such that the boot time would have been + * before 1970 ... eg. we booted ten days ago, and we are setting + * the time to Jan 5, 1970 */ + do_gtod.varp->stamp_xsec = new_xsec; + do_gtod.varp->tb_orig_stamp = tb_last_stamp; + systemcfg->stamp_xsec = new_xsec; + systemcfg->tb_orig_stamp = tb_last_stamp; + } + + systemcfg->tz_minuteswest = sys_tz.tz_minuteswest; + systemcfg->tz_dsttime = sys_tz.tz_dsttime; + + write_sequnlock_irqrestore(&xtime_lock, flags); + clock_was_set(); + return 0; +} + +EXPORT_SYMBOL(do_settimeofday); + +void __init time_init(void) +{ + /* This function is only called on the boot processor */ + unsigned long flags; + struct rtc_time tm; + struct div_result res; + unsigned long scale, shift; + + ppc_md.calibrate_decr(); + + /* + * Compute scale factor for sched_clock. + * The calibrate_decr() function has set tb_ticks_per_sec, + * which is the timebase frequency. + * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret + * the 128-bit result as a 64.64 fixed-point number. + * We then shift that number right until it is less than 1.0, + * giving us the scale factor and shift count to use in + * sched_clock(). + */ + div128_by_32(1000000000, 0, tb_ticks_per_sec, &res); + scale = res.result_low; + for (shift = 0; res.result_high != 0; ++shift) { + scale = (scale >> 1) | (res.result_high << 63); + res.result_high >>= 1; + } + tb_to_ns_scale = scale; + tb_to_ns_shift = shift; + +#ifdef CONFIG_PPC_ISERIES + if (!piranha_simulator) +#endif + ppc_md.get_boot_time(&tm); + + write_seqlock_irqsave(&xtime_lock, flags); + xtime.tv_sec = mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + tb_last_stamp = get_tb(); + do_gtod.varp = &do_gtod.vars[0]; + do_gtod.var_idx = 0; + do_gtod.varp->tb_orig_stamp = tb_last_stamp; + do_gtod.varp->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC; + do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; + do_gtod.varp->tb_to_xs = tb_to_xs; + do_gtod.tb_to_us = tb_to_us; + systemcfg->tb_orig_stamp = tb_last_stamp; + systemcfg->tb_update_count = 0; + systemcfg->tb_ticks_per_sec = tb_ticks_per_sec; + systemcfg->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC; + systemcfg->tb_to_xs = tb_to_xs; + + time_freq = 0; + + xtime.tv_nsec = 0; + last_rtc_update = xtime.tv_sec; + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); + write_sequnlock_irqrestore(&xtime_lock, flags); + + /* Not exact, but the timer interrupt takes care of this */ + set_dec(tb_ticks_per_jiffy); +} + +/* + * After adjtimex is called, adjust the conversion of tb ticks + * to microseconds to keep do_gettimeofday synchronized + * with ntpd. + * + * Use the time_adjust, time_freq and time_offset computed by adjtimex to + * adjust the frequency. + */ + +/* #define DEBUG_PPC_ADJTIMEX 1 */ + +void ppc_adjtimex(void) +{ + unsigned long den, new_tb_ticks_per_sec, tb_ticks, old_xsec, new_tb_to_xs, new_xsec, new_stamp_xsec; + unsigned long tb_ticks_per_sec_delta; + long delta_freq, ltemp; + struct div_result divres; + unsigned long flags; + struct gettimeofday_vars * temp_varp; + unsigned temp_idx; + long singleshot_ppm = 0; + + /* Compute parts per million frequency adjustment to accomplish the time adjustment + implied by time_offset to be applied over the elapsed time indicated by time_constant. + Use SHIFT_USEC to get it into the same units as time_freq. */ + if ( time_offset < 0 ) { + ltemp = -time_offset; + ltemp <<= SHIFT_USEC - SHIFT_UPDATE; + ltemp >>= SHIFT_KG + time_constant; + ltemp = -ltemp; + } + else { + ltemp = time_offset; + ltemp <<= SHIFT_USEC - SHIFT_UPDATE; + ltemp >>= SHIFT_KG + time_constant; + } + + /* If there is a single shot time adjustment in progress */ + if ( time_adjust ) { +#ifdef DEBUG_PPC_ADJTIMEX + printk("ppc_adjtimex: "); + if ( adjusting_time == 0 ) + printk("starting "); + printk("single shot time_adjust = %ld\n", time_adjust); +#endif + + adjusting_time = 1; + + /* Compute parts per million frequency adjustment to match time_adjust */ + singleshot_ppm = tickadj * HZ; + /* + * The adjustment should be tickadj*HZ to match the code in + * linux/kernel/timer.c, but experiments show that this is too + * large. 3/4 of tickadj*HZ seems about right + */ + singleshot_ppm -= singleshot_ppm / 4; + /* Use SHIFT_USEC to get it into the same units as time_freq */ + singleshot_ppm <<= SHIFT_USEC; + if ( time_adjust < 0 ) + singleshot_ppm = -singleshot_ppm; + } + else { +#ifdef DEBUG_PPC_ADJTIMEX + if ( adjusting_time ) + printk("ppc_adjtimex: ending single shot time_adjust\n"); +#endif + adjusting_time = 0; + } + + /* Add up all of the frequency adjustments */ + delta_freq = time_freq + ltemp + singleshot_ppm; + + /* Compute a new value for tb_ticks_per_sec based on the frequency adjustment */ + den = 1000000 * (1 << (SHIFT_USEC - 8)); + if ( delta_freq < 0 ) { + tb_ticks_per_sec_delta = ( tb_ticks_per_sec * ( (-delta_freq) >> (SHIFT_USEC - 8))) / den; + new_tb_ticks_per_sec = tb_ticks_per_sec + tb_ticks_per_sec_delta; + } + else { + tb_ticks_per_sec_delta = ( tb_ticks_per_sec * ( delta_freq >> (SHIFT_USEC - 8))) / den; + new_tb_ticks_per_sec = tb_ticks_per_sec - tb_ticks_per_sec_delta; + } + +#ifdef DEBUG_PPC_ADJTIMEX + printk("ppc_adjtimex: ltemp = %ld, time_freq = %ld, singleshot_ppm = %ld\n", ltemp, time_freq, singleshot_ppm); + printk("ppc_adjtimex: tb_ticks_per_sec - base = %ld new = %ld\n", tb_ticks_per_sec, new_tb_ticks_per_sec); +#endif + + /* Compute a new value of tb_to_xs (used to convert tb to microseconds and a new value of + stamp_xsec which is the time (in 1/2^20 second units) corresponding to tb_orig_stamp. This + new value of stamp_xsec compensates for the change in frequency (implied by the new tb_to_xs) + which guarantees that the current time remains the same */ + write_seqlock_irqsave( &xtime_lock, flags ); + tb_ticks = get_tb() - do_gtod.varp->tb_orig_stamp; + div128_by_32( 1024*1024, 0, new_tb_ticks_per_sec, &divres ); + new_tb_to_xs = divres.result_low; + new_xsec = mulhdu( tb_ticks, new_tb_to_xs ); + + old_xsec = mulhdu( tb_ticks, do_gtod.varp->tb_to_xs ); + new_stamp_xsec = do_gtod.varp->stamp_xsec + old_xsec - new_xsec; + + /* There are two copies of tb_to_xs and stamp_xsec so that no lock is needed to access and use these + values in do_gettimeofday. We alternate the copies and as long as a reasonable time elapses between + changes, there will never be inconsistent values. ntpd has a minimum of one minute between updates */ + + temp_idx = (do_gtod.var_idx == 0); + temp_varp = &do_gtod.vars[temp_idx]; + + temp_varp->tb_to_xs = new_tb_to_xs; + temp_varp->stamp_xsec = new_stamp_xsec; + temp_varp->tb_orig_stamp = do_gtod.varp->tb_orig_stamp; + mb(); + do_gtod.varp = temp_varp; + do_gtod.var_idx = temp_idx; + + /* + * tb_update_count is used to allow the problem state gettimeofday code + * to assure itself that it sees a consistent view of the tb_to_xs and + * stamp_xsec variables. It reads the tb_update_count, then reads + * tb_to_xs and stamp_xsec and then reads tb_update_count again. If + * the two values of tb_update_count match and are even then the + * tb_to_xs and stamp_xsec values are consistent. If not, then it + * loops back and reads them again until this criteria is met. + */ + ++(systemcfg->tb_update_count); + wmb(); + systemcfg->tb_to_xs = new_tb_to_xs; + systemcfg->stamp_xsec = new_stamp_xsec; + wmb(); + ++(systemcfg->tb_update_count); + + write_sequnlock_irqrestore( &xtime_lock, flags ); + +} + + +#define TICK_SIZE tick +#define FEBRUARY 2 +#define STARTOFTIME 1970 +#define SECDAY 86400L +#define SECYR (SECDAY * 365) +#define leapyear(year) ((year) % 4 == 0) +#define days_in_year(a) (leapyear(a) ? 366 : 365) +#define days_in_month(a) (month_days[(a) - 1]) + +static int month_days[12] = { + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; + +/* + * This only works for the Gregorian calendar - i.e. after 1752 (in the UK) + */ +void GregorianDay(struct rtc_time * tm) +{ + int leapsToDate; + int lastYear; + int day; + int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; + + lastYear=tm->tm_year-1; + + /* + * Number of leap corrections to apply up to end of last year + */ + leapsToDate = lastYear/4 - lastYear/100 + lastYear/400; + + /* + * This year is a leap year if it is divisible by 4 except when it is + * divisible by 100 unless it is divisible by 400 + * + * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 will be + */ + if((tm->tm_year%4==0) && + ((tm->tm_year%100!=0) || (tm->tm_year%400==0)) && + (tm->tm_mon>2)) + { + /* + * We are past Feb. 29 in a leap year + */ + day=1; + } + else + { + day=0; + } + + day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] + + tm->tm_mday; + + tm->tm_wday=day%7; +} + +void to_tm(int tim, struct rtc_time * tm) +{ + register int i; + register long hms, day; + + day = tim / SECDAY; + hms = tim % SECDAY; + + /* Hours, minutes, seconds are easy */ + tm->tm_hour = hms / 3600; + tm->tm_min = (hms % 3600) / 60; + tm->tm_sec = (hms % 3600) % 60; + + /* Number of years in days */ + for (i = STARTOFTIME; day >= days_in_year(i); i++) + day -= days_in_year(i); + tm->tm_year = i; + + /* Number of months in days left */ + if (leapyear(tm->tm_year)) + days_in_month(FEBRUARY) = 29; + for (i = 1; day >= days_in_month(i); i++) + day -= days_in_month(i); + days_in_month(FEBRUARY) = 28; + tm->tm_mon = i; + + /* Days are what is left over (+1) from all that. */ + tm->tm_mday = day + 1; + + /* + * Determine the day of week + */ + GregorianDay(tm); +} + +/* Auxiliary function to compute scaling factors */ +/* Actually the choice of a timebase running at 1/4 the of the bus + * frequency giving resolution of a few tens of nanoseconds is quite nice. + * It makes this computation very precise (27-28 bits typically) which + * is optimistic considering the stability of most processor clock + * oscillators and the precision with which the timebase frequency + * is measured but does not harm. + */ +unsigned mulhwu_scale_factor(unsigned inscale, unsigned outscale) { + unsigned mlt=0, tmp, err; + /* No concern for performance, it's done once: use a stupid + * but safe and compact method to find the multiplier. + */ + + for (tmp = 1U<<31; tmp != 0; tmp >>= 1) { + if (mulhwu(inscale, mlt|tmp) < outscale) mlt|=tmp; + } + + /* We might still be off by 1 for the best approximation. + * A side effect of this is that if outscale is too large + * the returned value will be zero. + * Many corner cases have been checked and seem to work, + * some might have been forgotten in the test however. + */ + + err = inscale*(mlt+1); + if (err <= inscale/2) mlt++; + return mlt; + } + +/* + * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit + * result. + */ + +void div128_by_32( unsigned long dividend_high, unsigned long dividend_low, + unsigned divisor, struct div_result *dr ) +{ + unsigned long a,b,c,d, w,x,y,z, ra,rb,rc; + + a = dividend_high >> 32; + b = dividend_high & 0xffffffff; + c = dividend_low >> 32; + d = dividend_low & 0xffffffff; + + w = a/divisor; + ra = (a - (w * divisor)) << 32; + + x = (ra + b)/divisor; + rb = ((ra + b) - (x * divisor)) << 32; + + y = (rb + c)/divisor; + rc = ((rb + b) - (y * divisor)) << 32; + + z = (rc + d)/divisor; + + dr->result_high = (w << 32) + x; + dr->result_low = (y << 32) + z; + +} + diff --git a/arch/ppc64/kernel/traps.c b/arch/ppc64/kernel/traps.c new file mode 100644 index 00000000000..10fc61f3f6a --- /dev/null +++ b/arch/ppc64/kernel/traps.c @@ -0,0 +1,565 @@ +/* + * linux/arch/ppc64/kernel/traps.c + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Modified by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras (paulus@cs.anu.edu.au) + */ + +/* + * This file handles the architecture-dependent parts of hardware exceptions + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_DEBUGGER +int (*__debugger)(struct pt_regs *regs); +int (*__debugger_ipi)(struct pt_regs *regs); +int (*__debugger_bpt)(struct pt_regs *regs); +int (*__debugger_sstep)(struct pt_regs *regs); +int (*__debugger_iabr_match)(struct pt_regs *regs); +int (*__debugger_dabr_match)(struct pt_regs *regs); +int (*__debugger_fault_handler)(struct pt_regs *regs); + +EXPORT_SYMBOL(__debugger); +EXPORT_SYMBOL(__debugger_ipi); +EXPORT_SYMBOL(__debugger_bpt); +EXPORT_SYMBOL(__debugger_sstep); +EXPORT_SYMBOL(__debugger_iabr_match); +EXPORT_SYMBOL(__debugger_dabr_match); +EXPORT_SYMBOL(__debugger_fault_handler); +#endif + +struct notifier_block *ppc64_die_chain; +static DEFINE_SPINLOCK(die_notifier_lock); + +int register_die_notifier(struct notifier_block *nb) +{ + int err = 0; + unsigned long flags; + + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_register(&ppc64_die_chain, nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + return err; +} + +/* + * Trap & Exception support + */ + +static DEFINE_SPINLOCK(die_lock); + +int die(const char *str, struct pt_regs *regs, long err) +{ + static int die_counter; + int nl = 0; + + if (debugger(regs)) + return 1; + + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); + nl = 1; +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); + nl = 1; +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); + nl = 1; +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); + nl = 1; +#endif + switch(systemcfg->platform) { + case PLATFORM_PSERIES: + printk("PSERIES "); + nl = 1; + break; + case PLATFORM_PSERIES_LPAR: + printk("PSERIES LPAR "); + nl = 1; + break; + case PLATFORM_ISERIES_LPAR: + printk("ISERIES LPAR "); + nl = 1; + break; + case PLATFORM_POWERMAC: + printk("POWERMAC "); + nl = 1; + break; + } + if (nl) + printk("\n"); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) { + printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); + ssleep(5); + panic("Fatal exception"); + } + do_exit(SIGSEGV); + + return 0; +} + +void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) +{ + siginfo_t info; + + if (!user_mode(regs)) { + if (die("Exception in kernel mode", regs, signr)) + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = signr; + info.si_code = code; + info.si_addr = (void __user *) addr; + force_sig_info(signr, &info, current); +} + +void system_reset_exception(struct pt_regs *regs) +{ + /* See if any machine dependent calls */ + if (ppc_md.system_reset_exception) + ppc_md.system_reset_exception(regs); + + die("System Reset", regs, 0); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable System Reset"); + + /* What should we do here? We could issue a shutdown or hard reset. */ +} + +void machine_check_exception(struct pt_regs *regs) +{ + int recover = 0; + + /* See if any machine dependent calls */ + if (ppc_md.machine_check_exception) + recover = ppc_md.machine_check_exception(regs); + + if (recover) + return; + + if (debugger_fault_handler(regs)) + return; + die("Machine check", regs, 0); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable Machine check"); +} + +void unknown_exception(struct pt_regs *regs) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, 0, 0); +} + +void instruction_breakpoint_exception(struct pt_regs *regs) +{ + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_iabr_match(regs)) + return; + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); +} + +void single_step_exception(struct pt_regs *regs) +{ + regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */ + + if (notify_die(DIE_SSTEP, "single_step", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_sstep(regs)) + return; + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); +} + +/* + * After we have successfully emulated an instruction, we have to + * check if the instruction was being single-stepped, and if so, + * pretend we got a single-step exception. This was pointed out + * by Kumar Gala. -- paulus + */ +static inline void emulate_single_step(struct pt_regs *regs) +{ + if (regs->msr & MSR_SE) + single_step_exception(regs); +} + +static void parse_fpe(struct pt_regs *regs) +{ + int code = 0; + unsigned long fpscr; + + flush_fp_to_thread(current); + + fpscr = current->thread.fpscr; + + /* Invalid operation */ + if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) + code = FPE_FLTINV; + + /* Overflow */ + else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX)) + code = FPE_FLTOVF; + + /* Underflow */ + else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX)) + code = FPE_FLTUND; + + /* Divide by zero */ + else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX)) + code = FPE_FLTDIV; + + /* Inexact result */ + else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX)) + code = FPE_FLTRES; + + _exception(SIGFPE, regs, code, regs->nip); +} + +/* + * Illegal instruction emulation support. Return non-zero if we can't + * emulate, or -EFAULT if the associated memory access caused an access + * fault. Return zero on success. + */ + +#define INST_MFSPR_PVR 0x7c1f42a6 +#define INST_MFSPR_PVR_MASK 0xfc1fffff + +#define INST_DCBA 0x7c0005ec +#define INST_DCBA_MASK 0x7c0007fe + +#define INST_MCRXR 0x7c000400 +#define INST_MCRXR_MASK 0x7c0007fe + +static int emulate_instruction(struct pt_regs *regs) +{ + unsigned int instword; + + if (!user_mode(regs)) + return -EINVAL; + + CHECK_FULL_REGS(regs); + + if (get_user(instword, (unsigned int __user *)(regs->nip))) + return -EFAULT; + + /* Emulate the mfspr rD, PVR. */ + if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { + unsigned int rd; + + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_PVR); + return 0; + } + + /* Emulating the dcba insn is just a no-op. */ + if ((instword & INST_DCBA_MASK) == INST_DCBA) { + static int warned; + + if (!warned) { + printk(KERN_WARNING + "process %d (%s) uses obsolete 'dcba' insn\n", + current->pid, current->comm); + warned = 1; + } + return 0; + } + + /* Emulate the mcrxr insn. */ + if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { + static int warned; + unsigned int shift; + + if (!warned) { + printk(KERN_WARNING + "process %d (%s) uses obsolete 'mcrxr' insn\n", + current->pid, current->comm); + warned = 1; + } + + shift = (instword >> 21) & 0x1c; + regs->ccr &= ~(0xf0000000 >> shift); + regs->ccr |= (regs->xer & 0xf0000000) >> shift; + regs->xer &= ~0xf0000000; + return 0; + } + + return -EINVAL; +} + +/* + * Look through the list of trap instructions that are used for BUG(), + * BUG_ON() and WARN_ON() and see if we hit one. At this point we know + * that the exception was caused by a trap instruction of some kind. + * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0 + * otherwise. + */ +extern struct bug_entry __start___bug_table[], __stop___bug_table[]; + +#ifndef CONFIG_MODULES +#define module_find_bug(x) NULL +#endif + +struct bug_entry *find_bug(unsigned long bugaddr) +{ + struct bug_entry *bug; + + for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) + if (bugaddr == bug->bug_addr) + return bug; + return module_find_bug(bugaddr); +} + +static int +check_bug_trap(struct pt_regs *regs) +{ + struct bug_entry *bug; + unsigned long addr; + + if (regs->msr & MSR_PR) + return 0; /* not in kernel */ + addr = regs->nip; /* address of trap instruction */ + if (addr < PAGE_OFFSET) + return 0; + bug = find_bug(regs->nip); + if (bug == NULL) + return 0; + if (bug->line & BUG_WARNING_TRAP) { + /* this is a WARN_ON rather than BUG/BUG_ON */ + printk(KERN_ERR "Badness in %s at %s:%d\n", + bug->function, bug->file, + (unsigned int)bug->line & ~BUG_WARNING_TRAP); + show_stack(current, (void *)regs->gpr[1]); + return 1; + } + printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, (unsigned int)bug->line); + return 0; +} + +void program_check_exception(struct pt_regs *regs) +{ + if (debugger_fault_handler(regs)) + return; + + if (regs->msr & 0x100000) { + /* IEEE FP exception */ + parse_fpe(regs); + } else if (regs->msr & 0x20000) { + /* trap exception */ + + if (notify_die(DIE_BPT, "breakpoint", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_bpt(regs)) + return; + + if (check_bug_trap(regs)) { + regs->nip += 4; + return; + } + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + + } else { + /* Privileged or illegal instruction; try to emulate it. */ + switch (emulate_instruction(regs)) { + case 0: + regs->nip += 4; + emulate_single_step(regs); + break; + + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + break; + + default: + if (regs->msr & 0x40000) + /* priveleged */ + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + else + /* illegal */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + break; + } + } +} + +void kernel_fp_unavailable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); +} + +void altivec_unavailable_exception(struct pt_regs *regs) +{ +#ifndef CONFIG_ALTIVEC + if (user_mode(regs)) { + /* A user program has executed an altivec instruction, + but this kernel doesn't support altivec. */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } +#endif + printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); +} + +extern perf_irq_t perf_irq; + +void performance_monitor_exception(struct pt_regs *regs) +{ + perf_irq(regs); +} + +void alignment_exception(struct pt_regs *regs) +{ + int fixed; + + fixed = fix_alignment(regs); + + if (fixed == 1) { + regs->nip += 4; /* skip over emulated instruction */ + emulate_single_step(regs); + return; + } + + /* Operand address was bad */ + if (fixed == -EFAULT) { + if (user_mode(regs)) { + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->dar); + } else { + /* Search exception table */ + bad_page_fault(regs, regs->dar, SIGSEGV); + } + + return; + } + + _exception(SIGBUS, regs, BUS_ADRALN, regs->nip); +} + +#ifdef CONFIG_ALTIVEC +void altivec_assist_exception(struct pt_regs *regs) +{ + int err; + siginfo_t info; + + if (!user_mode(regs)) { + printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" + " at %lx\n", regs->nip); + die("Kernel VMX/Altivec assist exception", regs, SIGILL); + } + + flush_altivec_to_thread(current); + + err = emulate_altivec(regs); + if (err == 0) { + regs->nip += 4; /* skip emulated instruction */ + emulate_single_step(regs); + return; + } + + if (err == -EFAULT) { + /* got an error reading the instruction */ + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *) regs->nip; + force_sig_info(SIGSEGV, &info, current); + } else { + /* didn't recognize the instruction */ + /* XXX quick hack for now: set the non-Java bit in the VSCR */ + if (printk_ratelimit()) + printk(KERN_ERR "Unrecognized altivec instruction " + "in %s at %lx\n", current->comm, regs->nip); + current->thread.vscr.u[3] |= 0x10000; + } +} +#endif /* CONFIG_ALTIVEC */ + +/* + * We enter here if we get an unrecoverable exception, that is, one + * that happened at a point where the RI (recoverable interrupt) bit + * in the MSR is 0. This indicates that SRR0/1 are live, and that + * we therefore lost state by taking this exception. + */ +void unrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", + regs->trap, regs->nip); + die("Unrecoverable exception", regs, SIGABRT); +} + +/* + * We enter here if we discover during exception entry that we are + * running in supervisor mode with a userspace value in the stack pointer. + */ +void kernel_bad_stack(struct pt_regs *regs) +{ + printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", + regs->gpr[1], regs->nip); + die("Bad kernel stack pointer", regs, SIGABRT); +} + +void __init trap_init(void) +{ +} diff --git a/arch/ppc64/kernel/u3_iommu.c b/arch/ppc64/kernel/u3_iommu.c new file mode 100644 index 00000000000..b6e3bca4102 --- /dev/null +++ b/arch/ppc64/kernel/u3_iommu.c @@ -0,0 +1,349 @@ +/* + * arch/ppc64/kernel/u3_iommu.c + * + * Copyright (C) 2004 Olof Johansson , IBM Corporation + * + * Based on pSeries_iommu.c: + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * Copyright (C) 2004 Olof Johansson , IBM Corporation + * + * Dynamic DMA mapping support, Apple U3 & IBM CPC925 "DART" iommu. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +extern int iommu_force_on; + +/* physical base of DART registers */ +#define DART_BASE 0xf8033000UL + +/* Offset from base to control register */ +#define DARTCNTL 0 +/* Offset from base to exception register */ +#define DARTEXCP 0x10 +/* Offset from base to TLB tag registers */ +#define DARTTAG 0x1000 + + +/* Control Register fields */ + +/* base address of table (pfn) */ +#define DARTCNTL_BASE_MASK 0xfffff +#define DARTCNTL_BASE_SHIFT 12 + +#define DARTCNTL_FLUSHTLB 0x400 +#define DARTCNTL_ENABLE 0x200 + +/* size of table in pages */ +#define DARTCNTL_SIZE_MASK 0x1ff +#define DARTCNTL_SIZE_SHIFT 0 + +/* DART table fields */ +#define DARTMAP_VALID 0x80000000 +#define DARTMAP_RPNMASK 0x00ffffff + +/* Physical base address and size of the DART table */ +unsigned long dart_tablebase; /* exported to htab_initialize */ +static unsigned long dart_tablesize; + +/* Virtual base address of the DART table */ +static u32 *dart_vbase; + +/* Mapped base address for the dart */ +static unsigned int *dart; + +/* Dummy val that entries are set to when unused */ +static unsigned int dart_emptyval; + +static struct iommu_table iommu_table_u3; +static int iommu_table_u3_inited; +static int dart_dirty; + +#define DBG(...) + +static inline void dart_tlb_invalidate_all(void) +{ + unsigned long l = 0; + unsigned int reg; + unsigned long limit; + + DBG("dart: flush\n"); + + /* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the + * control register and wait for it to clear. + * + * Gotcha: Sometimes, the DART won't detect that the bit gets + * set. If so, clear it and set it again. + */ + + limit = 0; + +retry: + reg = in_be32((unsigned int *)dart+DARTCNTL); + reg |= DARTCNTL_FLUSHTLB; + out_be32((unsigned int *)dart+DARTCNTL, reg); + + l = 0; + while ((in_be32((unsigned int *)dart+DARTCNTL) & DARTCNTL_FLUSHTLB) && + l < (1L<it_base) + index; + + /* On U3, all memory is contigous, so we can move this + * out of the loop. + */ + while (npages--) { + rpn = virt_to_abs(uaddr) >> PAGE_SHIFT; + + *(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK); + + rpn++; + uaddr += PAGE_SIZE; + } + + dart_dirty = 1; +} + + +static void dart_free(struct iommu_table *tbl, long index, long npages) +{ + unsigned int *dp; + + /* We don't worry about flushing the TLB cache. The only drawback of + * not doing it is that we won't catch buggy device drivers doing + * bad DMAs, but then no 32-bit architecture ever does either. + */ + + DBG("dart: free at: %lx, %lx\n", index, npages); + + dp = ((unsigned int *)tbl->it_base) + index; + + while (npages--) + *(dp++) = dart_emptyval; +} + + +static int dart_init(struct device_node *dart_node) +{ + unsigned int regword; + unsigned int i; + unsigned long tmp; + + if (dart_tablebase == 0 || dart_tablesize == 0) { + printk(KERN_INFO "U3-DART: table not allocated, using direct DMA\n"); + return -ENODEV; + } + + /* Make sure nothing from the DART range remains in the CPU cache + * from a previous mapping that existed before the kernel took + * over + */ + flush_dcache_phys_range(dart_tablebase, dart_tablebase + dart_tablesize); + + /* Allocate a spare page to map all invalid DART pages. We need to do + * that to work around what looks like a problem with the HT bridge + * prefetching into invalid pages and corrupting data + */ + tmp = lmb_alloc(PAGE_SIZE, PAGE_SIZE); + if (!tmp) + panic("U3-DART: Cannot allocate spare page!"); + dart_emptyval = DARTMAP_VALID | ((tmp >> PAGE_SHIFT) & DARTMAP_RPNMASK); + + /* Map in DART registers. FIXME: Use device node to get base address */ + dart = ioremap(DART_BASE, 0x7000); + if (dart == NULL) + panic("U3-DART: Cannot map registers!"); + + /* Set initial control register contents: table base, + * table size and enable bit + */ + regword = DARTCNTL_ENABLE | + ((dart_tablebase >> PAGE_SHIFT) << DARTCNTL_BASE_SHIFT) | + (((dart_tablesize >> PAGE_SHIFT) & DARTCNTL_SIZE_MASK) + << DARTCNTL_SIZE_SHIFT); + dart_vbase = ioremap(virt_to_abs(dart_tablebase), dart_tablesize); + + /* Fill initial table */ + for (i = 0; i < dart_tablesize/4; i++) + dart_vbase[i] = dart_emptyval; + + /* Initialize DART with table base and enable it. */ + out_be32((unsigned int *)dart, regword); + + /* Invalidate DART to get rid of possible stale TLBs */ + dart_tlb_invalidate_all(); + + printk(KERN_INFO "U3/CPC925 DART IOMMU initialized\n"); + + return 0; +} + +static void iommu_table_u3_setup(void) +{ + iommu_table_u3.it_busno = 0; + iommu_table_u3.it_offset = 0; + /* it_size is in number of entries */ + iommu_table_u3.it_size = dart_tablesize / sizeof(u32); + + /* Initialize the common IOMMU code */ + iommu_table_u3.it_base = (unsigned long)dart_vbase; + iommu_table_u3.it_index = 0; + iommu_table_u3.it_blocksize = 1; + iommu_init_table(&iommu_table_u3); + + /* Reserve the last page of the DART to avoid possible prefetch + * past the DART mapped area + */ + set_bit(iommu_table_u3.it_size - 1, iommu_table_u3.it_map); +} + +static void iommu_dev_setup_u3(struct pci_dev *dev) +{ + struct device_node *dn; + + /* We only have one iommu table on the mac for now, which makes + * things simple. Setup all PCI devices to point to this table + * + * We must use pci_device_to_OF_node() to make sure that + * we get the real "final" pointer to the device in the + * pci_dev sysdata and not the temporary PHB one + */ + dn = pci_device_to_OF_node(dev); + + if (dn) + dn->iommu_table = &iommu_table_u3; +} + +static void iommu_bus_setup_u3(struct pci_bus *bus) +{ + struct device_node *dn; + + if (!iommu_table_u3_inited) { + iommu_table_u3_inited = 1; + iommu_table_u3_setup(); + } + + dn = pci_bus_to_OF_node(bus); + + if (dn) + dn->iommu_table = &iommu_table_u3; +} + +static void iommu_dev_setup_null(struct pci_dev *dev) { } +static void iommu_bus_setup_null(struct pci_bus *bus) { } + +void iommu_init_early_u3(void) +{ + struct device_node *dn; + + /* Find the DART in the device-tree */ + dn = of_find_compatible_node(NULL, "dart", "u3-dart"); + if (dn == NULL) + return; + + /* Setup low level TCE operations for the core IOMMU code */ + ppc_md.tce_build = dart_build; + ppc_md.tce_free = dart_free; + ppc_md.tce_flush = dart_flush; + + /* Initialize the DART HW */ + if (dart_init(dn)) { + /* If init failed, use direct iommu and null setup functions */ + ppc_md.iommu_dev_setup = iommu_dev_setup_null; + ppc_md.iommu_bus_setup = iommu_bus_setup_null; + + /* Setup pci_dma ops */ + pci_direct_iommu_init(); + } else { + ppc_md.iommu_dev_setup = iommu_dev_setup_u3; + ppc_md.iommu_bus_setup = iommu_bus_setup_u3; + + /* Setup pci_dma ops */ + pci_iommu_init(); + } +} + + +void __init alloc_u3_dart_table(void) +{ + /* Only reserve DART space if machine has more than 2GB of RAM + * or if requested with iommu=on on cmdline. + */ + if (lmb_end_of_DRAM() <= 0x80000000ull && !iommu_force_on) + return; + + /* 512 pages (2MB) is max DART tablesize. */ + dart_tablesize = 1UL << 21; + /* 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we + * will blow up an entire large page anyway in the kernel mapping + */ + dart_tablebase = (unsigned long) + abs_to_virt(lmb_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); + + printk(KERN_INFO "U3-DART allocated at: %lx\n", dart_tablebase); +} diff --git a/arch/ppc64/kernel/udbg.c b/arch/ppc64/kernel/udbg.c new file mode 100644 index 00000000000..d4ccd6f1ef4 --- /dev/null +++ b/arch/ppc64/kernel/udbg.c @@ -0,0 +1,360 @@ +/* + * NS16550 Serial Port (uart) debugging stuff. + * + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#define WANT_PPCDBG_TAB /* Only defined here */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern u8 real_readb(volatile u8 __iomem *addr); +extern void real_writeb(u8 data, volatile u8 __iomem *addr); + +struct NS16550 { + /* this struct must be packed */ + unsigned char rbr; /* 0 */ + unsigned char ier; /* 1 */ + unsigned char fcr; /* 2 */ + unsigned char lcr; /* 3 */ + unsigned char mcr; /* 4 */ + unsigned char lsr; /* 5 */ + unsigned char msr; /* 6 */ + unsigned char scr; /* 7 */ +}; + +#define thr rbr +#define iir fcr +#define dll rbr +#define dlm ier +#define dlab lcr + +#define LSR_DR 0x01 /* Data ready */ +#define LSR_OE 0x02 /* Overrun */ +#define LSR_PE 0x04 /* Parity error */ +#define LSR_FE 0x08 /* Framing error */ +#define LSR_BI 0x10 /* Break */ +#define LSR_THRE 0x20 /* Xmit holding register empty */ +#define LSR_TEMT 0x40 /* Xmitter empty */ +#define LSR_ERR 0x80 /* Error */ + +static volatile struct NS16550 __iomem *udbg_comport; + +void udbg_init_uart(void __iomem *comport, unsigned int speed) +{ + u16 dll = speed ? (115200 / speed) : 12; + + if (comport) { + udbg_comport = (struct NS16550 __iomem *)comport; + out_8(&udbg_comport->lcr, 0x00); + out_8(&udbg_comport->ier, 0xff); + out_8(&udbg_comport->ier, 0x00); + out_8(&udbg_comport->lcr, 0x80); /* Access baud rate */ + out_8(&udbg_comport->dll, dll & 0xff); /* 1 = 115200, 2 = 57600, + 3 = 38400, 12 = 9600 baud */ + out_8(&udbg_comport->dlm, dll >> 8); /* dll >> 8 which should be zero + for fast rates; */ + out_8(&udbg_comport->lcr, 0x03); /* 8 data, 1 stop, no parity */ + out_8(&udbg_comport->mcr, 0x03); /* RTS/DTR */ + out_8(&udbg_comport->fcr ,0x07); /* Clear & enable FIFOs */ + } +} + +#ifdef CONFIG_PPC_PMAC + +#define SCC_TXRDY 4 +#define SCC_RXRDY 1 + +static volatile u8 __iomem *sccc; +static volatile u8 __iomem *sccd; + +static unsigned char scc_inittab[] = { + 13, 0, /* set baud rate divisor */ + 12, 0, + 14, 1, /* baud rate gen enable, src=rtxc */ + 11, 0x50, /* clocks = br gen */ + 5, 0xea, /* tx 8 bits, assert DTR & RTS */ + 4, 0x46, /* x16 clock, 1 stop */ + 3, 0xc1, /* rx enable, 8 bits */ +}; + +void udbg_init_scc(struct device_node *np) +{ + u32 *reg; + unsigned long addr; + int i, x; + + if (np == NULL) + np = of_find_node_by_name(NULL, "escc"); + if (np == NULL || np->parent == NULL) + return; + + udbg_printf("found SCC...\n"); + /* Get address within mac-io ASIC */ + reg = (u32 *)get_property(np, "reg", NULL); + if (reg == NULL) + return; + addr = reg[0]; + udbg_printf("local addr: %lx\n", addr); + /* Get address of mac-io PCI itself */ + reg = (u32 *)get_property(np->parent, "assigned-addresses", NULL); + if (reg == NULL) + return; + addr += reg[2]; + udbg_printf("final addr: %lx\n", addr); + + /* Setup for 57600 8N1 */ + addr += 0x20; + sccc = (volatile u8 * __iomem) ioremap(addr & PAGE_MASK, PAGE_SIZE) ; + sccc += addr & ~PAGE_MASK; + sccd = sccc + 0x10; + + udbg_printf("ioremap result sccc: %p\n", sccc); + mb(); + + for (i = 20000; i != 0; --i) + x = in_8(sccc); + out_8(sccc, 0x09); /* reset A or B side */ + out_8(sccc, 0xc0); + for (i = 0; i < sizeof(scc_inittab); ++i) + out_8(sccc, scc_inittab[i]); + + ppc_md.udbg_putc = udbg_putc; + ppc_md.udbg_getc = udbg_getc; + ppc_md.udbg_getc_poll = udbg_getc_poll; + + udbg_puts("Hello World !\n"); +} + +#endif /* CONFIG_PPC_PMAC */ + +#if CONFIG_PPC_PMAC +static void udbg_real_putc(unsigned char c) +{ + while ((real_readb(sccc) & SCC_TXRDY) == 0) + ; + real_writeb(c, sccd); + if (c == '\n') + udbg_real_putc('\r'); +} + +void udbg_init_pmac_realmode(void) +{ + sccc = (volatile u8 __iomem *)0x80013020ul; + sccd = (volatile u8 __iomem *)0x80013030ul; + + ppc_md.udbg_putc = udbg_real_putc; + ppc_md.udbg_getc = NULL; + ppc_md.udbg_getc_poll = NULL; +} +#endif /* CONFIG_PPC_PMAC */ + +#ifdef CONFIG_PPC_MAPLE +void udbg_maple_real_putc(unsigned char c) +{ + if (udbg_comport) { + while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + real_writeb(c, &udbg_comport->thr); eieio(); + if (c == '\n') { + /* Also put a CR. This is for convenience. */ + while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + real_writeb('\r', &udbg_comport->thr); eieio(); + } + } +} + +void udbg_init_maple_realmode(void) +{ + udbg_comport = (volatile struct NS16550 __iomem *)0xf40003f8; + + ppc_md.udbg_putc = udbg_maple_real_putc; + ppc_md.udbg_getc = NULL; + ppc_md.udbg_getc_poll = NULL; +} +#endif /* CONFIG_PPC_MAPLE */ + +void udbg_putc(unsigned char c) +{ + if (udbg_comport) { + while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + out_8(&udbg_comport->thr, c); + if (c == '\n') { + /* Also put a CR. This is for convenience. */ + while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + out_8(&udbg_comport->thr, '\r'); + } + } +#ifdef CONFIG_PPC_PMAC + else if (sccc) { + while ((in_8(sccc) & SCC_TXRDY) == 0) + ; + out_8(sccd, c); + if (c == '\n') + udbg_putc('\r'); + } +#endif /* CONFIG_PPC_PMAC */ +} + +int udbg_getc_poll(void) +{ + if (udbg_comport) { + if ((in_8(&udbg_comport->lsr) & LSR_DR) != 0) + return in_8(&udbg_comport->rbr); + else + return -1; + } +#ifdef CONFIG_PPC_PMAC + else if (sccc) { + if ((in_8(sccc) & SCC_RXRDY) != 0) + return in_8(sccd); + else + return -1; + } +#endif /* CONFIG_PPC_PMAC */ + return -1; +} + +unsigned char udbg_getc(void) +{ + if (udbg_comport) { + while ((in_8(&udbg_comport->lsr) & LSR_DR) == 0) + /* wait for char */; + return in_8(&udbg_comport->rbr); + } +#ifdef CONFIG_PPC_PMAC + else if (sccc) { + while ((in_8(sccc) & SCC_RXRDY) == 0) + ; + return in_8(sccd); + } +#endif /* CONFIG_PPC_PMAC */ + return 0; +} + +void udbg_puts(const char *s) +{ + if (ppc_md.udbg_putc) { + char c; + + if (s && *s != '\0') { + while ((c = *s++) != '\0') + ppc_md.udbg_putc(c); + } + } +#if 0 + else { + printk("%s", s); + } +#endif +} + +int udbg_write(const char *s, int n) +{ + int remain = n; + char c; + + if (!ppc_md.udbg_putc) + return 0; + + if (s && *s != '\0') { + while (((c = *s++) != '\0') && (remain-- > 0)) { + ppc_md.udbg_putc(c); + } + } + + return n - remain; +} + +int udbg_read(char *buf, int buflen) +{ + char c, *p = buf; + int i; + + if (!ppc_md.udbg_getc) + return 0; + + for (i = 0; i < buflen; ++i) { + do { + c = ppc_md.udbg_getc(); + } while (c == 0x11 || c == 0x13); + if (c == 0) + break; + *p++ = c; + } + + return i; +} + +void udbg_console_write(struct console *con, const char *s, unsigned int n) +{ + udbg_write(s, n); +} + +#define UDBG_BUFSIZE 256 +void udbg_printf(const char *fmt, ...) +{ + unsigned char buf[UDBG_BUFSIZE]; + va_list args; + + va_start(args, fmt); + vsnprintf(buf, UDBG_BUFSIZE, fmt, args); + udbg_puts(buf); + va_end(args); +} + +/* Special print used by PPCDBG() macro */ +void udbg_ppcdbg(unsigned long debug_flags, const char *fmt, ...) +{ + unsigned long active_debugs = debug_flags & ppc64_debug_switch; + + if (active_debugs) { + va_list ap; + unsigned char buf[UDBG_BUFSIZE]; + unsigned long i, len = 0; + + for (i=0; i < PPCDBG_NUM_FLAGS; i++) { + if (((1U << i) & active_debugs) && + trace_names[i]) { + len += strlen(trace_names[i]); + udbg_puts(trace_names[i]); + break; + } + } + + snprintf(buf, UDBG_BUFSIZE, " [%s]: ", current->comm); + len += strlen(buf); + udbg_puts(buf); + + while (len < 18) { + udbg_puts(" "); + len++; + } + + va_start(ap, fmt); + vsnprintf(buf, UDBG_BUFSIZE, fmt, ap); + udbg_puts(buf); + va_end(ap); + } +} + +unsigned long udbg_ifdebug(unsigned long flags) +{ + return (flags & ppc64_debug_switch); +} diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c new file mode 100644 index 00000000000..8c4597224b7 --- /dev/null +++ b/arch/ppc64/kernel/vdso.c @@ -0,0 +1,614 @@ +/* + * linux/arch/ppc64/kernel/vdso.c + * + * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + + +/* + * The vDSOs themselves are here + */ +extern char vdso64_start, vdso64_end; +extern char vdso32_start, vdso32_end; + +static void *vdso64_kbase = &vdso64_start; +static void *vdso32_kbase = &vdso32_start; + +unsigned int vdso64_pages; +unsigned int vdso32_pages; + +/* Signal trampolines user addresses */ + +unsigned long vdso64_rt_sigtramp; +unsigned long vdso32_sigtramp; +unsigned long vdso32_rt_sigtramp; + +/* Format of the patch table */ +struct vdso_patch_def +{ + u32 pvr_mask, pvr_value; + const char *gen_name; + const char *fix_name; +}; + +/* Table of functions to patch based on the CPU type/revision + * + * TODO: Improve by adding whole lists for each entry + */ +static struct vdso_patch_def vdso_patches[] = { + { + 0xffff0000, 0x003a0000, /* POWER5 */ + "__kernel_sync_dicache", "__kernel_sync_dicache_p5" + }, + { + 0xffff0000, 0x003b0000, /* POWER5 */ + "__kernel_sync_dicache", "__kernel_sync_dicache_p5" + }, +}; + +/* + * Some infos carried around for each of them during parsing at + * boot time. + */ +struct lib32_elfinfo +{ + Elf32_Ehdr *hdr; /* ptr to ELF */ + Elf32_Sym *dynsym; /* ptr to .dynsym section */ + unsigned long dynsymsize; /* size of .dynsym section */ + char *dynstr; /* ptr to .dynstr section */ + unsigned long text; /* offset of .text section in .so */ +}; + +struct lib64_elfinfo +{ + Elf64_Ehdr *hdr; + Elf64_Sym *dynsym; + unsigned long dynsymsize; + char *dynstr; + unsigned long text; +}; + + +#ifdef __DEBUG +static void dump_one_vdso_page(struct page *pg, struct page *upg) +{ + printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT), + page_count(pg), + pg->flags); + if (upg/* && pg != upg*/) { + printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) << PAGE_SHIFT), + page_count(upg), + upg->flags); + } + printk("\n"); +} + +static void dump_vdso_pages(struct vm_area_struct * vma) +{ + int i; + + if (!vma || test_thread_flag(TIF_32BIT)) { + printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); + for (i=0; ivm_mm) ? + follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } + if (!vma || !test_thread_flag(TIF_32BIT)) { + printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase); + for (i=0; ivm_mm) ? + follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } +} +#endif /* DEBUG */ + +/* + * Keep a dummy vma_close for now, it will prevent VMA merging. + */ +static void vdso_vma_close(struct vm_area_struct * vma) +{ +} + +/* + * Our nopage() function, maps in the actual vDSO kernel pages, they will + * be mapped read-only by do_no_page(), and eventually COW'ed, either + * right away for an initial write access, or by do_wp_page(). + */ +static struct page * vdso_vma_nopage(struct vm_area_struct * vma, + unsigned long address, int *type) +{ + unsigned long offset = address - vma->vm_start; + struct page *pg; + void *vbase = test_thread_flag(TIF_32BIT) ? vdso32_kbase : vdso64_kbase; + + DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n", + current->comm, address, offset); + + if (address < vma->vm_start || address > vma->vm_end) + return NOPAGE_SIGBUS; + + /* + * Last page is systemcfg, special handling here, no get_page() a + * this is a reserved page + */ + if ((vma->vm_end - address) <= PAGE_SIZE) + return virt_to_page(systemcfg); + + pg = virt_to_page(vbase + offset); + get_page(pg); + DBG(" ->page count: %d\n", page_count(pg)); + + return pg; +} + +static struct vm_operations_struct vdso_vmops = { + .close = vdso_vma_close, + .nopage = vdso_vma_nopage, +}; + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long vdso_pages; + unsigned long vdso_base; + + if (test_thread_flag(TIF_32BIT)) { + vdso_pages = vdso32_pages; + vdso_base = VDSO32_MBASE; + } else { + vdso_pages = vdso64_pages; + vdso_base = VDSO64_MBASE; + } + + /* vDSO has a problem and was disabled, just don't "enable" it for the + * process + */ + if (vdso_pages == 0) { + current->thread.vdso_base = 0; + return 0; + } + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma == NULL) + return -ENOMEM; + if (security_vm_enough_memory(vdso_pages)) { + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } + memset(vma, 0, sizeof(*vma)); + + /* + * pick a base address for the vDSO in process space. We have a default + * base of 1Mb on which we had a random offset up to 1Mb. + * XXX: Add possibility for a program header to specify that location + */ + current->thread.vdso_base = vdso_base; + /* + ((unsigned long)vma & 0x000ff000); */ + + vma->vm_mm = mm; + vma->vm_start = current->thread.vdso_base; + + /* + * the VMA size is one page more than the vDSO since systemcfg + * is mapped in the last one + */ + vma->vm_end = vma->vm_start + ((vdso_pages + 1) << PAGE_SHIFT); + + /* + * our vma flags don't have VM_WRITE so by default, the process isn't allowed + * to write those pages. + * gdb can break that with ptrace interface, and thus trigger COW on those + * pages but it's then your responsibility to never do that on the "data" page + * of the vDSO or you'll stop getting kernel updates and your nice userland + * gettimeofday will be totally dead. It's fine to use that for setting + * breakpoints in the vDSO code pages though + */ + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + vma->vm_flags |= mm->def_flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; + vma->vm_ops = &vdso_vmops; + + down_write(&mm->mmap_sem); + insert_vm_struct(mm, vma); + mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + up_write(&mm->mmap_sem); + + return 0; +} + +static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf32_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + *size = 0; + return NULL; +} + +static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf64_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + if (size) + *size = 0; + return NULL; +} + +static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, const char *symname) +{ + unsigned int i; + char name[32], *c; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, 32); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, const char *symname) +{ + unsigned int i; + char name[32], *c; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, 32); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function32(struct lib32_elfinfo *lib, const char *symname) +{ + Elf32_Sym *sym = find_symbol32(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO32: function %s not found !\n", symname); + return 0; + } + return sym->st_value - VDSO32_LBASE; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function64(struct lib64_elfinfo *lib, const char *symname) +{ + Elf64_Sym *sym = find_symbol64(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO64: function %s not found !\n", symname); + return 0; + } +#ifdef VDS64_HAS_DESCRIPTORS + return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) - VDSO64_LBASE; +#else + return sym->st_value - VDSO64_LBASE; +#endif +} + + +static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + void *sect; + + /* + * Locate symbol tables & text section + */ + + v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize); + v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL); + if (v32->dynsym == NULL || v32->dynstr == NULL) { + printk(KERN_ERR "vDSO32: a required symbol section was not found\n"); + return -1; + } + sect = find_section32(v32->hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO32: the .text section was not found\n"); + return -1; + } + v32->text = sect - vdso32_kbase; + + v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize); + v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL); + if (v64->dynsym == NULL || v64->dynstr == NULL) { + printk(KERN_ERR "vDSO64: a required symbol section was not found\n"); + return -1; + } + sect = find_section64(v64->hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO64: the .text section was not found\n"); + return -1; + } + v64->text = sect - vdso64_kbase; + + return 0; +} + +static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + /* + * Find signal trampolines + */ + + vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64"); + vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32"); + vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32"); +} + +static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + Elf32_Sym *sym32; + Elf64_Sym *sym64; + + sym32 = find_symbol32(v32, "__kernel_datapage_offset"); + if (sym32 == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol __kernel_datapage_offset !\n"); + return -1; + } + *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = + (vdso32_pages << PAGE_SHIFT) - (sym32->st_value - VDSO32_LBASE); + + sym64 = find_symbol64(v64, "__kernel_datapage_offset"); + if (sym64 == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol __kernel_datapage_offset !\n"); + return -1; + } + *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) = + (vdso64_pages << PAGE_SHIFT) - (sym64->st_value - VDSO64_LBASE); + + return 0; +} + +static int vdso_do_func_patch32(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64, + const char *orig, const char *fix) +{ + Elf32_Sym *sym32_gen, *sym32_fix; + + sym32_gen = find_symbol32(v32, orig); + if (sym32_gen == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig); + return -1; + } + sym32_fix = find_symbol32(v32, fix); + if (sym32_fix == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix); + return -1; + } + sym32_gen->st_value = sym32_fix->st_value; + sym32_gen->st_size = sym32_fix->st_size; + sym32_gen->st_info = sym32_fix->st_info; + sym32_gen->st_other = sym32_fix->st_other; + sym32_gen->st_shndx = sym32_fix->st_shndx; + + return 0; +} + +static int vdso_do_func_patch64(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64, + const char *orig, const char *fix) +{ + Elf64_Sym *sym64_gen, *sym64_fix; + + sym64_gen = find_symbol64(v64, orig); + if (sym64_gen == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig); + return -1; + } + sym64_fix = find_symbol64(v64, fix); + if (sym64_fix == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix); + return -1; + } + sym64_gen->st_value = sym64_fix->st_value; + sym64_gen->st_size = sym64_fix->st_size; + sym64_gen->st_info = sym64_fix->st_info; + sym64_gen->st_other = sym64_fix->st_other; + sym64_gen->st_shndx = sym64_fix->st_shndx; + + return 0; +} + +static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + u32 pvr; + int i; + + pvr = mfspr(SPRN_PVR); + for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) { + struct vdso_patch_def *patch = &vdso_patches[i]; + int match = (pvr & patch->pvr_mask) == patch->pvr_value; + + DBG("patch %d (mask: %x, pvr: %x) : %s\n", + i, patch->pvr_mask, patch->pvr_value, match ? "match" : "skip"); + + if (!match) + continue; + + DBG("replacing %s with %s...\n", patch->gen_name, patch->fix_name); + + /* + * Patch the 32 bits and 64 bits symbols. Note that we do not patch + * the "." symbol on 64 bits. It would be easy to do, but doesn't + * seem to be necessary, patching the OPD symbol is enough. + */ + vdso_do_func_patch32(v32, v64, patch->gen_name, patch->fix_name); + vdso_do_func_patch64(v32, v64, patch->gen_name, patch->fix_name); + } + + return 0; +} + + +static __init int vdso_setup(void) +{ + struct lib32_elfinfo v32; + struct lib64_elfinfo v64; + + v32.hdr = vdso32_kbase; + v64.hdr = vdso64_kbase; + + if (vdso_do_find_sections(&v32, &v64)) + return -1; + + if (vdso_fixup_datapage(&v32, &v64)) + return -1; + + if (vdso_fixup_alt_funcs(&v32, &v64)) + return -1; + + vdso_setup_trampolines(&v32, &v64); + + return 0; +} + +void __init vdso_init(void) +{ + int i; + + vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; + vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; + + DBG("vdso64_kbase: %p, 0x%x pages, vdso32_kbase: %p, 0x%x pages\n", + vdso64_kbase, vdso64_pages, vdso32_kbase, vdso32_pages); + + /* + * Initialize the vDSO images in memory, that is do necessary + * fixups of vDSO symbols, locate trampolines, etc... + */ + if (vdso_setup()) { + printk(KERN_ERR "vDSO setup failure, not enabled !\n"); + /* XXX should free pages here ? */ + vdso64_pages = vdso32_pages = 0; + return; + } + + /* Make sure pages are in the correct state */ + for (i = 0; i < vdso64_pages; i++) { + struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + } + for (i = 0; i < vdso32_pages; i++) { + struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + } +} + +int in_gate_area_no_task(unsigned long addr) +{ + return 0; +} + +int in_gate_area(struct task_struct *task, unsigned long addr) +{ + return 0; +} + +struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +{ + return NULL; +} + diff --git a/arch/ppc64/kernel/vdso32/Makefile b/arch/ppc64/kernel/vdso32/Makefile new file mode 100644 index 00000000000..ede2f7e477c --- /dev/null +++ b/arch/ppc64/kernel/vdso32/Makefile @@ -0,0 +1,36 @@ + +# List of files in the vdso, has to be asm only for now + +obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o + +# Build rules + +targets := $(obj-vdso32) vdso32.so +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + + +EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin +EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 +EXTRA_AFLAGS := -D__VDSO32__ -s + +obj-y += vdso32_wrapper.o +extra-y += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) + $(call if_changed,vdso32ld) + +# assembly rules for the .S files +$(obj-vdso32): %.o: %.S + $(call if_changed_dep,vdso32as) + +# actual build commands +quiet_cmd_vdso32ld = VDSO32L $@ + cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< + diff --git a/arch/ppc64/kernel/vdso32/cacheflush.S b/arch/ppc64/kernel/vdso32/cacheflush.S new file mode 100644 index 00000000000..c74fddb6afd --- /dev/null +++ b/arch/ppc64/kernel/vdso32/cacheflush.S @@ -0,0 +1,65 @@ +/* + * vDSO provided cache flush routines + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include + + .text + +/* + * Default "generic" version of __kernel_sync_dicache. + * + * void __kernel_sync_dicache(unsigned long start, unsigned long end) + * + * Flushes the data cache & invalidate the instruction cache for the + * provided range [start, end[ + * + * Note: all CPUs supported by this kernel have a 128 bytes cache + * line size so we don't have to peek that info from the datapage + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache) + .cfi_startproc + li r5,127 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + srwi. r8,r8,7 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 + mr r3,r6 +1: dcbst 0,r3 + addi r3,r3,128 + bdnz 1b + sync + mtctr r8 +1: icbi 0,r6 + addi r6,r6,128 + bdnz 1b + isync + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache) + + +/* + * POWER5 version of __kernel_sync_dicache + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) + .cfi_startproc + sync + isync + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache_p5) + diff --git a/arch/ppc64/kernel/vdso32/datapage.S b/arch/ppc64/kernel/vdso32/datapage.S new file mode 100644 index 00000000000..29b6bd32e1f --- /dev/null +++ b/arch/ppc64/kernel/vdso32/datapage.S @@ -0,0 +1,68 @@ +/* + * Access to the shared data page by the vDSO & syscall map + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + + .text +V_FUNCTION_BEGIN(__get_datapage) + .cfi_startproc + /* We don't want that exposed or overridable as we want other objects + * to be able to bl directly to here + */ + .protected __get_datapage + .hidden __get_datapage + + mflr r0 + .cfi_register lr,r0 + + bcl 20,31,1f + .global __kernel_datapage_offset; +__kernel_datapage_offset: + .long 0 +1: + mflr r3 + mtlr r0 + lwz r0,0(r3) + add r3,r0,r3 + blr + .cfi_endproc +V_FUNCTION_END(__get_datapage) + +/* + * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; + * + * returns a pointer to the syscall map. the map is agnostic to the + * size of "long", unlike kernel bitops, it stores bits from top to + * bottom so that memory actually contains a linear bitmap + * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of + * 32 bits int at N >> 5. + */ +V_FUNCTION_BEGIN(__kernel_get_syscall_map) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + + mr r4,r3 + bl __get_datapage@local + mtlr r12 + addi r3,r3,CFG_SYSCALL_MAP32 + cmpli cr0,r4,0 + beqlr + li r0,__NR_syscalls + stw r0,0(r4) + blr + .cfi_endproc +V_FUNCTION_END(__kernel_get_syscall_map) diff --git a/arch/ppc64/kernel/vdso32/gettimeofday.S b/arch/ppc64/kernel/vdso32/gettimeofday.S new file mode 100644 index 00000000000..ca7f415195c --- /dev/null +++ b/arch/ppc64/kernel/vdso32/gettimeofday.S @@ -0,0 +1,139 @@ +/* + * Userland implementation of gettimeofday() for 32 bits processes in a + * ppc64 kernel for use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include + + .text +/* + * Exact prototype of gettimeofday + * + * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); + * + */ +V_FUNCTION_BEGIN(__kernel_gettimeofday) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + + mr r10,r3 /* r10 saves tv */ + mr r11,r4 /* r11 saves tz */ + bl __get_datapage@local /* get data page */ + mr r9, r3 /* datapage ptr in r9 */ + bl __do_get_xsec@local /* get xsec from tb & kernel */ + bne- 2f /* out of line -> do syscall */ + + /* seconds are xsec >> 20 */ + rlwinm r5,r4,12,20,31 + rlwimi r5,r3,12,0,19 + stw r5,TVAL32_TV_SEC(r10) + + /* get remaining xsec and convert to usec. we scale + * up remaining xsec by 12 bits and get the top 32 bits + * of the multiplication + */ + rlwinm r5,r4,12,0,19 + lis r6,1000000@h + ori r6,r6,1000000@l + mulhwu r5,r5,r6 + stw r5,TVAL32_TV_USEC(r10) + + cmpli cr0,r11,0 /* check if tz is NULL */ + beq 1f + lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */ + lwz r5,CFG_TZ_DSTTIME(r9) + stw r4,TZONE_TZ_MINWEST(r11) + stw r5,TZONE_TZ_DSTTIME(r11) + +1: mtlr r12 + blr + +2: mr r3,r10 + mr r4,r11 + li r0,__NR_gettimeofday + sc + b 1b + .cfi_endproc +V_FUNCTION_END(__kernel_gettimeofday) + +/* + * This is the core of gettimeofday(), it returns the xsec + * value in r3 & r4 and expects the datapage ptr (non clobbered) + * in r9. clobbers r0,r4,r5,r6,r7,r8 +*/ +__do_get_xsec: + .cfi_startproc + /* Check for update count & load values. We use the low + * order 32 bits of the update count + */ +1: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9) + andi. r0,r8,1 /* pending update ? loop */ + bne- 1b + xor r0,r8,r8 /* create dependency */ + add r9,r9,r0 + + /* Load orig stamp (offset to TB) */ + lwz r5,CFG_TB_ORIG_STAMP(r9) + lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) + + /* Get a stable TB value */ +2: mftbu r3 + mftbl r4 + mftbu r0 + cmpl cr0,r3,r0 + bne- 2b + + /* Substract tb orig stamp. If the high part is non-zero, we jump to the + * slow path which call the syscall. If it's ok, then we have our 32 bits + * tb_ticks value in r7 + */ + subfc r7,r6,r4 + subfe. r0,r5,r3 + bne- 3f + + /* Load scale factor & do multiplication */ + lwz r5,CFG_TB_TO_XS(r9) /* load values */ + lwz r6,(CFG_TB_TO_XS+4)(r9) + mulhwu r4,r7,r5 + mulhwu r6,r7,r6 + mullw r6,r7,r5 + addc r6,r6,r0 + + /* At this point, we have the scaled xsec value in r4 + XER:CA + * we load & add the stamp since epoch + */ + lwz r5,CFG_STAMP_XSEC(r9) + lwz r6,(CFG_STAMP_XSEC+4)(r9) + adde r4,r4,r6 + addze r3,r5 + + /* We now have our result in r3,r4. We create a fake dependency + * on that result and re-check the counter + */ + xor r0,r4,r4 + add r9,r9,r0 + lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9) + cmpl cr0,r8,r0 /* check if updated */ + bne- 1b + + /* Warning ! The caller expects CR:EQ to be set to indicate a + * successful calculation (so it won't fallback to the syscall + * method). We have overriden that CR bit in the counter check, + * but fortunately, the loop exit condition _is_ CR:EQ set, so + * we can exit safely here. If you change this code, be careful + * of that side effect. + */ +3: blr + .cfi_endproc diff --git a/arch/ppc64/kernel/vdso32/sigtramp.S b/arch/ppc64/kernel/vdso32/sigtramp.S new file mode 100644 index 00000000000..e0464278191 --- /dev/null +++ b/arch/ppc64/kernel/vdso32/sigtramp.S @@ -0,0 +1,300 @@ +/* + * Signal trampolines for 32 bits processes in a ppc64 kernel for + * use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include + + .text + +/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from + the return address to get an address in the middle of the presumed + call instruction. Since we don't have a call here, we artifically + extend the range covered by the unwind info by adding a nop before + the real start. */ + nop +V_FUNCTION_BEGIN(__kernel_sigtramp32) +.Lsig_start = . - 4 + li r0,__NR_sigreturn + sc +.Lsig_end: +V_FUNCTION_END(__kernel_sigtramp32) + +.Lsigrt_start: + nop +V_FUNCTION_BEGIN(__kernel_sigtramp_rt32) + li r0,__NR_rt_sigreturn + sc +.Lsigrt_end: +V_FUNCTION_END(__kernel_sigtramp_rt32) + + .section .eh_frame,"a",@progbits + +/* Register r1 can be found at offset 4 of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */ +#define cfa_save \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ +9: + +/* Register REGNO can be found at offset OFS of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */ +#define rsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .ifne ofs; \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ + .endif; \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. The VMX reg struct is at offset VREGS of + the pt_regs struct. This macro is for REGNO == 0, and contains + 'subroutines' that the other macros jump to. */ +#define vsave_msr0(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit0 */ \ +2: \ + .byte 0x40; /* DW_OP_lit16 */ \ + .byte 0x1e; /* DW_OP_mul */ \ +3: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x12; /* DW_OP_dup */ \ + .byte 0x23; /* DW_OP_plus_uconst */ \ + .uleb128 33*RSIZE; /* msr offset */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \ + .byte 0x1a; /* DW_OP_and */ \ + .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \ + .byte 0x30; /* DW_OP_lit0 */ \ + .byte 0x29; /* DW_OP_eq */ \ + .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \ + .byte 0x13; /* DW_OP_drop, pop the 0 */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x22; /* DW_OP_plus */ \ + .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. REGNO is 1 thru 31. */ +#define vsave_msr1(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit n */ \ + .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of + the VMX save block. */ +#define vsave_msr2(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x0a; .short ofs; /* DW_OP_const2u */ \ + .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \ +9: + +/* VMX register REGNO is at offset OFS of the VMX save area. */ +#define vsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ +9: + +/* This is where the pt_regs pointer can be found on the stack. */ +#define PTREGS 64+28 + +/* Size of regs. */ +#define RSIZE 4 + +/* This is the offset of the VMX regs. */ +#define VREGS 48*RSIZE+34*8 + +/* Describe where general purpose regs are saved. */ +#define EH_FRAME_GEN \ + cfa_save; \ + rsave ( 0, 0*RSIZE); \ + rsave ( 2, 2*RSIZE); \ + rsave ( 3, 3*RSIZE); \ + rsave ( 4, 4*RSIZE); \ + rsave ( 5, 5*RSIZE); \ + rsave ( 6, 6*RSIZE); \ + rsave ( 7, 7*RSIZE); \ + rsave ( 8, 8*RSIZE); \ + rsave ( 9, 9*RSIZE); \ + rsave (10, 10*RSIZE); \ + rsave (11, 11*RSIZE); \ + rsave (12, 12*RSIZE); \ + rsave (13, 13*RSIZE); \ + rsave (14, 14*RSIZE); \ + rsave (15, 15*RSIZE); \ + rsave (16, 16*RSIZE); \ + rsave (17, 17*RSIZE); \ + rsave (18, 18*RSIZE); \ + rsave (19, 19*RSIZE); \ + rsave (20, 20*RSIZE); \ + rsave (21, 21*RSIZE); \ + rsave (22, 22*RSIZE); \ + rsave (23, 23*RSIZE); \ + rsave (24, 24*RSIZE); \ + rsave (25, 25*RSIZE); \ + rsave (26, 26*RSIZE); \ + rsave (27, 27*RSIZE); \ + rsave (28, 28*RSIZE); \ + rsave (29, 29*RSIZE); \ + rsave (30, 30*RSIZE); \ + rsave (31, 31*RSIZE); \ + rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \ + rsave (65, 36*RSIZE); /* lr */ \ + rsave (70, 38*RSIZE) /* cr */ + +/* Describe where the FP regs are saved. */ +#define EH_FRAME_FP \ + rsave (32, 48*RSIZE + 0*8); \ + rsave (33, 48*RSIZE + 1*8); \ + rsave (34, 48*RSIZE + 2*8); \ + rsave (35, 48*RSIZE + 3*8); \ + rsave (36, 48*RSIZE + 4*8); \ + rsave (37, 48*RSIZE + 5*8); \ + rsave (38, 48*RSIZE + 6*8); \ + rsave (39, 48*RSIZE + 7*8); \ + rsave (40, 48*RSIZE + 8*8); \ + rsave (41, 48*RSIZE + 9*8); \ + rsave (42, 48*RSIZE + 10*8); \ + rsave (43, 48*RSIZE + 11*8); \ + rsave (44, 48*RSIZE + 12*8); \ + rsave (45, 48*RSIZE + 13*8); \ + rsave (46, 48*RSIZE + 14*8); \ + rsave (47, 48*RSIZE + 15*8); \ + rsave (48, 48*RSIZE + 16*8); \ + rsave (49, 48*RSIZE + 17*8); \ + rsave (50, 48*RSIZE + 18*8); \ + rsave (51, 48*RSIZE + 19*8); \ + rsave (52, 48*RSIZE + 20*8); \ + rsave (53, 48*RSIZE + 21*8); \ + rsave (54, 48*RSIZE + 22*8); \ + rsave (55, 48*RSIZE + 23*8); \ + rsave (56, 48*RSIZE + 24*8); \ + rsave (57, 48*RSIZE + 25*8); \ + rsave (58, 48*RSIZE + 26*8); \ + rsave (59, 48*RSIZE + 27*8); \ + rsave (60, 48*RSIZE + 28*8); \ + rsave (61, 48*RSIZE + 29*8); \ + rsave (62, 48*RSIZE + 30*8); \ + rsave (63, 48*RSIZE + 31*8) + +/* Describe where the VMX regs are saved. */ +#ifdef CONFIG_ALTIVEC +#define EH_FRAME_VMX \ + vsave_msr0 ( 0); \ + vsave_msr1 ( 1); \ + vsave_msr1 ( 2); \ + vsave_msr1 ( 3); \ + vsave_msr1 ( 4); \ + vsave_msr1 ( 5); \ + vsave_msr1 ( 6); \ + vsave_msr1 ( 7); \ + vsave_msr1 ( 8); \ + vsave_msr1 ( 9); \ + vsave_msr1 (10); \ + vsave_msr1 (11); \ + vsave_msr1 (12); \ + vsave_msr1 (13); \ + vsave_msr1 (14); \ + vsave_msr1 (15); \ + vsave_msr1 (16); \ + vsave_msr1 (17); \ + vsave_msr1 (18); \ + vsave_msr1 (19); \ + vsave_msr1 (20); \ + vsave_msr1 (21); \ + vsave_msr1 (22); \ + vsave_msr1 (23); \ + vsave_msr1 (24); \ + vsave_msr1 (25); \ + vsave_msr1 (26); \ + vsave_msr1 (27); \ + vsave_msr1 (28); \ + vsave_msr1 (29); \ + vsave_msr1 (30); \ + vsave_msr1 (31); \ + vsave_msr2 (33, 32*16+12); \ + vsave (32, 32*16) +#else +#define EH_FRAME_VMX +#endif + +.Lcie: + .long .Lcie_end - .Lcie_start +.Lcie_start: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zR" /* NUL-terminated augmentation string */ + .uleb128 4 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 67 /* Return address register column, ap */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */ + .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */ + .balign 4 +.Lcie_end: + + .long .Lfde0_end - .Lfde0_start +.Lfde0_start: + .long .Lfde0_start - .Lcie /* CIE pointer. */ + .long .Lsig_start - . /* PC start, length */ + .long .Lsig_end - .Lsig_start + .uleb128 0 /* Augmentation */ + EH_FRAME_GEN + EH_FRAME_FP + EH_FRAME_VMX + .balign 4 +.Lfde0_end: + +/* We have a different stack layout for rt_sigreturn. */ +#undef PTREGS +#define PTREGS 64+16+128+20+28 + + .long .Lfde1_end - .Lfde1_start +.Lfde1_start: + .long .Lfde1_start - .Lcie /* CIE pointer. */ + .long .Lsigrt_start - . /* PC start, length */ + .long .Lsigrt_end - .Lsigrt_start + .uleb128 0 /* Augmentation */ + EH_FRAME_GEN + EH_FRAME_FP + EH_FRAME_VMX + .balign 4 +.Lfde1_end: diff --git a/arch/ppc64/kernel/vdso32/vdso32.lds.S b/arch/ppc64/kernel/vdso32/vdso32.lds.S new file mode 100644 index 00000000000..cca27bd03a5 --- /dev/null +++ b/arch/ppc64/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,111 @@ + +/* + * This is the infamous ld script for the 32 bits vdso + * library + */ +#include + +/* Default link addresses for the vDSOs */ +OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc") +OUTPUT_ARCH(powerpc:common) +ENTRY(_start) + +SECTIONS +{ + . = VDSO32_LBASE + SIZEOF_HEADERS; + .hash : { *(.hash) } :text + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + . = ALIGN (16); + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + } + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table) } + .fixup : { *(.fixup) } + + .got ALIGN(4) : { *(.got.plt) *(.got) } + + .dynamic : { *(.dynamic) } :text :dynamic + + _end = .; + __end = .; + PROVIDE (end = .); + + + /* Stabs debugging sections are here too + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /DISCARD/ : { *(.note.GNU-stack) } + /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) } + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} + + +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ +} + + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + __kernel_datapage_offset; /* Has to be there for the kernel to find it */ + __kernel_get_syscall_map; + __kernel_gettimeofday; + __kernel_sync_dicache; + __kernel_sync_dicache_p5; + __kernel_sigtramp32; + __kernel_sigtramp_rt32; + local: *; + }; +} diff --git a/arch/ppc64/kernel/vdso32/vdso32_wrapper.S b/arch/ppc64/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 00000000000..76ca28e09d2 --- /dev/null +++ b/arch/ppc64/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,13 @@ +#include +#include + + .section ".data.page_aligned" + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/ppc64/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/ppc64/kernel/vdso64/Makefile b/arch/ppc64/kernel/vdso64/Makefile new file mode 100644 index 00000000000..bd3f70b1a38 --- /dev/null +++ b/arch/ppc64/kernel/vdso64/Makefile @@ -0,0 +1,35 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o + +# Build rules + +targets := $(obj-vdso64) vdso64.so +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + +EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin +EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 +EXTRA_AFLAGS := -D__VDSO64__ -s + +obj-y += vdso64_wrapper.o +extra-y += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) + $(call if_changed,vdso64ld) + +# assembly rules for the .S files +$(obj-vdso64): %.o: %.S + $(call if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + + diff --git a/arch/ppc64/kernel/vdso64/cacheflush.S b/arch/ppc64/kernel/vdso64/cacheflush.S new file mode 100644 index 00000000000..d9696ffcf33 --- /dev/null +++ b/arch/ppc64/kernel/vdso64/cacheflush.S @@ -0,0 +1,64 @@ +/* + * vDSO provided cache flush routines + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include + + .text + +/* + * Default "generic" version of __kernel_sync_dicache. + * + * void __kernel_sync_dicache(unsigned long start, unsigned long end) + * + * Flushes the data cache & invalidate the instruction cache for the + * provided range [start, end[ + * + * Note: all CPUs supported by this kernel have a 128 bytes cache + * line size so we don't have to peek that info from the datapage + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache) + .cfi_startproc + li r5,127 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + srwi. r8,r8,7 /* compute line count */ + beqlr /* nothing to do? */ + mtctr r8 + mr r3,r6 +1: dcbst 0,r3 + addi r3,r3,128 + bdnz 1b + sync + mtctr r8 +1: icbi 0,r6 + addi r6,r6,128 + bdnz 1b + isync + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache) + + +/* + * POWER5 version of __kernel_sync_dicache + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) + .cfi_startproc + sync + isync + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache_p5) diff --git a/arch/ppc64/kernel/vdso64/datapage.S b/arch/ppc64/kernel/vdso64/datapage.S new file mode 100644 index 00000000000..18afd971c9d --- /dev/null +++ b/arch/ppc64/kernel/vdso64/datapage.S @@ -0,0 +1,68 @@ +/* + * Access to the shared data page by the vDSO & syscall map + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + + .text +V_FUNCTION_BEGIN(__get_datapage) + .cfi_startproc + /* We don't want that exposed or overridable as we want other objects + * to be able to bl directly to here + */ + .protected __get_datapage + .hidden __get_datapage + + mflr r0 + .cfi_register lr,r0 + + bcl 20,31,1f + .global __kernel_datapage_offset; +__kernel_datapage_offset: + .long 0 +1: + mflr r3 + mtlr r0 + lwz r0,0(r3) + add r3,r0,r3 + blr + .cfi_endproc +V_FUNCTION_END(__get_datapage) + +/* + * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; + * + * returns a pointer to the syscall map. the map is agnostic to the + * size of "long", unlike kernel bitops, it stores bits from top to + * bottom so that memory actually contains a linear bitmap + * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of + * 32 bits int at N >> 5. + */ +V_FUNCTION_BEGIN(__kernel_get_syscall_map) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + + mr r4,r3 + bl V_LOCAL_FUNC(__get_datapage) + mtlr r12 + addi r3,r3,CFG_SYSCALL_MAP64 + cmpli cr0,r4,0 + beqlr + li r0,__NR_syscalls + stw r0,0(r4) + blr + .cfi_endproc +V_FUNCTION_END(__kernel_get_syscall_map) diff --git a/arch/ppc64/kernel/vdso64/gettimeofday.S b/arch/ppc64/kernel/vdso64/gettimeofday.S new file mode 100644 index 00000000000..ed3f970ff05 --- /dev/null +++ b/arch/ppc64/kernel/vdso64/gettimeofday.S @@ -0,0 +1,91 @@ +/* + * Userland implementation of gettimeofday() for 64 bits processes in a + * ppc64 kernel for use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include + + .text +/* + * Exact prototype of gettimeofday + * + * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); + * + */ +V_FUNCTION_BEGIN(__kernel_gettimeofday) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + + mr r11,r3 /* r11 holds tv */ + mr r10,r4 /* r10 holds tz */ + bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ + lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ + ori r7,r7,16960 + rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ + rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ + std r5,TVAL64_TV_SEC(r11) /* store sec in tv */ + subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ + mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / XSEC_PER_SEC */ + rldicl r0,r0,44,20 + cmpldi cr0,r10,0 /* check if tz is NULL */ + std r0,TVAL64_TV_USEC(r11) /* store usec in tv */ + beq 1f + lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ + lwz r5,CFG_TZ_DSTTIME(r3) + stw r4,TZONE_TZ_MINWEST(r10) + stw r5,TZONE_TZ_DSTTIME(r10) +1: mtlr r12 + li r3,0 /* always success */ + blr + .cfi_endproc +V_FUNCTION_END(__kernel_gettimeofday) + + +/* + * This is the core of gettimeofday(), it returns the xsec + * value in r4 and expects the datapage ptr (non clobbered) + * in r3. clobbers r0,r4,r5,r6,r7,r8 +*/ +V_FUNCTION_BEGIN(__do_get_xsec) + .cfi_startproc + /* check for update count & load values */ +1: ld r7,CFG_TB_UPDATE_COUNT(r3) + andi. r0,r4,1 /* pending update ? loop */ + bne- 1b + xor r0,r4,r4 /* create dependency */ + add r3,r3,r0 + + /* Get TB & offset it */ + mftb r8 + ld r9,CFG_TB_ORIG_STAMP(r3) + subf r8,r9,r8 + + /* Scale result */ + ld r5,CFG_TB_TO_XS(r3) + mulhdu r8,r8,r5 + + /* Add stamp since epoch */ + ld r6,CFG_STAMP_XSEC(r3) + add r4,r6,r8 + + xor r0,r4,r4 + add r3,r3,r0 + ld r0,CFG_TB_UPDATE_COUNT(r3) + cmpld cr0,r0,r7 /* check if updated */ + bne- 1b + blr + .cfi_endproc +V_FUNCTION_END(__do_get_xsec) diff --git a/arch/ppc64/kernel/vdso64/sigtramp.S b/arch/ppc64/kernel/vdso64/sigtramp.S new file mode 100644 index 00000000000..8ae8f205e47 --- /dev/null +++ b/arch/ppc64/kernel/vdso64/sigtramp.S @@ -0,0 +1,294 @@ +/* + * Signal trampoline for 64 bits processes in a ppc64 kernel for + * use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include + + .text + +/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from + the return address to get an address in the middle of the presumed + call instruction. Since we don't have a call here, we artifically + extend the range covered by the unwind info by padding before the + real start. */ + nop + .balign 8 +V_FUNCTION_BEGIN(__kernel_sigtramp_rt64) +.Lsigrt_start = . - 4 + addi r1, r1, __SIGNAL_FRAMESIZE + li r0,__NR_rt_sigreturn + sc +.Lsigrt_end: +V_FUNCTION_END(__kernel_sigtramp_rt64) +/* The ".balign 8" above and the following zeros mimic the old stack + trampoline layout. The last magic value is the ucontext pointer, + chosen in such a way that older libgcc unwind code returns a zero + for a sigcontext pointer. */ + .long 0,0,0 + .quad 0,-21*8 + +/* Register r1 can be found at offset 8 of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */ +#define cfa_save \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ +9: + +/* Register REGNO can be found at offset OFS of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */ +#define rsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .ifne ofs; \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ + .endif; \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. A pointer to the VMX reg struct is at VREGS in + the pt_regs struct. This macro is for REGNO == 0, and contains + 'subroutines' that the other macros jump to. */ +#define vsave_msr0(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit0 */ \ +2: \ + .byte 0x40; /* DW_OP_lit16 */ \ + .byte 0x1e; /* DW_OP_mul */ \ +3: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x12; /* DW_OP_dup */ \ + .byte 0x23; /* DW_OP_plus_uconst */ \ + .uleb128 33*RSIZE; /* msr offset */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \ + .byte 0x1a; /* DW_OP_and */ \ + .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \ + .byte 0x30; /* DW_OP_lit0 */ \ + .byte 0x29; /* DW_OP_eq */ \ + .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \ + .byte 0x13; /* DW_OP_drop, pop the 0 */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x22; /* DW_OP_plus */ \ + .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. REGNO is 1 thru 31. */ +#define vsave_msr1(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit n */ \ + .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of + the VMX save block. */ +#define vsave_msr2(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x0a; .short ofs; /* DW_OP_const2u */ \ + .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \ +9: + +/* VMX register REGNO is at offset OFS of the VMX save area. */ +#define vsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ +9: + +/* This is where the pt_regs pointer can be found on the stack. */ +#define PTREGS 128+168+56 + +/* Size of regs. */ +#define RSIZE 8 + +/* This is the offset of the VMX reg pointer. */ +#define VREGS 48*RSIZE+33*8 + +/* Describe where general purpose regs are saved. */ +#define EH_FRAME_GEN \ + cfa_save; \ + rsave ( 0, 0*RSIZE); \ + rsave ( 2, 2*RSIZE); \ + rsave ( 3, 3*RSIZE); \ + rsave ( 4, 4*RSIZE); \ + rsave ( 5, 5*RSIZE); \ + rsave ( 6, 6*RSIZE); \ + rsave ( 7, 7*RSIZE); \ + rsave ( 8, 8*RSIZE); \ + rsave ( 9, 9*RSIZE); \ + rsave (10, 10*RSIZE); \ + rsave (11, 11*RSIZE); \ + rsave (12, 12*RSIZE); \ + rsave (13, 13*RSIZE); \ + rsave (14, 14*RSIZE); \ + rsave (15, 15*RSIZE); \ + rsave (16, 16*RSIZE); \ + rsave (17, 17*RSIZE); \ + rsave (18, 18*RSIZE); \ + rsave (19, 19*RSIZE); \ + rsave (20, 20*RSIZE); \ + rsave (21, 21*RSIZE); \ + rsave (22, 22*RSIZE); \ + rsave (23, 23*RSIZE); \ + rsave (24, 24*RSIZE); \ + rsave (25, 25*RSIZE); \ + rsave (26, 26*RSIZE); \ + rsave (27, 27*RSIZE); \ + rsave (28, 28*RSIZE); \ + rsave (29, 29*RSIZE); \ + rsave (30, 30*RSIZE); \ + rsave (31, 31*RSIZE); \ + rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \ + rsave (65, 36*RSIZE); /* lr */ \ + rsave (70, 38*RSIZE) /* cr */ + +/* Describe where the FP regs are saved. */ +#define EH_FRAME_FP \ + rsave (32, 48*RSIZE + 0*8); \ + rsave (33, 48*RSIZE + 1*8); \ + rsave (34, 48*RSIZE + 2*8); \ + rsave (35, 48*RSIZE + 3*8); \ + rsave (36, 48*RSIZE + 4*8); \ + rsave (37, 48*RSIZE + 5*8); \ + rsave (38, 48*RSIZE + 6*8); \ + rsave (39, 48*RSIZE + 7*8); \ + rsave (40, 48*RSIZE + 8*8); \ + rsave (41, 48*RSIZE + 9*8); \ + rsave (42, 48*RSIZE + 10*8); \ + rsave (43, 48*RSIZE + 11*8); \ + rsave (44, 48*RSIZE + 12*8); \ + rsave (45, 48*RSIZE + 13*8); \ + rsave (46, 48*RSIZE + 14*8); \ + rsave (47, 48*RSIZE + 15*8); \ + rsave (48, 48*RSIZE + 16*8); \ + rsave (49, 48*RSIZE + 17*8); \ + rsave (50, 48*RSIZE + 18*8); \ + rsave (51, 48*RSIZE + 19*8); \ + rsave (52, 48*RSIZE + 20*8); \ + rsave (53, 48*RSIZE + 21*8); \ + rsave (54, 48*RSIZE + 22*8); \ + rsave (55, 48*RSIZE + 23*8); \ + rsave (56, 48*RSIZE + 24*8); \ + rsave (57, 48*RSIZE + 25*8); \ + rsave (58, 48*RSIZE + 26*8); \ + rsave (59, 48*RSIZE + 27*8); \ + rsave (60, 48*RSIZE + 28*8); \ + rsave (61, 48*RSIZE + 29*8); \ + rsave (62, 48*RSIZE + 30*8); \ + rsave (63, 48*RSIZE + 31*8) + +/* Describe where the VMX regs are saved. */ +#ifdef CONFIG_ALTIVEC +#define EH_FRAME_VMX \ + vsave_msr0 ( 0); \ + vsave_msr1 ( 1); \ + vsave_msr1 ( 2); \ + vsave_msr1 ( 3); \ + vsave_msr1 ( 4); \ + vsave_msr1 ( 5); \ + vsave_msr1 ( 6); \ + vsave_msr1 ( 7); \ + vsave_msr1 ( 8); \ + vsave_msr1 ( 9); \ + vsave_msr1 (10); \ + vsave_msr1 (11); \ + vsave_msr1 (12); \ + vsave_msr1 (13); \ + vsave_msr1 (14); \ + vsave_msr1 (15); \ + vsave_msr1 (16); \ + vsave_msr1 (17); \ + vsave_msr1 (18); \ + vsave_msr1 (19); \ + vsave_msr1 (20); \ + vsave_msr1 (21); \ + vsave_msr1 (22); \ + vsave_msr1 (23); \ + vsave_msr1 (24); \ + vsave_msr1 (25); \ + vsave_msr1 (26); \ + vsave_msr1 (27); \ + vsave_msr1 (28); \ + vsave_msr1 (29); \ + vsave_msr1 (30); \ + vsave_msr1 (31); \ + vsave_msr2 (33, 32*16+12); \ + vsave (32, 33*16) +#else +#define EH_FRAME_VMX +#endif + + .section .eh_frame,"a",@progbits +.Lcie: + .long .Lcie_end - .Lcie_start +.Lcie_start: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zR" /* NUL-terminated augmentation string */ + .uleb128 4 /* Code alignment factor */ + .sleb128 -8 /* Data alignment factor */ + .byte 67 /* Return address register column, ap */ + .uleb128 1 /* Augmentation value length */ + .byte 0x14 /* DW_EH_PE_pcrel | DW_EH_PE_udata8. */ + .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */ + .balign 8 +.Lcie_end: + + .long .Lfde0_end - .Lfde0_start +.Lfde0_start: + .long .Lfde0_start - .Lcie /* CIE pointer. */ + .quad .Lsigrt_start - . /* PC start, length */ + .quad .Lsigrt_end - .Lsigrt_start + .uleb128 0 /* Augmentation */ + EH_FRAME_GEN + EH_FRAME_FP + EH_FRAME_VMX +# Do we really need to describe the frame at this point? ie. will +# we ever have some call chain that returns somewhere past the addi? +# I don't think so, since gcc doesn't support async signals. +# .byte 0x41 /* DW_CFA_advance_loc 1*4 */ +#undef PTREGS +#define PTREGS 168+56 +# EH_FRAME_GEN +# EH_FRAME_FP +# EH_FRAME_VMX + .balign 8 +.Lfde0_end: diff --git a/arch/ppc64/kernel/vdso64/vdso64.lds.S b/arch/ppc64/kernel/vdso64/vdso64.lds.S new file mode 100644 index 00000000000..942c815c7bc --- /dev/null +++ b/arch/ppc64/kernel/vdso64/vdso64.lds.S @@ -0,0 +1,110 @@ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ +#include + +OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc") +OUTPUT_ARCH(powerpc:common64) +ENTRY(_start) + +SECTIONS +{ + . = VDSO64_LBASE + SIZEOF_HEADERS; + .hash : { *(.hash) } :text + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + . = ALIGN (16); + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + *(.sfpr .glink) + } + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table) } + + .opd ALIGN(8) : { KEEP (*(.opd)) } + .got ALIGN(8) : { *(.got .toc) } + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + + .dynamic : { *(.dynamic) } :text :dynamic + + _end = .; + PROVIDE (end = .); + + /* Stabs debugging sections are here too + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sectio/ns. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /DISCARD/ : { *(.note.GNU-stack) } + /DISCARD/ : { *(.branch_lt) } + /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.*) } + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} + +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + __kernel_datapage_offset; /* Has to be there for the kernel to find it */ + __kernel_get_syscall_map; + __kernel_gettimeofday; + __kernel_sync_dicache; + __kernel_sync_dicache_p5; + __kernel_sigtramp_rt64; + local: *; + }; +} diff --git a/arch/ppc64/kernel/vdso64/vdso64_wrapper.S b/arch/ppc64/kernel/vdso64/vdso64_wrapper.S new file mode 100644 index 00000000000..771c2741c49 --- /dev/null +++ b/arch/ppc64/kernel/vdso64/vdso64_wrapper.S @@ -0,0 +1,13 @@ +#include +#include + + .section ".data.page_aligned" + + .globl vdso64_start, vdso64_end + .balign PAGE_SIZE +vdso64_start: + .incbin "arch/ppc64/kernel/vdso64/vdso64.so" + .balign PAGE_SIZE +vdso64_end: + + .previous diff --git a/arch/ppc64/kernel/vecemu.c b/arch/ppc64/kernel/vecemu.c new file mode 100644 index 00000000000..cb207629f21 --- /dev/null +++ b/arch/ppc64/kernel/vecemu.c @@ -0,0 +1,346 @@ +/* + * Routines to emulate some Altivec/VMX instructions, specifically + * those that can trap when given denormalized operands in Java mode. + */ +#include +#include +#include +#include +#include +#include + +/* Functions in vector.S */ +extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); +extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); +extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); +extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); +extern void vrefp(vector128 *dst, vector128 *src); +extern void vrsqrtefp(vector128 *dst, vector128 *src); +extern void vexptep(vector128 *dst, vector128 *src); + +static unsigned int exp2s[8] = { + 0x800000, + 0x8b95c2, + 0x9837f0, + 0xa5fed7, + 0xb504f3, + 0xc5672a, + 0xd744fd, + 0xeac0c7 +}; + +/* + * Computes an estimate of 2^x. The `s' argument is the 32-bit + * single-precision floating-point representation of x. + */ +static unsigned int eexp2(unsigned int s) +{ + int exp, pwr; + unsigned int mant, frac; + + /* extract exponent field from input */ + exp = ((s >> 23) & 0xff) - 127; + if (exp > 7) { + /* check for NaN input */ + if (exp == 128 && (s & 0x7fffff) != 0) + return s | 0x400000; /* return QNaN */ + /* 2^-big = 0, 2^+big = +Inf */ + return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */ + } + if (exp < -23) + return 0x3f800000; /* 1.0 */ + + /* convert to fixed point integer in 9.23 representation */ + pwr = (s & 0x7fffff) | 0x800000; + if (exp > 0) + pwr <<= exp; + else + pwr >>= -exp; + if (s & 0x80000000) + pwr = -pwr; + + /* extract integer part, which becomes exponent part of result */ + exp = (pwr >> 23) + 126; + if (exp >= 254) + return 0x7f800000; + if (exp < -23) + return 0; + + /* table lookup on top 3 bits of fraction to get mantissa */ + mant = exp2s[(pwr >> 20) & 7]; + + /* linear interpolation using remaining 20 bits of fraction */ + asm("mulhwu %0,%1,%2" : "=r" (frac) + : "r" (pwr << 12), "r" (0x172b83ff)); + asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant)); + mant += frac; + + if (exp >= 0) + return mant + (exp << 23); + + /* denormalized result */ + exp = -exp; + mant += 1 << (exp - 1); + return mant >> exp; +} + +/* + * Computes an estimate of log_2(x). The `s' argument is the 32-bit + * single-precision floating-point representation of x. + */ +static unsigned int elog2(unsigned int s) +{ + int exp, mant, lz, frac; + + exp = s & 0x7f800000; + mant = s & 0x7fffff; + if (exp == 0x7f800000) { /* Inf or NaN */ + if (mant != 0) + s |= 0x400000; /* turn NaN into QNaN */ + return s; + } + if ((exp | mant) == 0) /* +0 or -0 */ + return 0xff800000; /* return -Inf */ + + if (exp == 0) { + /* denormalized */ + asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant)); + mant <<= lz - 8; + exp = (-118 - lz) << 23; + } else { + mant |= 0x800000; + exp -= 127 << 23; + } + + if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */ + exp |= 0x400000; /* 0.5 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */ + } + if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */ + exp |= 0x200000; /* 0.25 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */ + } + if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */ + exp |= 0x100000; /* 0.125 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */ + } + if (mant > 0x800000) { /* 1.0 * 2^23 */ + /* calculate (mant - 1) * 1.381097463 */ + /* 1.381097463 == 0.125 / (2^0.125 - 1) */ + asm("mulhwu %0,%1,%2" : "=r" (frac) + : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a)); + exp += frac; + } + s = exp & 0x80000000; + if (exp != 0) { + if (s) + exp = -exp; + asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp)); + lz = 8 - lz; + if (lz > 0) + exp >>= lz; + else if (lz < 0) + exp <<= -lz; + s += ((lz + 126) << 23) + exp; + } + return s; +} + +#define VSCR_SAT 1 + +static int ctsxs(unsigned int x, int scale, unsigned int *vscrp) +{ + int exp, mant; + + exp = (x >> 23) & 0xff; + mant = x & 0x7fffff; + if (exp == 255 && mant != 0) + return 0; /* NaN -> 0 */ + exp = exp - 127 + scale; + if (exp < 0) + return 0; /* round towards zero */ + if (exp >= 31) { + /* saturate, unless the result would be -2^31 */ + if (x + (scale << 23) != 0xcf000000) + *vscrp |= VSCR_SAT; + return (x & 0x80000000)? 0x80000000: 0x7fffffff; + } + mant |= 0x800000; + mant = (mant << 7) >> (30 - exp); + return (x & 0x80000000)? -mant: mant; +} + +static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) +{ + int exp; + unsigned int mant; + + exp = (x >> 23) & 0xff; + mant = x & 0x7fffff; + if (exp == 255 && mant != 0) + return 0; /* NaN -> 0 */ + exp = exp - 127 + scale; + if (exp < 0) + return 0; /* round towards zero */ + if (x & 0x80000000) { + /* negative => saturate to 0 */ + *vscrp |= VSCR_SAT; + return 0; + } + if (exp >= 32) { + /* saturate */ + *vscrp |= VSCR_SAT; + return 0xffffffff; + } + mant |= 0x800000; + mant = (mant << 8) >> (31 - exp); + return mant; +} + +/* Round to floating integer, towards 0 */ +static unsigned int rfiz(unsigned int x) +{ + int exp; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if (exp < 0) + return x & 0x80000000; /* |x| < 1.0 rounds to 0 */ + return x & ~(0x7fffff >> exp); +} + +/* Round to floating integer, towards +/- Inf */ +static unsigned int rfii(unsigned int x) +{ + int exp, mask; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if ((x & 0x7fffffff) == 0) + return x; /* +/-0 -> +/-0 */ + if (exp < 0) + /* 0 < |x| < 1.0 rounds to +/- 1.0 */ + return (x & 0x80000000) | 0x3f800000; + mask = 0x7fffff >> exp; + /* mantissa overflows into exponent - that's OK, + it can't overflow into the sign bit */ + return (x + mask) & ~mask; +} + +/* Round to floating integer, to nearest */ +static unsigned int rfin(unsigned int x) +{ + int exp, half; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if (exp < -1) + return x & 0x80000000; /* |x| < 0.5 -> +/-0 */ + if (exp == -1) + /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */ + return (x & 0x80000000) | 0x3f800000; + half = 0x400000 >> exp; + /* add 0.5 to the magnitude and chop off the fraction bits */ + return (x + half) & ~(0x7fffff >> exp); +} + +int +emulate_altivec(struct pt_regs *regs) +{ + unsigned int instr, i; + unsigned int va, vb, vc, vd; + vector128 *vrs; + + if (get_user(instr, (unsigned int __user *) regs->nip)) + return -EFAULT; + if ((instr >> 26) != 4) + return -EINVAL; /* not an altivec instruction */ + vd = (instr >> 21) & 0x1f; + va = (instr >> 16) & 0x1f; + vb = (instr >> 11) & 0x1f; + vc = (instr >> 6) & 0x1f; + + vrs = current->thread.vr; + switch (instr & 0x3f) { + case 10: + switch (vc) { + case 0: /* vaddfp */ + vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); + break; + case 1: /* vsubfp */ + vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); + break; + case 4: /* vrefp */ + vrefp(&vrs[vd], &vrs[vb]); + break; + case 5: /* vrsqrtefp */ + vrsqrtefp(&vrs[vd], &vrs[vb]); + break; + case 6: /* vexptefp */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = eexp2(vrs[vb].u[i]); + break; + case 7: /* vlogefp */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = elog2(vrs[vb].u[i]); + break; + case 8: /* vrfin */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = rfin(vrs[vb].u[i]); + break; + case 9: /* vrfiz */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = rfiz(vrs[vb].u[i]); + break; + case 10: /* vrfip */ + for (i = 0; i < 4; ++i) { + u32 x = vrs[vb].u[i]; + x = (x & 0x80000000)? rfiz(x): rfii(x); + vrs[vd].u[i] = x; + } + break; + case 11: /* vrfim */ + for (i = 0; i < 4; ++i) { + u32 x = vrs[vb].u[i]; + x = (x & 0x80000000)? rfii(x): rfiz(x); + vrs[vd].u[i] = x; + } + break; + case 14: /* vctuxs */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, + ¤t->thread.vscr.u[3]); + break; + case 15: /* vctsxs */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, + ¤t->thread.vscr.u[3]); + break; + default: + return -EINVAL; + } + break; + case 46: /* vmaddfp */ + vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); + break; + case 47: /* vnmsubfp */ + vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/arch/ppc64/kernel/vector.S b/arch/ppc64/kernel/vector.S new file mode 100644 index 00000000000..b79d33e4001 --- /dev/null +++ b/arch/ppc64/kernel/vector.S @@ -0,0 +1,172 @@ +#include +#include + +/* + * The routines below are in assembler so we can closely control the + * usage of floating-point registers. These routines must be called + * with preempt disabled. + */ + .section ".toc","aw" +fpzero: + .tc FD_0_0[TC],0 +fpone: + .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */ +fphalf: + .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */ + + .text +/* + * Internal routine to enable floating point and set FPSCR to 0. + * Don't call it from C; it doesn't use the normal calling convention. + */ +fpenable: + mfmsr r10 + ori r11,r10,MSR_FP + mtmsr r11 + isync + stfd fr31,-8(r1) + stfd fr0,-16(r1) + stfd fr1,-24(r1) + mffs fr31 + lfd fr1,fpzero@toc(r2) + mtfsf 0xff,fr1 + blr + +fpdisable: + mtlr r12 + mtfsf 0xff,fr31 + lfd fr1,-24(r1) + lfd fr0,-16(r1) + lfd fr31,-8(r1) + mtmsr r10 + isync + blr + +/* + * Vector add, floating point. + */ +_GLOBAL(vaddfp) + mflr r12 + bl fpenable + li r0,4 + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + lfsx fr1,r5,r6 + fadds fr0,fr0,fr1 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector subtract, floating point. + */ +_GLOBAL(vsubfp) + mflr r12 + bl fpenable + li r0,4 + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + lfsx fr1,r5,r6 + fsubs fr0,fr0,fr1 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector multiply and add, floating point. + */ +_GLOBAL(vmaddfp) + mflr r12 + bl fpenable + stfd fr2,-32(r1) + li r0,4 + mtctr r0 + li r7,0 +1: lfsx fr0,r4,r7 + lfsx fr1,r5,r7 + lfsx fr2,r6,r7 + fmadds fr0,fr0,fr2,fr1 + stfsx fr0,r3,r7 + addi r7,r7,4 + bdnz 1b + lfd fr2,-32(r1) + b fpdisable + +/* + * Vector negative multiply and subtract, floating point. + */ +_GLOBAL(vnmsubfp) + mflr r12 + bl fpenable + stfd fr2,-32(r1) + li r0,4 + mtctr r0 + li r7,0 +1: lfsx fr0,r4,r7 + lfsx fr1,r5,r7 + lfsx fr2,r6,r7 + fnmsubs fr0,fr0,fr2,fr1 + stfsx fr0,r3,r7 + addi r7,r7,4 + bdnz 1b + lfd fr2,-32(r1) + b fpdisable + +/* + * Vector reciprocal estimate. We just compute 1.0/x. + * r3 -> destination, r4 -> source. + */ +_GLOBAL(vrefp) + mflr r12 + bl fpenable + li r0,4 + lfd fr1,fpone@toc(r2) + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + fdivs fr0,fr1,fr0 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector reciprocal square-root estimate, floating point. + * We use the frsqrte instruction for the initial estimate followed + * by 2 iterations of Newton-Raphson to get sufficient accuracy. + * r3 -> destination, r4 -> source. + */ +_GLOBAL(vrsqrtefp) + mflr r12 + bl fpenable + stfd fr2,-32(r1) + stfd fr3,-40(r1) + stfd fr4,-48(r1) + stfd fr5,-56(r1) + li r0,4 + lfd fr4,fpone@toc(r2) + lfd fr5,fphalf@toc(r2) + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + frsqrte fr1,fr0 /* r = frsqrte(s) */ + fmuls fr3,fr1,fr0 /* r * s */ + fmuls fr2,fr1,fr5 /* r * 0.5 */ + fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */ + fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */ + fmuls fr3,fr1,fr0 /* r * s */ + fmuls fr2,fr1,fr5 /* r * 0.5 */ + fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */ + fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */ + stfsx fr1,r3,r6 + addi r6,r6,4 + bdnz 1b + lfd fr5,-56(r1) + lfd fr4,-48(r1) + lfd fr3,-40(r1) + lfd fr2,-32(r1) + b fpdisable diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c new file mode 100644 index 00000000000..cdd830cb276 --- /dev/null +++ b/arch/ppc64/kernel/vio.c @@ -0,0 +1,640 @@ +/* + * IBM PowerPC Virtual I/O Infrastructure Support. + * + * Copyright (c) 2003 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Santiago Leon santil@us.ibm.com + * Hollis Blanchard + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DBGENTER() pr_debug("%s entered\n", __FUNCTION__) + +extern struct subsystem devices_subsys; /* needed for vio_find_name() */ + +static const struct vio_device_id *vio_match_device( + const struct vio_device_id *, const struct vio_dev *); + +#ifdef CONFIG_PPC_PSERIES +static struct iommu_table *vio_build_iommu_table(struct vio_dev *); +static int vio_num_address_cells; +#endif +static struct vio_dev *vio_bus_device; /* fake "parent" device */ + +#ifdef CONFIG_PPC_ISERIES +static struct vio_dev *__init vio_register_device_iseries(char *type, + uint32_t unit_num); + +static struct iommu_table veth_iommu_table; +static struct iommu_table vio_iommu_table; + +static struct vio_dev _vio_dev = { + .iommu_table = &vio_iommu_table, + .dev.bus = &vio_bus_type +}; +struct device *iSeries_vio_dev = &_vio_dev.dev; +EXPORT_SYMBOL(iSeries_vio_dev); + +#define device_is_compatible(a, b) 1 + +#endif + +/* convert from struct device to struct vio_dev and pass to driver. + * dev->driver has already been set by generic code because vio_bus_match + * succeeded. */ +static int vio_bus_probe(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv = to_vio_driver(dev->driver); + const struct vio_device_id *id; + int error = -ENODEV; + + DBGENTER(); + + if (!viodrv->probe) + return error; + + id = vio_match_device(viodrv->id_table, viodev); + if (id) { + error = viodrv->probe(viodev, id); + } + + return error; +} + +/* convert from struct device to struct vio_dev and pass to driver. */ +static int vio_bus_remove(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv = to_vio_driver(dev->driver); + + DBGENTER(); + + if (viodrv->remove) { + return viodrv->remove(viodev); + } + + /* driver can't remove */ + return 1; +} + +/** + * vio_register_driver: - Register a new vio driver + * @drv: The vio_driver structure to be registered. + */ +int vio_register_driver(struct vio_driver *viodrv) +{ + printk(KERN_DEBUG "%s: driver %s registering\n", __FUNCTION__, + viodrv->name); + + /* fill in 'struct driver' fields */ + viodrv->driver.name = viodrv->name; + viodrv->driver.bus = &vio_bus_type; + viodrv->driver.probe = vio_bus_probe; + viodrv->driver.remove = vio_bus_remove; + + return driver_register(&viodrv->driver); +} +EXPORT_SYMBOL(vio_register_driver); + +/** + * vio_unregister_driver - Remove registration of vio driver. + * @driver: The vio_driver struct to be removed form registration + */ +void vio_unregister_driver(struct vio_driver *viodrv) +{ + driver_unregister(&viodrv->driver); +} +EXPORT_SYMBOL(vio_unregister_driver); + +/** + * vio_match_device: - Tell if a VIO device has a matching VIO device id structure. + * @ids: array of VIO device id structures to search in + * @dev: the VIO device structure to match against + * + * Used by a driver to check whether a VIO device present in the + * system is in its list of supported devices. Returns the matching + * vio_device_id structure or NULL if there is no match. + */ +static const struct vio_device_id * vio_match_device(const struct vio_device_id *ids, + const struct vio_dev *dev) +{ + DBGENTER(); + + while (ids->type) { + if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) && + device_is_compatible(dev->dev.platform_data, ids->compat)) + return ids; + ids++; + } + return NULL; +} + +#ifdef CONFIG_PPC_ISERIES +void __init iommu_vio_init(void) +{ + struct iommu_table *t; + struct iommu_table_cb cb; + unsigned long cbp; + unsigned long itc_entries; + + cb.itc_busno = 255; /* Bus 255 is the virtual bus */ + cb.itc_virtbus = 0xff; /* Ask for virtual bus */ + + cbp = virt_to_abs(&cb); + HvCallXm_getTceTableParms(cbp); + + itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); + veth_iommu_table.it_size = itc_entries / 2; + veth_iommu_table.it_busno = cb.itc_busno; + veth_iommu_table.it_offset = cb.itc_offset; + veth_iommu_table.it_index = cb.itc_index; + veth_iommu_table.it_type = TCE_VB; + veth_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&veth_iommu_table); + + if (!t) + printk("Virtual Bus VETH TCE table failed.\n"); + + vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; + vio_iommu_table.it_busno = cb.itc_busno; + vio_iommu_table.it_offset = cb.itc_offset + + veth_iommu_table.it_size; + vio_iommu_table.it_index = cb.itc_index; + vio_iommu_table.it_type = TCE_VB; + vio_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&vio_iommu_table); + + if (!t) + printk("Virtual Bus VIO TCE table failed.\n"); +} +#endif + +#ifdef CONFIG_PPC_PSERIES +static void probe_bus_pseries(void) +{ + struct device_node *node_vroot, *of_node; + + node_vroot = find_devices("vdevice"); + if ((node_vroot == NULL) || (node_vroot->child == NULL)) + /* this machine doesn't do virtual IO, and that's ok */ + return; + + vio_num_address_cells = prom_n_addr_cells(node_vroot->child); + + /* + * Create struct vio_devices for each virtual device in the device tree. + * Drivers will associate with them later. + */ + for (of_node = node_vroot->child; of_node != NULL; + of_node = of_node->sibling) { + printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); + vio_register_device_node(of_node); + } +} +#endif + +#ifdef CONFIG_PPC_ISERIES +static void probe_bus_iseries(void) +{ + HvLpIndexMap vlan_map = HvLpConfig_getVirtualLanIndexMap(); + struct vio_dev *viodev; + int i; + + /* there is only one of each of these */ + vio_register_device_iseries("viocons", 0); + vio_register_device_iseries("vscsi", 0); + + vlan_map = HvLpConfig_getVirtualLanIndexMap(); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { + if ((vlan_map & (0x8000 >> i)) == 0) + continue; + viodev = vio_register_device_iseries("vlan", i); + /* veth is special and has it own iommu_table */ + viodev->iommu_table = &veth_iommu_table; + } + for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) + vio_register_device_iseries("viodasd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) + vio_register_device_iseries("viocd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) + vio_register_device_iseries("viotape", i); +} +#endif + +/** + * vio_bus_init: - Initialize the virtual IO bus + */ +static int __init vio_bus_init(void) +{ + int err; + + err = bus_register(&vio_bus_type); + if (err) { + printk(KERN_ERR "failed to register VIO bus\n"); + return err; + } + + /* the fake parent of all vio devices, just to give us a nice directory */ + vio_bus_device = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!vio_bus_device) { + return 1; + } + memset(vio_bus_device, 0, sizeof(struct vio_dev)); + strcpy(vio_bus_device->dev.bus_id, "vio"); + + err = device_register(&vio_bus_device->dev); + if (err) { + printk(KERN_WARNING "%s: device_register returned %i\n", __FUNCTION__, + err); + kfree(vio_bus_device); + return err; + } + +#ifdef CONFIG_PPC_PSERIES + probe_bus_pseries(); +#endif +#ifdef CONFIG_PPC_ISERIES + probe_bus_iseries(); +#endif + + return 0; +} + +__initcall(vio_bus_init); + +/* vio_dev refcount hit 0 */ +static void __devinit vio_dev_release(struct device *dev) +{ + DBGENTER(); + +#ifdef CONFIG_PPC_PSERIES + /* XXX free TCE table */ + of_node_put(dev->platform_data); +#endif + kfree(to_vio_dev(dev)); +} + +#ifdef CONFIG_PPC_PSERIES +static ssize_t viodev_show_devspec(struct device *dev, char *buf) +{ + struct device_node *of_node = dev->platform_data; + + return sprintf(buf, "%s\n", of_node->full_name); +} +DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); +#endif + +static ssize_t viodev_show_name(struct device *dev, char *buf) +{ + return sprintf(buf, "%s\n", to_vio_dev(dev)->name); +} +DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL); + +static struct vio_dev * __devinit vio_register_device_common( + struct vio_dev *viodev, char *name, char *type, + uint32_t unit_address, struct iommu_table *iommu_table) +{ + DBGENTER(); + + viodev->name = name; + viodev->type = type; + viodev->unit_address = unit_address; + viodev->iommu_table = iommu_table; + /* init generic 'struct device' fields: */ + viodev->dev.parent = &vio_bus_device->dev; + viodev->dev.bus = &vio_bus_type; + viodev->dev.release = vio_dev_release; + + /* register with generic device framework */ + if (device_register(&viodev->dev)) { + printk(KERN_ERR "%s: failed to register device %s\n", + __FUNCTION__, viodev->dev.bus_id); + return NULL; + } + device_create_file(&viodev->dev, &dev_attr_name); + + return viodev; +} + +#ifdef CONFIG_PPC_PSERIES +/** + * vio_register_device_node: - Register a new vio device. + * @of_node: The OF node for this device. + * + * Creates and initializes a vio_dev structure from the data in + * of_node (dev.platform_data) and adds it to the list of virtual devices. + * Returns a pointer to the created vio_dev or NULL if node has + * NULL device_type or compatible fields. + */ +struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) +{ + struct vio_dev *viodev; + unsigned int *unit_address; + unsigned int *irq_p; + + DBGENTER(); + + /* we need the 'device_type' property, in order to match with drivers */ + if ((NULL == of_node->type)) { + printk(KERN_WARNING + "%s: node %s missing 'device_type'\n", __FUNCTION__, + of_node->name ? of_node->name : ""); + return NULL; + } + + unit_address = (unsigned int *)get_property(of_node, "reg", NULL); + if (!unit_address) { + printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, + of_node->name ? of_node->name : ""); + return NULL; + } + + /* allocate a vio_dev for this node */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) { + return NULL; + } + memset(viodev, 0, sizeof(struct vio_dev)); + + viodev->dev.platform_data = of_node_get(of_node); + + viodev->irq = NO_IRQ; + irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); + if (irq_p) { + int virq = virt_irq_create_mapping(*irq_p); + if (virq == NO_IRQ) { + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", of_node->full_name); + } else + viodev->irq = irq_offset_up(virq); + } + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); + + /* register with generic device framework */ + if (vio_register_device_common(viodev, of_node->name, of_node->type, + *unit_address, vio_build_iommu_table(viodev)) + == NULL) { + /* XXX free TCE table */ + kfree(viodev); + return NULL; + } + device_create_file(&viodev->dev, &dev_attr_devspec); + + return viodev; +} +EXPORT_SYMBOL(vio_register_device_node); +#endif + +#ifdef CONFIG_PPC_ISERIES +/** + * vio_register_device: - Register a new vio device. + * @voidev: The device to register. + */ +static struct vio_dev *__init vio_register_device_iseries(char *type, + uint32_t unit_num) +{ + struct vio_dev *viodev; + + DBGENTER(); + + /* allocate a vio_dev for this node */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) + return NULL; + memset(viodev, 0, sizeof(struct vio_dev)); + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); + + return vio_register_device_common(viodev, viodev->dev.bus_id, type, + unit_num, &vio_iommu_table); +} +#endif + +void __devinit vio_unregister_device(struct vio_dev *viodev) +{ + DBGENTER(); +#ifdef CONFIG_PPC_PSERIES + device_remove_file(&viodev->dev, &dev_attr_devspec); +#endif + device_remove_file(&viodev->dev, &dev_attr_name); + device_unregister(&viodev->dev); +} +EXPORT_SYMBOL(vio_unregister_device); + +#ifdef CONFIG_PPC_PSERIES +/** + * vio_get_attribute: - get attribute for virtual device + * @vdev: The vio device to get property. + * @which: The property/attribute to be extracted. + * @length: Pointer to length of returned data size (unused if NULL). + * + * Calls prom.c's get_property() to return the value of the + * attribute specified by the preprocessor constant @which +*/ +const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) +{ + return get_property(vdev->dev.platform_data, (char*)which, length); +} +EXPORT_SYMBOL(vio_get_attribute); + +/* vio_find_name() - internal because only vio.c knows how we formatted the + * kobject name + * XXX once vio_bus_type.devices is actually used as a kset in + * drivers/base/bus.c, this function should be removed in favor of + * "device_find(kobj_name, &vio_bus_type)" + */ +static struct vio_dev *vio_find_name(const char *kobj_name) +{ + struct kobject *found; + + found = kset_find_obj(&devices_subsys.kset, kobj_name); + if (!found) + return NULL; + + return to_vio_dev(container_of(found, struct device, kobj)); +} + +/** + * vio_find_node - find an already-registered vio_dev + * @vnode: device_node of the virtual device we're looking for + */ +struct vio_dev *vio_find_node(struct device_node *vnode) +{ + uint32_t *unit_address; + char kobj_name[BUS_ID_SIZE]; + + /* construct the kobject name from the device node */ + unit_address = (uint32_t *)get_property(vnode, "reg", NULL); + if (!unit_address) + return NULL; + snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); + + return vio_find_name(kobj_name); +} +EXPORT_SYMBOL(vio_find_node); + +/** + * vio_build_iommu_table: - gets the dma information from OF and builds the TCE tree. + * @dev: the virtual device. + * + * Returns a pointer to the built tce tree, or NULL if it can't + * find property. +*/ +static struct iommu_table * vio_build_iommu_table(struct vio_dev *dev) +{ + unsigned int *dma_window; + struct iommu_table *newTceTable; + unsigned long offset; + int dma_window_property_size; + + dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); + if(!dma_window) { + return NULL; + } + + newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + /* There should be some code to extract the phys-encoded offset + using prom_n_addr_cells(). However, according to a comment + on earlier versions, it's always zero, so we don't bother */ + offset = dma_window[1] >> PAGE_SHIFT; + + /* TCE table size - measured in tce entries */ + newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; + /* offset for VIO should always be 0 */ + newTceTable->it_offset = offset; + newTceTable->it_busno = 0; + newTceTable->it_index = (unsigned long)dma_window[0]; + newTceTable->it_type = TCE_VB; + + return iommu_init_table(newTceTable); +} + +int vio_enable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); + if (rc != H_Success) { + printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); + } + return rc; +} +EXPORT_SYMBOL(vio_enable_interrupts); + +int vio_disable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); + if (rc != H_Success) { + printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); + } + return rc; +} +EXPORT_SYMBOL(vio_disable_interrupts); +#endif + +static dma_addr_t vio_map_single(struct device *dev, void *vaddr, + size_t size, enum dma_data_direction direction) +{ + return iommu_map_single(to_vio_dev(dev)->iommu_table, vaddr, size, + direction); +} + +static void vio_unmap_single(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + iommu_unmap_single(to_vio_dev(dev)->iommu_table, dma_handle, size, + direction); +} + +static int vio_map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + return iommu_map_sg(dev, to_vio_dev(dev)->iommu_table, sglist, + nelems, direction); +} + +static void vio_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + iommu_unmap_sg(to_vio_dev(dev)->iommu_table, sglist, nelems, direction); +} + +static void *vio_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, unsigned int __nocast flag) +{ + return iommu_alloc_coherent(to_vio_dev(dev)->iommu_table, size, + dma_handle, flag); +} + +static void vio_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + iommu_free_coherent(to_vio_dev(dev)->iommu_table, size, vaddr, + dma_handle); +} + +static int vio_dma_supported(struct device *dev, u64 mask) +{ + return 1; +} + +struct dma_mapping_ops vio_dma_ops = { + .alloc_coherent = vio_alloc_coherent, + .free_coherent = vio_free_coherent, + .map_single = vio_map_single, + .unmap_single = vio_unmap_single, + .map_sg = vio_map_sg, + .unmap_sg = vio_unmap_sg, + .dma_supported = vio_dma_supported, +}; + +static int vio_bus_match(struct device *dev, struct device_driver *drv) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + struct vio_driver *vio_drv = to_vio_driver(drv); + const struct vio_device_id *ids = vio_drv->id_table; + const struct vio_device_id *found_id; + + DBGENTER(); + + if (!ids) + return 0; + + found_id = vio_match_device(ids, vio_dev); + if (found_id) + return 1; + + return 0; +} + +struct bus_type vio_bus_type = { + .name = "vio", + .match = vio_bus_match, +}; + +EXPORT_SYMBOL(vio_bus_type); diff --git a/arch/ppc64/kernel/viopath.c b/arch/ppc64/kernel/viopath.c new file mode 100644 index 00000000000..2ed8ee07568 --- /dev/null +++ b/arch/ppc64/kernel/viopath.c @@ -0,0 +1,675 @@ +/* -*- linux-c -*- + * arch/ppc64/kernel/viopath.c + * + * iSeries Virtual I/O Message Path code + * + * Authors: Dave Boutcher + * Ryan Arnold + * Colin Devilbiss + * + * (C) Copyright 2000-2003 IBM Corporation + * + * This code is used by the iSeries virtual disk, cd, + * tape, and console to communicate with OS/400 in another + * partition. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Status of the path to each other partition in the system. + * This is overkill, since we will only ever establish connections + * to our hosting partition and the primary partition on the system. + * But this allows for other support in the future. + */ +static struct viopathStatus { + int isOpen; /* Did we open the path? */ + int isActive; /* Do we have a mon msg outstanding */ + int users[VIO_MAX_SUBTYPES]; + HvLpInstanceId mSourceInst; + HvLpInstanceId mTargetInst; + int numberAllocated; +} viopathStatus[HVMAXARCHITECTEDLPS]; + +static DEFINE_SPINLOCK(statuslock); + +/* + * For each kind of event we allocate a buffer that is + * guaranteed not to cross a page boundary + */ +static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned; +static atomic_t event_buffer_available[VIO_MAX_SUBTYPES]; +static int event_buffer_initialised; + +static void handleMonitorEvent(struct HvLpEvent *event); + +/* + * We use this structure to handle asynchronous responses. The caller + * blocks on the semaphore and the handler posts the semaphore. However, + * if system_state is not SYSTEM_RUNNING, then wait_atomic is used ... + */ +struct alloc_parms { + struct semaphore sem; + int number; + atomic_t wait_atomic; + int used_wait_atomic; +}; + +/* Put a sequence number in each mon msg. The value is not + * important. Start at something other than 0 just for + * readability. wrapping this is ok. + */ +static u8 viomonseq = 22; + +/* Our hosting logical partition. We get this at startup + * time, and different modules access this variable directly. + */ +HvLpIndex viopath_hostLp = HvLpIndexInvalid; +EXPORT_SYMBOL(viopath_hostLp); +HvLpIndex viopath_ourLp = HvLpIndexInvalid; +EXPORT_SYMBOL(viopath_ourLp); + +/* For each kind of incoming event we set a pointer to a + * routine to call. + */ +static vio_event_handler_t *vio_handler[VIO_MAX_SUBTYPES]; + +#define VIOPATH_KERN_WARN KERN_WARNING "viopath: " +#define VIOPATH_KERN_INFO KERN_INFO "viopath: " + +static int proc_viopath_show(struct seq_file *m, void *v) +{ + char *buf; + u16 vlanMap; + dma_addr_t handle; + HvLpEvent_Rc hvrc; + DECLARE_MUTEX_LOCKED(Semaphore); + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return 0; + memset(buf, 0, PAGE_SIZE); + + handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE, + DMA_FROM_DEVICE); + + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_config | vioconfigget, + HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + (u64)(unsigned long)&Semaphore, VIOVERSION << 16, + ((u64)handle) << 32, PAGE_SIZE, 0, 0); + + if (hvrc != HvLpEvent_Rc_Good) + printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc); + + down(&Semaphore); + + vlanMap = HvLpConfig_getVirtualLanIndexMap(); + + buf[PAGE_SIZE-1] = '\0'; + seq_printf(m, "%s", buf); + seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap); + seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n", + e2a(xItExtVpdPanel.mfgID[2]), + e2a(xItExtVpdPanel.mfgID[3]), + e2a(xItExtVpdPanel.systemSerial[1]), + e2a(xItExtVpdPanel.systemSerial[2]), + e2a(xItExtVpdPanel.systemSerial[3]), + e2a(xItExtVpdPanel.systemSerial[4]), + e2a(xItExtVpdPanel.systemSerial[5])); + + dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE); + kfree(buf); + + return 0; +} + +static int proc_viopath_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_viopath_show, NULL); +} + +static struct file_operations proc_viopath_operations = { + .open = proc_viopath_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init vio_proc_init(void) +{ + struct proc_dir_entry *e; + + e = create_proc_entry("iSeries/config", 0, NULL); + if (e) + e->proc_fops = &proc_viopath_operations; + + return 0; +} +__initcall(vio_proc_init); + +/* See if a given LP is active. Allow for invalid lps to be passed in + * and just return invalid + */ +int viopath_isactive(HvLpIndex lp) +{ + if (lp == HvLpIndexInvalid) + return 0; + if (lp < HVMAXARCHITECTEDLPS) + return viopathStatus[lp].isActive; + else + return 0; +} +EXPORT_SYMBOL(viopath_isactive); + +/* + * We cache the source and target instance ids for each + * partition. + */ +HvLpInstanceId viopath_sourceinst(HvLpIndex lp) +{ + return viopathStatus[lp].mSourceInst; +} +EXPORT_SYMBOL(viopath_sourceinst); + +HvLpInstanceId viopath_targetinst(HvLpIndex lp) +{ + return viopathStatus[lp].mTargetInst; +} +EXPORT_SYMBOL(viopath_targetinst); + +/* + * Send a monitor message. This is a message with the acknowledge + * bit on that the other side will NOT explicitly acknowledge. When + * the other side goes down, the hypervisor will acknowledge any + * outstanding messages....so we will know when the other side dies. + */ +static void sendMonMsg(HvLpIndex remoteLp) +{ + HvLpEvent_Rc hvrc; + + viopathStatus[remoteLp].mSourceInst = + HvCallEvent_getSourceLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].mTargetInst = + HvCallEvent_getTargetLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + + /* + * Deliberately ignore the return code here. if we call this + * more than once, we don't care. + */ + vio_setHandler(viomajorsubtype_monitor, handleMonitorEvent); + + hvrc = HvCallEvent_signalLpEventFast(remoteLp, HvLpEvent_Type_VirtualIo, + viomajorsubtype_monitor, HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_DeferredAck, + viopathStatus[remoteLp].mSourceInst, + viopathStatus[remoteLp].mTargetInst, + viomonseq++, 0, 0, 0, 0, 0); + + if (hvrc == HvLpEvent_Rc_Good) + viopathStatus[remoteLp].isActive = 1; + else { + printk(VIOPATH_KERN_WARN "could not connect to partition %d\n", + remoteLp); + viopathStatus[remoteLp].isActive = 0; + } +} + +static void handleMonitorEvent(struct HvLpEvent *event) +{ + HvLpIndex remoteLp; + int i; + + /* + * This handler is _also_ called as part of the loop + * at the end of this routine, so it must be able to + * ignore NULL events... + */ + if (!event) + return; + + /* + * First see if this is just a normal monitor message from the + * other partition + */ + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + remoteLp = event->xSourceLp; + if (!viopathStatus[remoteLp].isActive) + sendMonMsg(remoteLp); + return; + } + + /* + * This path is for an acknowledgement; the other partition + * died + */ + remoteLp = event->xTargetLp; + if ((event->xSourceInstanceId != viopathStatus[remoteLp].mSourceInst) || + (event->xTargetInstanceId != viopathStatus[remoteLp].mTargetInst)) { + printk(VIOPATH_KERN_WARN "ignoring ack....mismatched instances\n"); + return; + } + + printk(VIOPATH_KERN_WARN "partition %d ended\n", remoteLp); + + viopathStatus[remoteLp].isActive = 0; + + /* + * For each active handler, pass them a NULL + * message to indicate that the other partition + * died + */ + for (i = 0; i < VIO_MAX_SUBTYPES; i++) { + if (vio_handler[i] != NULL) + (*vio_handler[i])(NULL); + } +} + +int vio_setHandler(int subtype, vio_event_handler_t *beh) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + if (vio_handler[subtype] != NULL) + return -EBUSY; + vio_handler[subtype] = beh; + return 0; +} +EXPORT_SYMBOL(vio_setHandler); + +int vio_clearHandler(int subtype) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + if (vio_handler[subtype] == NULL) + return -EAGAIN; + vio_handler[subtype] = NULL; + return 0; +} +EXPORT_SYMBOL(vio_clearHandler); + +static void handleConfig(struct HvLpEvent *event) +{ + if (!event) + return; + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + printk(VIOPATH_KERN_WARN + "unexpected config request from partition %d", + event->xSourceLp); + + if ((event->xFlags.xFunction == HvLpEvent_Function_Int) && + (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + return; + } + + up((struct semaphore *)event->xCorrelationToken); +} + +/* + * Initialization of the hosting partition + */ +void vio_set_hostlp(void) +{ + /* + * If this has already been set then we DON'T want to either change + * it or re-register the proc file system + */ + if (viopath_hostLp != HvLpIndexInvalid) + return; + + /* + * Figure out our hosting partition. This isn't allowed to change + * while we're active + */ + viopath_ourLp = HvLpConfig_getLpIndex(); + viopath_hostLp = HvCallCfg_getHostingLpIndex(viopath_ourLp); + + if (viopath_hostLp != HvLpIndexInvalid) + vio_setHandler(viomajorsubtype_config, handleConfig); +} +EXPORT_SYMBOL(vio_set_hostlp); + +static void vio_handleEvent(struct HvLpEvent *event, struct pt_regs *regs) +{ + HvLpIndex remoteLp; + int subtype = (event->xSubtype & VIOMAJOR_SUBTYPE_MASK) + >> VIOMAJOR_SUBTYPE_SHIFT; + + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + remoteLp = event->xSourceLp; + /* + * The isActive is checked because if the hosting partition + * went down and came back up it would not be active but it + * would have different source and target instances, in which + * case we'd want to reset them. This case really protects + * against an unauthorized active partition sending interrupts + * or acks to this linux partition. + */ + if (viopathStatus[remoteLp].isActive + && (event->xSourceInstanceId != + viopathStatus[remoteLp].mTargetInst)) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "int msg rcvd, source inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mTargetInst, + event->xSourceInstanceId); + return; + } + + if (viopathStatus[remoteLp].isActive + && (event->xTargetInstanceId != + viopathStatus[remoteLp].mSourceInst)) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "int msg rcvd, target inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mSourceInst, + event->xTargetInstanceId); + return; + } + } else { + remoteLp = event->xTargetLp; + if (event->xSourceInstanceId != + viopathStatus[remoteLp].mSourceInst) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "ack msg rcvd, source inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mSourceInst, + event->xSourceInstanceId); + return; + } + + if (event->xTargetInstanceId != + viopathStatus[remoteLp].mTargetInst) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "viopath: ack msg rcvd, target inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mTargetInst, + event->xTargetInstanceId); + return; + } + } + + if (vio_handler[subtype] == NULL) { + printk(VIOPATH_KERN_WARN + "unexpected virtual io event subtype %d from partition %d\n", + event->xSubtype, remoteLp); + /* No handler. Ack if necessary */ + if ((event->xFlags.xFunction == HvLpEvent_Function_Int) && + (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + return; + } + + /* This innocuous little line is where all the real work happens */ + (*vio_handler[subtype])(event); +} + +static void viopath_donealloc(void *parm, int number) +{ + struct alloc_parms *parmsp = parm; + + parmsp->number = number; + if (parmsp->used_wait_atomic) + atomic_set(&parmsp->wait_atomic, 0); + else + up(&parmsp->sem); +} + +static int allocateEvents(HvLpIndex remoteLp, int numEvents) +{ + struct alloc_parms parms; + + if (system_state != SYSTEM_RUNNING) { + parms.used_wait_atomic = 1; + atomic_set(&parms.wait_atomic, 1); + } else { + parms.used_wait_atomic = 0; + init_MUTEX_LOCKED(&parms.sem); + } + mf_allocate_lp_events(remoteLp, HvLpEvent_Type_VirtualIo, 250, /* It would be nice to put a real number here! */ + numEvents, &viopath_donealloc, &parms); + if (system_state != SYSTEM_RUNNING) { + while (atomic_read(&parms.wait_atomic)) + mb(); + } else + down(&parms.sem); + return parms.number; +} + +int viopath_open(HvLpIndex remoteLp, int subtype, int numReq) +{ + int i; + unsigned long flags; + int tempNumAllocated; + + if ((remoteLp >= HvMaxArchitectedLps) || (remoteLp == HvLpIndexInvalid)) + return -EINVAL; + + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + + spin_lock_irqsave(&statuslock, flags); + + if (!event_buffer_initialised) { + for (i = 0; i < VIO_MAX_SUBTYPES; i++) + atomic_set(&event_buffer_available[i], 1); + event_buffer_initialised = 1; + } + + viopathStatus[remoteLp].users[subtype]++; + + if (!viopathStatus[remoteLp].isOpen) { + viopathStatus[remoteLp].isOpen = 1; + HvCallEvent_openLpEventPath(remoteLp, HvLpEvent_Type_VirtualIo); + + /* + * Don't hold the spinlock during an operation that + * can sleep. + */ + spin_unlock_irqrestore(&statuslock, flags); + tempNumAllocated = allocateEvents(remoteLp, 1); + spin_lock_irqsave(&statuslock, flags); + + viopathStatus[remoteLp].numberAllocated += tempNumAllocated; + + if (viopathStatus[remoteLp].numberAllocated == 0) { + HvCallEvent_closeLpEventPath(remoteLp, + HvLpEvent_Type_VirtualIo); + + spin_unlock_irqrestore(&statuslock, flags); + return -ENOMEM; + } + + viopathStatus[remoteLp].mSourceInst = + HvCallEvent_getSourceLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].mTargetInst = + HvCallEvent_getTargetLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + HvLpEvent_registerHandler(HvLpEvent_Type_VirtualIo, + &vio_handleEvent); + sendMonMsg(remoteLp); + printk(VIOPATH_KERN_INFO "opening connection to partition %d, " + "setting sinst %d, tinst %d\n", + remoteLp, viopathStatus[remoteLp].mSourceInst, + viopathStatus[remoteLp].mTargetInst); + } + + spin_unlock_irqrestore(&statuslock, flags); + tempNumAllocated = allocateEvents(remoteLp, numReq); + spin_lock_irqsave(&statuslock, flags); + viopathStatus[remoteLp].numberAllocated += tempNumAllocated; + spin_unlock_irqrestore(&statuslock, flags); + + return 0; +} +EXPORT_SYMBOL(viopath_open); + +int viopath_close(HvLpIndex remoteLp, int subtype, int numReq) +{ + unsigned long flags; + int i; + int numOpen; + struct alloc_parms parms; + + if ((remoteLp >= HvMaxArchitectedLps) || (remoteLp == HvLpIndexInvalid)) + return -EINVAL; + + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + + spin_lock_irqsave(&statuslock, flags); + /* + * If the viopath_close somehow gets called before a + * viopath_open it could decrement to -1 which is a non + * recoverable state so we'll prevent this from + * happening. + */ + if (viopathStatus[remoteLp].users[subtype] > 0) + viopathStatus[remoteLp].users[subtype]--; + + spin_unlock_irqrestore(&statuslock, flags); + + parms.used_wait_atomic = 0; + init_MUTEX_LOCKED(&parms.sem); + mf_deallocate_lp_events(remoteLp, HvLpEvent_Type_VirtualIo, + numReq, &viopath_donealloc, &parms); + down(&parms.sem); + + spin_lock_irqsave(&statuslock, flags); + for (i = 0, numOpen = 0; i < VIO_MAX_SUBTYPES; i++) + numOpen += viopathStatus[remoteLp].users[i]; + + if ((viopathStatus[remoteLp].isOpen) && (numOpen == 0)) { + printk(VIOPATH_KERN_INFO "closing connection to partition %d", + remoteLp); + + HvCallEvent_closeLpEventPath(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].isOpen = 0; + viopathStatus[remoteLp].isActive = 0; + + for (i = 0; i < VIO_MAX_SUBTYPES; i++) + atomic_set(&event_buffer_available[i], 0); + event_buffer_initialised = 0; + } + spin_unlock_irqrestore(&statuslock, flags); + return 0; +} +EXPORT_SYMBOL(viopath_close); + +void *vio_get_event_buffer(int subtype) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return NULL; + + if (atomic_dec_if_positive(&event_buffer_available[subtype]) == 0) + return &event_buffer[subtype * 256]; + else + return NULL; +} +EXPORT_SYMBOL(vio_get_event_buffer); + +void vio_free_event_buffer(int subtype, void *buffer) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) { + printk(VIOPATH_KERN_WARN + "unexpected subtype %d freeing event buffer\n", subtype); + return; + } + + if (atomic_read(&event_buffer_available[subtype]) != 0) { + printk(VIOPATH_KERN_WARN + "freeing unallocated event buffer, subtype %d\n", + subtype); + return; + } + + if (buffer != &event_buffer[subtype * 256]) { + printk(VIOPATH_KERN_WARN + "freeing invalid event buffer, subtype %d\n", subtype); + } + + atomic_set(&event_buffer_available[subtype], 1); +} +EXPORT_SYMBOL(vio_free_event_buffer); + +static const struct vio_error_entry vio_no_error = + { 0, 0, "Non-VIO Error" }; +static const struct vio_error_entry vio_unknown_error = + { 0, EIO, "Unknown Error" }; + +static const struct vio_error_entry vio_default_errors[] = { + {0x0001, EIO, "No Connection"}, + {0x0002, EIO, "No Receiver"}, + {0x0003, EIO, "No Buffer Available"}, + {0x0004, EBADRQC, "Invalid Message Type"}, + {0x0000, 0, NULL}, +}; + +const struct vio_error_entry *vio_lookup_rc( + const struct vio_error_entry *local_table, u16 rc) +{ + const struct vio_error_entry *cur; + + if (!rc) + return &vio_no_error; + if (local_table) + for (cur = local_table; cur->rc; ++cur) + if (cur->rc == rc) + return cur; + for (cur = vio_default_errors; cur->rc; ++cur) + if (cur->rc == rc) + return cur; + return &vio_unknown_error; +} +EXPORT_SYMBOL(vio_lookup_rc); diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S new file mode 100644 index 00000000000..4103cc13f8d --- /dev/null +++ b/arch/ppc64/kernel/vmlinux.lds.S @@ -0,0 +1,145 @@ +#include + +OUTPUT_ARCH(powerpc:common64) +jiffies = jiffies_64; +SECTIONS +{ + /* Sections to be discarded. */ + /DISCARD/ : { + *(.exitcall.exit) + } + + + /* Read-only sections, merged into text segment: */ + .text : { + *(.text .text.*) + SCHED_TEXT + LOCK_TEXT + *(.fixup) + . = ALIGN(4096); + _etext = .; + } + + __ex_table : { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + __bug_table : { + __start___bug_table = .; + *(__bug_table) + __stop___bug_table = .; + } + + __ftr_fixup : { + __start___ftr_fixup = .; + *(__ftr_fixup) + __stop___ftr_fixup = .; + } + + RODATA + + + /* will be freed after init */ + . = ALIGN(4096); + __init_begin = .; + + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } + + .init.data : { + *(.init.data) + } + + . = ALIGN(16); + .init.setup : { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + + .initcall.init : { + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + } + + .con_initcall.init : { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + + SECURITY_INIT + + . = ALIGN(4096); + .init.ramfs : { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } + + .data.percpu : { + __per_cpu_start = .; + *(.data.percpu) + __per_cpu_end = .; + } + + . = ALIGN(16384); + __init_end = .; + /* freed after init ends here */ + + + /* Read/write sections */ + . = ALIGN(16384); + /* The initial task and kernel stack */ + .data.init_task : { + *(.data.init_task) + } + + .data.page_aligned : { + *(.data.page_aligned) + } + + .data.cacheline_aligned : { + *(.data.cacheline_aligned) + } + + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) + } + + .opd : { + *(.opd) + } + + .got : { + __toc_start = .; + *(.got) + *(.toc) + . = ALIGN(4096); + _edata = .; + } + + + . = ALIGN(4096); + .bss : { + __bss_start = .; + *(.bss) + __bss_stop = .; + } + + . = ALIGN(4096); + _end = . ; +} diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c new file mode 100644 index 00000000000..eedd1d3c2a1 --- /dev/null +++ b/arch/ppc64/kernel/xics.c @@ -0,0 +1,713 @@ +/* + * arch/ppc64/kernel/xics.c + * + * Copyright 2000 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "i8259.h" + +static unsigned int xics_startup(unsigned int irq); +static void xics_enable_irq(unsigned int irq); +static void xics_disable_irq(unsigned int irq); +static void xics_mask_and_ack_irq(unsigned int irq); +static void xics_end_irq(unsigned int irq); +static void xics_set_affinity(unsigned int irq_nr, cpumask_t cpumask); + +struct hw_interrupt_type xics_pic = { + .typename = " XICS ", + .startup = xics_startup, + .enable = xics_enable_irq, + .disable = xics_disable_irq, + .ack = xics_mask_and_ack_irq, + .end = xics_end_irq, + .set_affinity = xics_set_affinity +}; + +struct hw_interrupt_type xics_8259_pic = { + .typename = " XICS/8259", + .ack = xics_mask_and_ack_irq, +}; + +/* This is used to map real irq numbers to virtual */ +static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC); + +#define XICS_IPI 2 +#define XICS_IRQ_SPURIOUS 0 + +/* Want a priority other than 0. Various HW issues require this. */ +#define DEFAULT_PRIORITY 5 + +/* + * Mark IPIs as higher priority so we can take them inside interrupts that + * arent marked SA_INTERRUPT + */ +#define IPI_PRIORITY 4 + +struct xics_ipl { + union { + u32 word; + u8 bytes[4]; + } xirr_poll; + union { + u32 word; + u8 bytes[4]; + } xirr; + u32 dummy; + union { + u32 word; + u8 bytes[4]; + } qirr; +}; + +static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS]; + +static int xics_irq_8259_cascade = 0; +static int xics_irq_8259_cascade_real = 0; +static unsigned int default_server = 0xFF; +/* also referenced in smp.c... */ +unsigned int default_distrib_server = 0; +unsigned int interrupt_server_size = 8; + +/* + * XICS only has a single IPI, so encode the messages per CPU + */ +struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned; + +/* RTAS service tokens */ +int ibm_get_xive; +int ibm_set_xive; +int ibm_int_on; +int ibm_int_off; + +typedef struct { + int (*xirr_info_get)(int cpu); + void (*xirr_info_set)(int cpu, int val); + void (*cppr_info)(int cpu, u8 val); + void (*qirr_info)(int cpu, u8 val); +} xics_ops; + + +/* SMP */ + +static int pSeries_xirr_info_get(int n_cpu) +{ + return in_be32(&xics_per_cpu[n_cpu]->xirr.word); +} + +static void pSeries_xirr_info_set(int n_cpu, int value) +{ + out_be32(&xics_per_cpu[n_cpu]->xirr.word, value); +} + +static void pSeries_cppr_info(int n_cpu, u8 value) +{ + out_8(&xics_per_cpu[n_cpu]->xirr.bytes[0], value); +} + +static void pSeries_qirr_info(int n_cpu, u8 value) +{ + out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value); +} + +static xics_ops pSeries_ops = { + pSeries_xirr_info_get, + pSeries_xirr_info_set, + pSeries_cppr_info, + pSeries_qirr_info +}; + +static xics_ops *ops = &pSeries_ops; + + +/* LPAR */ + +static inline long plpar_eoi(unsigned long xirr) +{ + return plpar_hcall_norets(H_EOI, xirr); +} + +static inline long plpar_cppr(unsigned long cppr) +{ + return plpar_hcall_norets(H_CPPR, cppr); +} + +static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr) +{ + return plpar_hcall_norets(H_IPI, servernum, mfrr); +} + +static inline long plpar_xirr(unsigned long *xirr_ret) +{ + unsigned long dummy; + return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy); +} + +static int pSeriesLP_xirr_info_get(int n_cpu) +{ + unsigned long lpar_rc; + unsigned long return_value; + + lpar_rc = plpar_xirr(&return_value); + if (lpar_rc != H_Success) + panic(" bad return code xirr - rc = %lx \n", lpar_rc); + return (int)return_value; +} + +static void pSeriesLP_xirr_info_set(int n_cpu, int value) +{ + unsigned long lpar_rc; + unsigned long val64 = value & 0xffffffff; + + lpar_rc = plpar_eoi(val64); + if (lpar_rc != H_Success) + panic("bad return code EOI - rc = %ld, value=%lx\n", lpar_rc, + val64); +} + +void pSeriesLP_cppr_info(int n_cpu, u8 value) +{ + unsigned long lpar_rc; + + lpar_rc = plpar_cppr(value); + if (lpar_rc != H_Success) + panic("bad return code cppr - rc = %lx\n", lpar_rc); +} + +static void pSeriesLP_qirr_info(int n_cpu , u8 value) +{ + unsigned long lpar_rc; + + lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value); + if (lpar_rc != H_Success) + panic("bad return code qirr - rc = %lx\n", lpar_rc); +} + +xics_ops pSeriesLP_ops = { + pSeriesLP_xirr_info_get, + pSeriesLP_xirr_info_set, + pSeriesLP_cppr_info, + pSeriesLP_qirr_info +}; + +static unsigned int xics_startup(unsigned int virq) +{ + unsigned int irq; + + irq = irq_offset_down(virq); + if (radix_tree_insert(&irq_map, virt_irq_to_real(irq), + &virt_irq_to_real_map[irq]) == -ENOMEM) + printk(KERN_CRIT "Out of memory creating real -> virtual" + " IRQ mapping for irq %u (real 0x%x)\n", + virq, virt_irq_to_real(irq)); + xics_enable_irq(virq); + return 0; /* return value is ignored */ +} + +static unsigned int real_irq_to_virt(unsigned int real_irq) +{ + unsigned int *ptr; + + ptr = radix_tree_lookup(&irq_map, real_irq); + if (ptr == NULL) + return NO_IRQ; + return ptr - virt_irq_to_real_map; +} + +#ifdef CONFIG_SMP +static int get_irq_server(unsigned int irq) +{ + unsigned int server; + /* For the moment only implement delivery to all cpus or one cpu */ + cpumask_t cpumask = irq_affinity[irq]; + cpumask_t tmp = CPU_MASK_NONE; + + if (!distribute_irqs) + return default_server; + + if (cpus_equal(cpumask, CPU_MASK_ALL)) { + server = default_distrib_server; + } else { + cpus_and(tmp, cpu_online_map, cpumask); + + if (cpus_empty(tmp)) + server = default_distrib_server; + else + server = get_hard_smp_processor_id(first_cpu(tmp)); + } + + return server; + +} +#else +static int get_irq_server(unsigned int irq) +{ + return default_server; +} +#endif + +static void xics_enable_irq(unsigned int virq) +{ + unsigned int irq; + int call_status; + unsigned int server; + + irq = virt_irq_to_real(irq_offset_down(virq)); + if (irq == XICS_IPI) + return; + + server = get_irq_server(virq); + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, + DEFAULT_PRIORITY); + if (call_status != 0) { + printk(KERN_ERR "xics_enable_irq: irq=%d: ibm_set_xive " + "returned %x\n", irq, call_status); + return; + } + + /* Now unmask the interrupt (often a no-op) */ + call_status = rtas_call(ibm_int_on, 1, 1, NULL, irq); + if (call_status != 0) { + printk(KERN_ERR "xics_enable_irq: irq=%d: ibm_int_on " + "returned %x\n", irq, call_status); + return; + } +} + +static void xics_disable_real_irq(unsigned int irq) +{ + int call_status; + unsigned int server; + + if (irq == XICS_IPI) + return; + + call_status = rtas_call(ibm_int_off, 1, 1, NULL, irq); + if (call_status != 0) { + printk(KERN_ERR "xics_disable_real_irq: irq=%d: " + "ibm_int_off returned %x\n", irq, call_status); + return; + } + + server = get_irq_server(irq); + /* Have to set XIVE to 0xff to be able to remove a slot */ + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, 0xff); + if (call_status != 0) { + printk(KERN_ERR "xics_disable_irq: irq=%d: ibm_set_xive(0xff)" + " returned %x\n", irq, call_status); + return; + } +} + +static void xics_disable_irq(unsigned int virq) +{ + unsigned int irq; + + irq = virt_irq_to_real(irq_offset_down(virq)); + xics_disable_real_irq(irq); +} + +static void xics_end_irq(unsigned int irq) +{ + int cpu = smp_processor_id(); + + iosync(); + ops->xirr_info_set(cpu, ((0xff << 24) | + (virt_irq_to_real(irq_offset_down(irq))))); + +} + +static void xics_mask_and_ack_irq(unsigned int irq) +{ + int cpu = smp_processor_id(); + + if (irq < irq_offset_value()) { + i8259_pic.ack(irq); + iosync(); + ops->xirr_info_set(cpu, ((0xff<<24) | + xics_irq_8259_cascade_real)); + iosync(); + } +} + +int xics_get_irq(struct pt_regs *regs) +{ + unsigned int cpu = smp_processor_id(); + unsigned int vec; + int irq; + + vec = ops->xirr_info_get(cpu); + /* (vec >> 24) == old priority */ + vec &= 0x00ffffff; + + /* for sanity, this had better be < NR_IRQS - 16 */ + if (vec == xics_irq_8259_cascade_real) { + irq = i8259_irq(cpu); + if (irq == -1) { + /* Spurious cascaded interrupt. Still must ack xics */ + xics_end_irq(irq_offset_up(xics_irq_8259_cascade)); + + irq = -1; + } + } else if (vec == XICS_IRQ_SPURIOUS) { + irq = -1; + } else { + irq = real_irq_to_virt(vec); + if (irq == NO_IRQ) + irq = real_irq_to_virt_slowpath(vec); + if (irq == NO_IRQ) { + printk(KERN_ERR "Interrupt %d (real) is invalid," + " disabling it.\n", vec); + xics_disable_real_irq(vec); + } else + irq = irq_offset_up(irq); + } + return irq; +} + +#ifdef CONFIG_SMP + +irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + ops->qirr_info(cpu, 0xff); + + WARN_ON(cpu_is_offline(cpu)); + + while (xics_ipi_message[cpu].value) { + if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, + &xics_ipi_message[cpu].value)) { + mb(); + smp_message_recv(PPC_MSG_CALL_FUNCTION, regs); + } + if (test_and_clear_bit(PPC_MSG_RESCHEDULE, + &xics_ipi_message[cpu].value)) { + mb(); + smp_message_recv(PPC_MSG_RESCHEDULE, regs); + } +#if 0 + if (test_and_clear_bit(PPC_MSG_MIGRATE_TASK, + &xics_ipi_message[cpu].value)) { + mb(); + smp_message_recv(PPC_MSG_MIGRATE_TASK, regs); + } +#endif +#ifdef CONFIG_DEBUGGER + if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, + &xics_ipi_message[cpu].value)) { + mb(); + smp_message_recv(PPC_MSG_DEBUGGER_BREAK, regs); + } +#endif + } + return IRQ_HANDLED; +} + +void xics_cause_IPI(int cpu) +{ + ops->qirr_info(cpu, IPI_PRIORITY); +} + +void xics_setup_cpu(void) +{ + int cpu = smp_processor_id(); + + ops->cppr_info(cpu, 0xff); + iosync(); +} + +#endif /* CONFIG_SMP */ + +void xics_init_IRQ(void) +{ + int i; + unsigned long intr_size = 0; + struct device_node *np; + uint *ireg, ilen, indx = 0; + unsigned long intr_base = 0; + struct xics_interrupt_node { + unsigned long addr; + unsigned long size; + } intnodes[NR_CPUS]; + + ppc64_boot_msg(0x20, "XICS Init"); + + ibm_get_xive = rtas_token("ibm,get-xive"); + ibm_set_xive = rtas_token("ibm,set-xive"); + ibm_int_on = rtas_token("ibm,int-on"); + ibm_int_off = rtas_token("ibm,int-off"); + + np = of_find_node_by_type(NULL, "PowerPC-External-Interrupt-Presentation"); + if (!np) + panic("xics_init_IRQ: can't find interrupt presentation"); + +nextnode: + ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", NULL); + if (ireg) { + /* + * set node starting index for this node + */ + indx = *ireg; + } + + ireg = (uint *)get_property(np, "reg", &ilen); + if (!ireg) + panic("xics_init_IRQ: can't find interrupt reg property"); + + while (ilen) { + intnodes[indx].addr = (unsigned long)*ireg++ << 32; + ilen -= sizeof(uint); + intnodes[indx].addr |= *ireg++; + ilen -= sizeof(uint); + intnodes[indx].size = (unsigned long)*ireg++ << 32; + ilen -= sizeof(uint); + intnodes[indx].size |= *ireg++; + ilen -= sizeof(uint); + indx++; + if (indx >= NR_CPUS) break; + } + + np = of_find_node_by_type(np, "PowerPC-External-Interrupt-Presentation"); + if ((indx < NR_CPUS) && np) goto nextnode; + + /* Find the server numbers for the boot cpu. */ + for (np = of_find_node_by_type(NULL, "cpu"); + np; + np = of_find_node_by_type(np, "cpu")) { + ireg = (uint *)get_property(np, "reg", &ilen); + if (ireg && ireg[0] == boot_cpuid_phys) { + ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s", + &ilen); + i = ilen / sizeof(int); + if (ireg && i > 0) { + default_server = ireg[0]; + default_distrib_server = ireg[i-1]; /* take last element */ + } + ireg = (uint *)get_property(np, + "ibm,interrupt-server#-size", NULL); + if (ireg) + interrupt_server_size = *ireg; + break; + } + } + of_node_put(np); + + intr_base = intnodes[0].addr; + intr_size = intnodes[0].size; + + np = of_find_node_by_type(NULL, "interrupt-controller"); + if (!np) { + printk(KERN_WARNING "xics: no ISA interrupt controller\n"); + xics_irq_8259_cascade_real = -1; + xics_irq_8259_cascade = -1; + } else { + ireg = (uint *) get_property(np, "interrupts", NULL); + if (!ireg) + panic("xics_init_IRQ: can't find ISA interrupts property"); + + xics_irq_8259_cascade_real = *ireg; + xics_irq_8259_cascade + = virt_irq_create_mapping(xics_irq_8259_cascade_real); + of_node_put(np); + } + + if (systemcfg->platform == PLATFORM_PSERIES) { +#ifdef CONFIG_SMP + for_each_cpu(i) { + int hard_id; + + /* FIXME: Do this dynamically! --RR */ + if (!cpu_present(i)) + continue; + + hard_id = get_hard_smp_processor_id(i); + xics_per_cpu[i] = ioremap(intnodes[hard_id].addr, + intnodes[hard_id].size); + } +#else + xics_per_cpu[0] = ioremap(intr_base, intr_size); +#endif /* CONFIG_SMP */ + } else if (systemcfg->platform == PLATFORM_PSERIES_LPAR) { + ops = &pSeriesLP_ops; + } + + xics_8259_pic.enable = i8259_pic.enable; + xics_8259_pic.disable = i8259_pic.disable; + for (i = 0; i < 16; ++i) + get_irq_desc(i)->handler = &xics_8259_pic; + for (; i < NR_IRQS; ++i) + get_irq_desc(i)->handler = &xics_pic; + + ops->cppr_info(boot_cpuid, 0xff); + iosync(); + + ppc64_boot_msg(0x21, "XICS Done"); +} + +/* + * We cant do this in init_IRQ because we need the memory subsystem up for + * request_irq() + */ +static int __init xics_setup_i8259(void) +{ + if (ppc64_interrupt_controller == IC_PPC_XIC && + xics_irq_8259_cascade != -1) { + if (request_irq(irq_offset_up(xics_irq_8259_cascade), + no_action, 0, "8259 cascade", NULL)) + printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 " + "cascade\n"); + i8259_init(0); + } + return 0; +} +arch_initcall(xics_setup_i8259); + +#ifdef CONFIG_SMP +void xics_request_IPIs(void) +{ + virt_irq_to_real_map[XICS_IPI] = XICS_IPI; + + /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */ + request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, SA_INTERRUPT, + "IPI", NULL); + get_irq_desc(irq_offset_up(XICS_IPI))->status |= IRQ_PER_CPU; +} +#endif + +static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) +{ + unsigned int irq; + int status; + int xics_status[2]; + unsigned long newmask; + cpumask_t tmp = CPU_MASK_NONE; + + irq = virt_irq_to_real(irq_offset_down(virq)); + if (irq == XICS_IPI || irq == NO_IRQ) + return; + + status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); + + if (status) { + printk(KERN_ERR "xics_set_affinity: irq=%d ibm,get-xive " + "returns %d\n", irq, status); + return; + } + + /* For the moment only implement delivery to all cpus or one cpu */ + if (cpus_equal(cpumask, CPU_MASK_ALL)) { + newmask = default_distrib_server; + } else { + cpus_and(tmp, cpu_online_map, cpumask); + if (cpus_empty(tmp)) + return; + newmask = get_hard_smp_processor_id(first_cpu(tmp)); + } + + status = rtas_call(ibm_set_xive, 3, 1, NULL, + irq, newmask, xics_status[1]); + + if (status) { + printk(KERN_ERR "xics_set_affinity: irq=%d ibm,set-xive " + "returns %d\n", irq, status); + return; + } +} + +#ifdef CONFIG_HOTPLUG_CPU + +/* Interrupts are disabled. */ +void xics_migrate_irqs_away(void) +{ + int status; + unsigned int irq, virq, cpu = smp_processor_id(); + + /* Reject any interrupt that was queued to us... */ + ops->cppr_info(cpu, 0); + iosync(); + + /* remove ourselves from the global interrupt queue */ + status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, + (1UL << interrupt_server_size) - 1 - default_distrib_server, 0); + WARN_ON(status < 0); + + /* Allow IPIs again... */ + ops->cppr_info(cpu, DEFAULT_PRIORITY); + iosync(); + + for_each_irq(virq) { + irq_desc_t *desc; + int xics_status[2]; + unsigned long flags; + + /* We cant set affinity on ISA interrupts */ + if (virq < irq_offset_value()) + continue; + + desc = get_irq_desc(virq); + irq = virt_irq_to_real(irq_offset_down(virq)); + + /* We need to get IPIs still. */ + if (irq == XICS_IPI || irq == NO_IRQ) + continue; + + /* We only need to migrate enabled IRQS */ + if (desc == NULL || desc->handler == NULL + || desc->action == NULL + || desc->handler->set_affinity == NULL) + continue; + + spin_lock_irqsave(&desc->lock, flags); + + status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); + if (status) { + printk(KERN_ERR "migrate_irqs_away: irq=%d " + "ibm,get-xive returns %d\n", + virq, status); + goto unlock; + } + + /* + * We only support delivery to all cpus or to one cpu. + * The irq has to be migrated only in the single cpu + * case. + */ + if (xics_status[0] != get_hard_smp_processor_id(cpu)) + goto unlock; + + printk(KERN_WARNING "IRQ %d affinity broken off cpu %u\n", + virq, cpu); + + /* Reset affinity to all cpus */ + desc->handler->set_affinity(virq, CPU_MASK_ALL); + irq_affinity[virq] = CPU_MASK_ALL; +unlock: + spin_unlock_irqrestore(&desc->lock, flags); + } +} +#endif diff --git a/arch/ppc64/lib/Makefile b/arch/ppc64/lib/Makefile new file mode 100644 index 00000000000..bf7b5bbfc04 --- /dev/null +++ b/arch/ppc64/lib/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for ppc64-specific library files.. +# + +lib-y := checksum.o dec_and_lock.o string.o strcase.o +lib-y += copypage.o memcpy.o copyuser.o usercopy.o + +# Lock primitives are defined as no-ops in include/linux/spinlock.h +# for non-SMP configs. Don't build the real versions. + +lib-$(CONFIG_SMP) += locks.o + +# e2a provides EBCDIC to ASCII conversions. +ifdef CONFIG_PPC_ISERIES +obj-$(CONFIG_PCI) += e2a.o +endif + +lib-$(CONFIG_DEBUG_KERNEL) += sstep.o diff --git a/arch/ppc64/lib/checksum.S b/arch/ppc64/lib/checksum.S new file mode 100644 index 00000000000..ef96c6c58ef --- /dev/null +++ b/arch/ppc64/lib/checksum.S @@ -0,0 +1,229 @@ +/* + * This file contains assembly-language implementations + * of IP-style 1's complement checksum routines. + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). + */ + +#include +#include +#include +#include + +/* + * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header + * len is in words and is always >= 5. + * + * In practice len == 5, but this is not guaranteed. So this code does not + * attempt to use doubleword instructions. + */ +_GLOBAL(ip_fast_csum) + lwz r0,0(r3) + lwzu r5,4(r3) + addic. r4,r4,-2 + addc r0,r0,r5 + mtctr r4 + blelr- +1: lwzu r4,4(r3) + adde r0,r0,r4 + bdnz 1b + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32-bit halves together */ + add r0,r0,r4 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Compute checksum of TCP or UDP pseudo-header: + * csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum) + * No real gain trying to do this specially for 64 bit, but + * the 32 bit addition may spill into the upper bits of + * the doubleword so we still must fold it down from 64. + */ +_GLOBAL(csum_tcpudp_magic) + rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ + addc r0,r3,r4 /* add 4 32-bit words together */ + adde r0,r0,r5 + adde r0,r0,r7 + rldicl r4,r0,32,0 /* fold 64 bit value */ + add r0,r4,r0 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit). + * + * This code assumes at least halfword alignment, though the length + * can be any number of bytes. The sum is accumulated in r5. + * + * csum_partial(r3=buff, r4=len, r5=sum) + */ +_GLOBAL(csum_partial) + subi r3,r3,8 /* we'll offset by 8 for the loads */ + srdi. r6,r4,3 /* divide by 8 for doubleword count */ + addic r5,r5,0 /* clear carry */ + beq 3f /* if we're doing < 8 bytes */ + andi. r0,r3,2 /* aligned on a word boundary already? */ + beq+ 1f + lhz r6,8(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r4,r4,2 + addc r5,r5,r6 + srdi. r6,r4,3 /* recompute number of doublewords */ + beq 3f /* any left? */ +1: mtctr r6 +2: ldu r6,8(r3) /* main sum loop */ + adde r5,r5,r6 + bdnz 2b + andi. r4,r4,7 /* compute bytes left to sum after doublewords */ +3: cmpwi 0,r4,4 /* is at least a full word left? */ + blt 4f + lwz r6,8(r3) /* sum this word */ + addi r3,r3,4 + subi r4,r4,4 + adde r5,r5,r6 +4: cmpwi 0,r4,2 /* is at least a halfword left? */ + blt+ 5f + lhz r6,8(r3) /* sum this halfword */ + addi r3,r3,2 + subi r4,r4,2 + adde r5,r5,r6 +5: cmpwi 0,r4,1 /* is at least a byte left? */ + bne+ 6f + lbz r6,8(r3) /* sum this byte */ + slwi r6,r6,8 /* this byte is assumed to be the upper byte of a halfword */ + adde r5,r5,r6 +6: addze r5,r5 /* add in final carry */ + rldicl r4,r5,32,0 /* fold two 32-bit halves together */ + add r3,r4,r5 + srdi r3,r3,32 + blr + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit), while copying the block to dst. + * If an access exception occurs on src or dst, it stores -EFAULT + * to *src_err or *dst_err respectively, and (for an error on + * src) zeroes the rest of dst. + * + * This code needs to be reworked to take advantage of 64 bit sum+copy. + * However, due to tokenring halfword alignment problems this will be very + * tricky. For now we'll leave it until we instrument it somehow. + * + * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) + */ +_GLOBAL(csum_partial_copy_generic) + addic r0,r6,0 + subi r3,r3,4 + subi r4,r4,4 + srwi. r6,r5,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r9,r4,2 /* Align dst to longword boundary */ + beq+ 1f +81: lhz r6,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r5,r5,2 +91: sth r6,4(r4) + addi r4,r4,2 + addc r0,r0,r6 + srwi. r6,r5,2 /* # words to do */ + beq 3f +1: mtctr r6 +82: lwzu r6,4(r3) /* the bdnz has zero overhead, so it should */ +92: stwu r6,4(r4) /* be unnecessary to unroll this loop */ + adde r0,r0,r6 + bdnz 82b + andi. r5,r5,3 +3: cmpwi 0,r5,2 + blt+ 4f +83: lhz r6,4(r3) + addi r3,r3,2 + subi r5,r5,2 +93: sth r6,4(r4) + addi r4,r4,2 + adde r0,r0,r6 +4: cmpwi 0,r5,1 + bne+ 5f +84: lbz r6,4(r3) +94: stb r6,4(r4) + slwi r6,r6,8 /* Upper byte of word */ + adde r0,r0,r6 +5: addze r3,r0 /* add in final carry (unlikely with 64-bit regs) */ + rldicl r4,r3,32,0 /* fold 64 bit value */ + add r3,r4,r3 + srdi r3,r3,32 + blr + +/* These shouldn't go in the fixup section, since that would + cause the ex_table addresses to get out of order. */ + + .globl src_error_1 +src_error_1: + li r6,0 + subi r5,r5,2 +95: sth r6,4(r4) + addi r4,r4,2 + srwi. r6,r5,2 + beq 3f + mtctr r6 + .globl src_error_2 +src_error_2: + li r6,0 +96: stwu r6,4(r4) + bdnz 96b +3: andi. r5,r5,3 + beq src_error + .globl src_error_3 +src_error_3: + li r6,0 + mtctr r5 + addi r4,r4,3 +97: stbu r6,1(r4) + bdnz 97b + .globl src_error +src_error: + cmpdi 0,r7,0 + beq 1f + li r6,-EFAULT + stw r6,0(r7) +1: addze r3,r0 + blr + + .globl dst_error +dst_error: + cmpdi 0,r8,0 + beq 1f + li r6,-EFAULT + stw r6,0(r8) +1: addze r3,r0 + blr + +.section __ex_table,"a" + .align 3 + .llong 81b,src_error_1 + .llong 91b,dst_error + .llong 82b,src_error_2 + .llong 92b,dst_error + .llong 83b,src_error_3 + .llong 93b,dst_error + .llong 84b,src_error_3 + .llong 94b,dst_error + .llong 95b,dst_error + .llong 96b,dst_error + .llong 97b,dst_error diff --git a/arch/ppc64/lib/copypage.S b/arch/ppc64/lib/copypage.S new file mode 100644 index 00000000000..733d61618bb --- /dev/null +++ b/arch/ppc64/lib/copypage.S @@ -0,0 +1,121 @@ +/* + * arch/ppc64/lib/copypage.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + +_GLOBAL(copy_page) + std r31,-8(1) + std r30,-16(1) + std r29,-24(1) + std r28,-32(1) + std r27,-40(1) + std r26,-48(1) + std r25,-56(1) + std r24,-64(1) + std r23,-72(1) + std r22,-80(1) + std r21,-88(1) + std r20,-96(1) + li r5,4096/32 - 1 + addi r3,r3,-8 + li r12,5 +0: addi r5,r5,-24 + mtctr r12 + ld r22,640(4) + ld r21,512(4) + ld r20,384(4) + ld r11,256(4) + ld r9,128(4) + ld r7,0(4) + ld r25,648(4) + ld r24,520(4) + ld r23,392(4) + ld r10,264(4) + ld r8,136(4) + ldu r6,8(4) + cmpwi r5,24 +1: std r22,648(3) + std r21,520(3) + std r20,392(3) + std r11,264(3) + std r9,136(3) + std r7,8(3) + ld r28,648(4) + ld r27,520(4) + ld r26,392(4) + ld r31,264(4) + ld r30,136(4) + ld r29,8(4) + std r25,656(3) + std r24,528(3) + std r23,400(3) + std r10,272(3) + std r8,144(3) + std r6,16(3) + ld r22,656(4) + ld r21,528(4) + ld r20,400(4) + ld r11,272(4) + ld r9,144(4) + ld r7,16(4) + std r28,664(3) + std r27,536(3) + std r26,408(3) + std r31,280(3) + std r30,152(3) + stdu r29,24(3) + ld r25,664(4) + ld r24,536(4) + ld r23,408(4) + ld r10,280(4) + ld r8,152(4) + ldu r6,24(4) + bdnz 1b + std r22,648(3) + std r21,520(3) + std r20,392(3) + std r11,264(3) + std r9,136(3) + std r7,8(3) + addi r4,r4,640 + addi r3,r3,648 + bge 0b + mtctr r5 + ld r7,0(4) + ld r8,8(4) + ldu r9,16(4) +3: ld r10,8(4) + std r7,8(3) + ld r7,16(4) + std r8,16(3) + ld r8,24(4) + std r9,24(3) + ldu r9,32(4) + stdu r10,32(3) + bdnz 3b +4: ld r10,8(4) + std r7,8(3) + std r8,16(3) + std r9,24(3) + std r10,32(3) +9: ld r20,-96(1) + ld r21,-88(1) + ld r22,-80(1) + ld r23,-72(1) + ld r24,-64(1) + ld r25,-56(1) + ld r26,-48(1) + ld r27,-40(1) + ld r28,-32(1) + ld r29,-24(1) + ld r30,-16(1) + ld r31,-8(1) + blr diff --git a/arch/ppc64/lib/copyuser.S b/arch/ppc64/lib/copyuser.S new file mode 100644 index 00000000000..a0b3fbbd6fb --- /dev/null +++ b/arch/ppc64/lib/copyuser.S @@ -0,0 +1,576 @@ +/* + * arch/ppc64/lib/copyuser.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + + .align 7 +_GLOBAL(__copy_tofrom_user) + /* first check for a whole page copy on a page boundary */ + cmpldi cr1,r5,16 + cmpdi cr6,r5,4096 + or r0,r3,r4 + neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ + andi. r0,r0,4095 + std r3,-24(r1) + crand cr0*4+2,cr0*4+2,cr6*4+2 + std r4,-16(r1) + std r5,-8(r1) + dcbt 0,r4 + beq .Lcopy_page + andi. r6,r6,7 + mtcrf 0x01,r5 + blt cr1,.Lshort_copy + bne .Ldst_unaligned +.Ldst_aligned: + andi. r0,r4,7 + addi r3,r3,-16 + bne .Lsrc_unaligned + srdi r7,r5,4 +20: ld r9,0(r4) + addi r4,r4,-8 + mtctr r7 + andi. r5,r5,7 + bf cr7*4+0,22f + addi r3,r3,8 + addi r4,r4,8 + mr r8,r9 + blt cr1,72f +21: ld r9,8(r4) +70: std r8,8(r3) +22: ldu r8,16(r4) +71: stdu r9,16(r3) + bdnz 21b +72: std r8,8(r3) + beq+ 3f + addi r3,r3,16 +23: ld r9,8(r4) +.Ldo_tail: + bf cr7*4+1,1f + rotldi r9,r9,32 +73: stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 +74: sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 +75: stb r9,0(r3) +3: li r3,0 + blr + +.Lsrc_unaligned: + srdi r6,r5,3 + addi r5,r5,-16 + subf r4,r0,r4 + srdi r7,r5,4 + sldi r10,r0,3 + cmpldi cr6,r6,3 + andi. r5,r5,7 + mtctr r7 + subfic r11,r10,64 + add r5,r5,r0 + bt cr7*4+0,28f + +24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ +25: ld r0,8(r4) + sld r6,r9,r10 +26: ldu r9,16(r4) + srd r7,r0,r11 + sld r8,r0,r10 + or r7,r7,r6 + blt cr6,79f +27: ld r0,8(r4) + b 2f + +28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ +29: ldu r9,8(r4) + sld r8,r0,r10 + addi r3,r3,-8 + blt cr6,5f +30: ld r0,8(r4) + srd r12,r9,r11 + sld r6,r9,r10 +31: ldu r9,16(r4) + or r12,r8,r12 + srd r7,r0,r11 + sld r8,r0,r10 + addi r3,r3,16 + beq cr6,78f + +1: or r7,r7,r6 +32: ld r0,8(r4) +76: std r12,8(r3) +2: srd r12,r9,r11 + sld r6,r9,r10 +33: ldu r9,16(r4) + or r12,r8,r12 +77: stdu r7,16(r3) + srd r7,r0,r11 + sld r8,r0,r10 + bdnz 1b + +78: std r12,8(r3) + or r7,r7,r6 +79: std r7,16(r3) +5: srd r12,r9,r11 + or r12,r8,r12 +80: std r12,24(r3) + bne 6f + li r3,0 + blr +6: cmpwi cr1,r5,8 + addi r3,r3,32 + sld r9,r9,r10 + ble cr1,.Ldo_tail +34: ld r0,8(r4) + srd r7,r0,r11 + or r9,r7,r9 + b .Ldo_tail + +.Ldst_unaligned: + mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */ + subf r5,r6,r5 + li r7,0 + cmpldi r1,r5,16 + bf cr7*4+3,1f +35: lbz r0,0(r4) +81: stb r0,0(r3) + addi r7,r7,1 +1: bf cr7*4+2,2f +36: lhzx r0,r7,r4 +82: sthx r0,r7,r3 + addi r7,r7,2 +2: bf cr7*4+1,3f +37: lwzx r0,r7,r4 +83: stwx r0,r7,r3 +3: mtcrf 0x01,r5 + add r4,r6,r4 + add r3,r6,r3 + b .Ldst_aligned + +.Lshort_copy: + bf cr7*4+0,1f +38: lwz r0,0(r4) +39: lwz r9,4(r4) + addi r4,r4,8 +84: stw r0,0(r3) +85: stw r9,4(r3) + addi r3,r3,8 +1: bf cr7*4+1,2f +40: lwz r0,0(r4) + addi r4,r4,4 +86: stw r0,0(r3) + addi r3,r3,4 +2: bf cr7*4+2,3f +41: lhz r0,0(r4) + addi r4,r4,2 +87: sth r0,0(r3) + addi r3,r3,2 +3: bf cr7*4+3,4f +42: lbz r0,0(r4) +88: stb r0,0(r3) +4: li r3,0 + blr + +/* + * exception handlers follow + * we have to return the number of bytes not copied + * for an exception on a load, we set the rest of the destination to 0 + */ + +136: +137: + add r3,r3,r7 + b 1f +130: +131: + addi r3,r3,8 +120: +122: +124: +125: +126: +127: +128: +129: +133: + addi r3,r3,8 +121: +132: + addi r3,r3,8 +123: +134: +135: +138: +139: +140: +141: +142: + +/* + * here we have had a fault on a load and r3 points to the first + * unmodified byte of the destination + */ +1: ld r6,-24(r1) + ld r4,-16(r1) + ld r5,-8(r1) + subf r6,r6,r3 + add r4,r4,r6 + subf r5,r6,r5 /* #bytes left to go */ + +/* + * first see if we can copy any more bytes before hitting another exception + */ + mtctr r5 +43: lbz r0,0(r4) + addi r4,r4,1 +89: stb r0,0(r3) + addi r3,r3,1 + bdnz 43b + li r3,0 /* huh? all copied successfully this time? */ + blr + +/* + * here we have trapped again, need to clear ctr bytes starting at r3 + */ +143: mfctr r5 + li r0,0 + mr r4,r3 + mr r3,r5 /* return the number of bytes not copied */ +1: andi. r9,r4,7 + beq 3f +90: stb r0,0(r4) + addic. r5,r5,-1 + addi r4,r4,1 + bne 1b + blr +3: cmpldi cr1,r5,8 + srdi r9,r5,3 + andi. r5,r5,7 + blt cr1,93f + mtctr r9 +91: std r0,0(r4) + addi r4,r4,8 + bdnz 91b +93: beqlr + mtctr r5 +92: stb r0,0(r4) + addi r4,r4,1 + bdnz 92b + blr + +/* + * exception handlers for stores: we just need to work + * out how many bytes weren't copied + */ +182: +183: + add r3,r3,r7 + b 1f +180: + addi r3,r3,8 +171: +177: + addi r3,r3,8 +170: +172: +176: +178: + addi r3,r3,4 +185: + addi r3,r3,4 +173: +174: +175: +179: +181: +184: +186: +187: +188: +189: +1: + ld r6,-24(r1) + ld r5,-8(r1) + add r6,r6,r5 + subf r3,r3,r6 /* #bytes not copied */ +190: +191: +192: + blr /* #bytes not copied in r3 */ + + .section __ex_table,"a" + .align 3 + .llong 20b,120b + .llong 21b,121b + .llong 70b,170b + .llong 22b,122b + .llong 71b,171b + .llong 72b,172b + .llong 23b,123b + .llong 73b,173b + .llong 74b,174b + .llong 75b,175b + .llong 24b,124b + .llong 25b,125b + .llong 26b,126b + .llong 27b,127b + .llong 28b,128b + .llong 29b,129b + .llong 30b,130b + .llong 31b,131b + .llong 32b,132b + .llong 76b,176b + .llong 33b,133b + .llong 77b,177b + .llong 78b,178b + .llong 79b,179b + .llong 80b,180b + .llong 34b,134b + .llong 35b,135b + .llong 81b,181b + .llong 36b,136b + .llong 82b,182b + .llong 37b,137b + .llong 83b,183b + .llong 38b,138b + .llong 39b,139b + .llong 84b,184b + .llong 85b,185b + .llong 40b,140b + .llong 86b,186b + .llong 41b,141b + .llong 87b,187b + .llong 42b,142b + .llong 88b,188b + .llong 43b,143b + .llong 89b,189b + .llong 90b,190b + .llong 91b,191b + .llong 92b,192b + + .text + +/* + * Routine to copy a whole page of data, optimized for POWER4. + * On POWER4 it is more than 50% faster than the simple loop + * above (following the .Ldst_aligned label) but it runs slightly + * slower on POWER3. + */ +.Lcopy_page: + std r31,-32(1) + std r30,-40(1) + std r29,-48(1) + std r28,-56(1) + std r27,-64(1) + std r26,-72(1) + std r25,-80(1) + std r24,-88(1) + std r23,-96(1) + std r22,-104(1) + std r21,-112(1) + std r20,-120(1) + li r5,4096/32 - 1 + addi r3,r3,-8 + li r0,5 +0: addi r5,r5,-24 + mtctr r0 +20: ld r22,640(4) +21: ld r21,512(4) +22: ld r20,384(4) +23: ld r11,256(4) +24: ld r9,128(4) +25: ld r7,0(4) +26: ld r25,648(4) +27: ld r24,520(4) +28: ld r23,392(4) +29: ld r10,264(4) +30: ld r8,136(4) +31: ldu r6,8(4) + cmpwi r5,24 +1: +32: std r22,648(3) +33: std r21,520(3) +34: std r20,392(3) +35: std r11,264(3) +36: std r9,136(3) +37: std r7,8(3) +38: ld r28,648(4) +39: ld r27,520(4) +40: ld r26,392(4) +41: ld r31,264(4) +42: ld r30,136(4) +43: ld r29,8(4) +44: std r25,656(3) +45: std r24,528(3) +46: std r23,400(3) +47: std r10,272(3) +48: std r8,144(3) +49: std r6,16(3) +50: ld r22,656(4) +51: ld r21,528(4) +52: ld r20,400(4) +53: ld r11,272(4) +54: ld r9,144(4) +55: ld r7,16(4) +56: std r28,664(3) +57: std r27,536(3) +58: std r26,408(3) +59: std r31,280(3) +60: std r30,152(3) +61: stdu r29,24(3) +62: ld r25,664(4) +63: ld r24,536(4) +64: ld r23,408(4) +65: ld r10,280(4) +66: ld r8,152(4) +67: ldu r6,24(4) + bdnz 1b +68: std r22,648(3) +69: std r21,520(3) +70: std r20,392(3) +71: std r11,264(3) +72: std r9,136(3) +73: std r7,8(3) +74: addi r4,r4,640 +75: addi r3,r3,648 + bge 0b + mtctr r5 +76: ld r7,0(4) +77: ld r8,8(4) +78: ldu r9,16(4) +3: +79: ld r10,8(4) +80: std r7,8(3) +81: ld r7,16(4) +82: std r8,16(3) +83: ld r8,24(4) +84: std r9,24(3) +85: ldu r9,32(4) +86: stdu r10,32(3) + bdnz 3b +4: +87: ld r10,8(4) +88: std r7,8(3) +89: std r8,16(3) +90: std r9,24(3) +91: std r10,32(3) +9: ld r20,-120(1) + ld r21,-112(1) + ld r22,-104(1) + ld r23,-96(1) + ld r24,-88(1) + ld r25,-80(1) + ld r26,-72(1) + ld r27,-64(1) + ld r28,-56(1) + ld r29,-48(1) + ld r30,-40(1) + ld r31,-32(1) + li r3,0 + blr + +/* + * on an exception, reset to the beginning and jump back into the + * standard __copy_tofrom_user + */ +100: ld r20,-120(1) + ld r21,-112(1) + ld r22,-104(1) + ld r23,-96(1) + ld r24,-88(1) + ld r25,-80(1) + ld r26,-72(1) + ld r27,-64(1) + ld r28,-56(1) + ld r29,-48(1) + ld r30,-40(1) + ld r31,-32(1) + ld r3,-24(r1) + ld r4,-16(r1) + li r5,4096 + b .Ldst_aligned + + .section __ex_table,"a" + .align 3 + .llong 20b,100b + .llong 21b,100b + .llong 22b,100b + .llong 23b,100b + .llong 24b,100b + .llong 25b,100b + .llong 26b,100b + .llong 27b,100b + .llong 28b,100b + .llong 29b,100b + .llong 30b,100b + .llong 31b,100b + .llong 32b,100b + .llong 33b,100b + .llong 34b,100b + .llong 35b,100b + .llong 36b,100b + .llong 37b,100b + .llong 38b,100b + .llong 39b,100b + .llong 40b,100b + .llong 41b,100b + .llong 42b,100b + .llong 43b,100b + .llong 44b,100b + .llong 45b,100b + .llong 46b,100b + .llong 47b,100b + .llong 48b,100b + .llong 49b,100b + .llong 50b,100b + .llong 51b,100b + .llong 52b,100b + .llong 53b,100b + .llong 54b,100b + .llong 55b,100b + .llong 56b,100b + .llong 57b,100b + .llong 58b,100b + .llong 59b,100b + .llong 60b,100b + .llong 61b,100b + .llong 62b,100b + .llong 63b,100b + .llong 64b,100b + .llong 65b,100b + .llong 66b,100b + .llong 67b,100b + .llong 68b,100b + .llong 69b,100b + .llong 70b,100b + .llong 71b,100b + .llong 72b,100b + .llong 73b,100b + .llong 74b,100b + .llong 75b,100b + .llong 76b,100b + .llong 77b,100b + .llong 78b,100b + .llong 79b,100b + .llong 80b,100b + .llong 81b,100b + .llong 82b,100b + .llong 83b,100b + .llong 84b,100b + .llong 85b,100b + .llong 86b,100b + .llong 87b,100b + .llong 88b,100b + .llong 89b,100b + .llong 90b,100b + .llong 91b,100b diff --git a/arch/ppc64/lib/dec_and_lock.c b/arch/ppc64/lib/dec_and_lock.c new file mode 100644 index 00000000000..6e8d8591708 --- /dev/null +++ b/arch/ppc64/lib/dec_and_lock.c @@ -0,0 +1,55 @@ +/* + * ppc64 version of atomic_dec_and_lock() using cmpxchg + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +/* + * This is an implementation of the notion of "decrement a + * reference count, and return locked if it decremented to zero". + * + * This implementation can be used on any architecture that + * has a cmpxchg, and where atomic->value is an int holding + * the value of the atomic (i.e. the high bits aren't used + * for a lock or anything like that). + * + * N.B. ATOMIC_DEC_AND_LOCK gets defined in include/linux/spinlock.h + * if spinlocks are empty and thus atomic_dec_and_lock is defined + * to be atomic_dec_and_test - in that case we don't need it + * defined here as well. + */ + +#ifndef ATOMIC_DEC_AND_LOCK +int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +{ + int counter; + int newcount; + + for (;;) { + counter = atomic_read(atomic); + newcount = counter - 1; + if (!newcount) + break; /* do it the slow way */ + + newcount = cmpxchg(&atomic->counter, counter, newcount); + if (newcount == counter) + return 0; + } + + spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock(lock); + return 0; +} + +EXPORT_SYMBOL(_atomic_dec_and_lock); +#endif /* ATOMIC_DEC_AND_LOCK */ diff --git a/arch/ppc64/lib/e2a.c b/arch/ppc64/lib/e2a.c new file mode 100644 index 00000000000..d2b83488792 --- /dev/null +++ b/arch/ppc64/lib/e2a.c @@ -0,0 +1,108 @@ +/* + * arch/ppc64/lib/e2a.c + * + * EBCDIC to ASCII conversion + * + * This function moved here from arch/ppc64/kernel/viopath.c + * + * (C) Copyright 2000-2004 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include + +unsigned char e2a(unsigned char x) +{ + switch (x) { + case 0xF0: + return '0'; + case 0xF1: + return '1'; + case 0xF2: + return '2'; + case 0xF3: + return '3'; + case 0xF4: + return '4'; + case 0xF5: + return '5'; + case 0xF6: + return '6'; + case 0xF7: + return '7'; + case 0xF8: + return '8'; + case 0xF9: + return '9'; + case 0xC1: + return 'A'; + case 0xC2: + return 'B'; + case 0xC3: + return 'C'; + case 0xC4: + return 'D'; + case 0xC5: + return 'E'; + case 0xC6: + return 'F'; + case 0xC7: + return 'G'; + case 0xC8: + return 'H'; + case 0xC9: + return 'I'; + case 0xD1: + return 'J'; + case 0xD2: + return 'K'; + case 0xD3: + return 'L'; + case 0xD4: + return 'M'; + case 0xD5: + return 'N'; + case 0xD6: + return 'O'; + case 0xD7: + return 'P'; + case 0xD8: + return 'Q'; + case 0xD9: + return 'R'; + case 0xE2: + return 'S'; + case 0xE3: + return 'T'; + case 0xE4: + return 'U'; + case 0xE5: + return 'V'; + case 0xE6: + return 'W'; + case 0xE7: + return 'X'; + case 0xE8: + return 'Y'; + case 0xE9: + return 'Z'; + } + return ' '; +} +EXPORT_SYMBOL(e2a); + + diff --git a/arch/ppc64/lib/locks.c b/arch/ppc64/lib/locks.c new file mode 100644 index 00000000000..ef70ef91abe --- /dev/null +++ b/arch/ppc64/lib/locks.c @@ -0,0 +1,95 @@ +/* + * Spin and read/write lock operations. + * + * Copyright (C) 2001-2004 Paul Mackerras , IBM + * Copyright (C) 2001 Anton Blanchard , IBM + * Copyright (C) 2002 Dave Engebretsen , IBM + * Rework to support virtual processors + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* waiting for a spinlock... */ +#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES) + +void __spin_yield(spinlock_t *lock) +{ + unsigned int lock_value, holder_cpu, yield_count; + struct paca_struct *holder_paca; + + lock_value = lock->lock; + if (lock_value == 0) + return; + holder_cpu = lock_value & 0xffff; + BUG_ON(holder_cpu >= NR_CPUS); + holder_paca = &paca[holder_cpu]; + yield_count = holder_paca->lppaca.yield_count; + if ((yield_count & 1) == 0) + return; /* virtual cpu is currently running */ + rmb(); + if (lock->lock != lock_value) + return; /* something has changed */ +#ifdef CONFIG_PPC_ISERIES + HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, + ((u64)holder_cpu << 32) | yield_count); +#else + plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), + yield_count); +#endif +} + +/* + * Waiting for a read lock or a write lock on a rwlock... + * This turns out to be the same for read and write locks, since + * we only know the holder if it is write-locked. + */ +void __rw_yield(rwlock_t *rw) +{ + int lock_value; + unsigned int holder_cpu, yield_count; + struct paca_struct *holder_paca; + + lock_value = rw->lock; + if (lock_value >= 0) + return; /* no write lock at present */ + holder_cpu = lock_value & 0xffff; + BUG_ON(holder_cpu >= NR_CPUS); + holder_paca = &paca[holder_cpu]; + yield_count = holder_paca->lppaca.yield_count; + if ((yield_count & 1) == 0) + return; /* virtual cpu is currently running */ + rmb(); + if (rw->lock != lock_value) + return; /* something has changed */ +#ifdef CONFIG_PPC_ISERIES + HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, + ((u64)holder_cpu << 32) | yield_count); +#else + plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), + yield_count); +#endif +} +#endif + +void spin_unlock_wait(spinlock_t *lock) +{ + while (lock->lock) { + HMT_low(); + if (SHARED_PROCESSOR) + __spin_yield(lock); + } + HMT_medium(); +} + +EXPORT_SYMBOL(spin_unlock_wait); diff --git a/arch/ppc64/lib/memcpy.S b/arch/ppc64/lib/memcpy.S new file mode 100644 index 00000000000..9ccacdf5bcb --- /dev/null +++ b/arch/ppc64/lib/memcpy.S @@ -0,0 +1,172 @@ +/* + * arch/ppc64/lib/memcpy.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + + .align 7 +_GLOBAL(memcpy) + mtcrf 0x01,r5 + cmpldi cr1,r5,16 + neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry + andi. r6,r6,7 + dcbt 0,r4 + blt cr1,.Lshort_copy + bne .Ldst_unaligned +.Ldst_aligned: + andi. r0,r4,7 + addi r3,r3,-16 + bne .Lsrc_unaligned + srdi r7,r5,4 + ld r9,0(r4) + addi r4,r4,-8 + mtctr r7 + andi. r5,r5,7 + bf cr7*4+0,2f + addi r3,r3,8 + addi r4,r4,8 + mr r8,r9 + blt cr1,3f +1: ld r9,8(r4) + std r8,8(r3) +2: ldu r8,16(r4) + stdu r9,16(r3) + bdnz 1b +3: std r8,8(r3) + beqlr + addi r3,r3,16 + ld r9,8(r4) +.Ldo_tail: + bf cr7*4+1,1f + rotldi r9,r9,32 + stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 + sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 + stb r9,0(r3) +3: blr + +.Lsrc_unaligned: + srdi r6,r5,3 + addi r5,r5,-16 + subf r4,r0,r4 + srdi r7,r5,4 + sldi r10,r0,3 + cmpdi cr6,r6,3 + andi. r5,r5,7 + mtctr r7 + subfic r11,r10,64 + add r5,r5,r0 + + bt cr7*4+0,0f + + ld r9,0(r4) # 3+2n loads, 2+2n stores + ld r0,8(r4) + sld r6,r9,r10 + ldu r9,16(r4) + srd r7,r0,r11 + sld r8,r0,r10 + or r7,r7,r6 + blt cr6,4f + ld r0,8(r4) + # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12 + b 2f + +0: ld r0,0(r4) # 4+2n loads, 3+2n stores + ldu r9,8(r4) + sld r8,r0,r10 + addi r3,r3,-8 + blt cr6,5f + ld r0,8(r4) + srd r12,r9,r11 + sld r6,r9,r10 + ldu r9,16(r4) + or r12,r8,r12 + srd r7,r0,r11 + sld r8,r0,r10 + addi r3,r3,16 + beq cr6,3f + + # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9 +1: or r7,r7,r6 + ld r0,8(r4) + std r12,8(r3) +2: srd r12,r9,r11 + sld r6,r9,r10 + ldu r9,16(r4) + or r12,r8,r12 + stdu r7,16(r3) + srd r7,r0,r11 + sld r8,r0,r10 + bdnz 1b + +3: std r12,8(r3) + or r7,r7,r6 +4: std r7,16(r3) +5: srd r12,r9,r11 + or r12,r8,r12 + std r12,24(r3) + beqlr + cmpwi cr1,r5,8 + addi r3,r3,32 + sld r9,r9,r10 + ble cr1,.Ldo_tail + ld r0,8(r4) + srd r7,r0,r11 + or r9,r7,r9 + b .Ldo_tail + +.Ldst_unaligned: + mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7 + subf r5,r6,r5 + li r7,0 + cmpldi r1,r5,16 + bf cr7*4+3,1f + lbz r0,0(r4) + stb r0,0(r3) + addi r7,r7,1 +1: bf cr7*4+2,2f + lhzx r0,r7,r4 + sthx r0,r7,r3 + addi r7,r7,2 +2: bf cr7*4+1,3f + lwzx r0,r7,r4 + stwx r0,r7,r3 +3: mtcrf 0x01,r5 + add r4,r6,r4 + add r3,r6,r3 + b .Ldst_aligned + +.Lshort_copy: + bf cr7*4+0,1f + lwz r0,0(r4) + lwz r9,4(r4) + addi r4,r4,8 + stw r0,0(r3) + stw r9,4(r3) + addi r3,r3,8 +1: bf cr7*4+1,2f + lwz r0,0(r4) + addi r4,r4,4 + stw r0,0(r3) + addi r3,r3,4 +2: bf cr7*4+2,3f + lhz r0,0(r4) + addi r4,r4,2 + sth r0,0(r3) + addi r3,r3,2 +3: bf cr7*4+3,4f + lbz r0,0(r4) + stb r0,0(r3) +4: blr diff --git a/arch/ppc64/lib/sstep.c b/arch/ppc64/lib/sstep.c new file mode 100644 index 00000000000..e79123d1485 --- /dev/null +++ b/arch/ppc64/lib/sstep.c @@ -0,0 +1,141 @@ +/* + * Single-step support. + * + * Copyright (C) 2004 Paul Mackerras , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include + +extern char system_call_common[]; + +/* Bits in SRR1 that are copied from MSR */ +#define MSR_MASK 0xffffffff87c0ffff + +/* + * Determine whether a conditional branch instruction would branch. + */ +static int branch_taken(unsigned int instr, struct pt_regs *regs) +{ + unsigned int bo = (instr >> 21) & 0x1f; + unsigned int bi; + + if ((bo & 4) == 0) { + /* decrement counter */ + --regs->ctr; + if (((bo >> 1) & 1) ^ (regs->ctr == 0)) + return 0; + } + if ((bo & 0x10) == 0) { + /* check bit from CR */ + bi = (instr >> 16) & 0x1f; + if (((regs->ccr >> (31 - bi)) & 1) != ((bo >> 3) & 1)) + return 0; + } + return 1; +} + +/* + * Emulate instructions that cause a transfer of control. + * Returns 1 if the step was emulated, 0 if not, + * or -1 if the instruction is one that should not be stepped, + * such as an rfid, or a mtmsrd that would clear MSR_RI. + */ +int emulate_step(struct pt_regs *regs, unsigned int instr) +{ + unsigned int opcode, rd; + unsigned long int imm; + + opcode = instr >> 26; + switch (opcode) { + case 16: /* bc */ + imm = (signed short)(instr & 0xfffc); + if ((instr & 2) == 0) + imm += regs->nip; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + if (instr & 1) + regs->link = regs->nip; + if (branch_taken(instr, regs)) + regs->nip = imm; + return 1; + case 17: /* sc */ + /* + * N.B. this uses knowledge about how the syscall + * entry code works. If that is changed, this will + * need to be changed also. + */ + regs->gpr[9] = regs->gpr[13]; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_common; + regs->msr = MSR_KERNEL; + return 1; + case 18: /* b */ + imm = instr & 0x03fffffc; + if (imm & 0x02000000) + imm -= 0x04000000; + if ((instr & 2) == 0) + imm += regs->nip; + if (instr & 1) { + regs->link = regs->nip + 4; + if ((regs->msr & MSR_SF) == 0) + regs->link &= 0xffffffffUL; + } + if ((regs->msr & MSR_SF) == 0) + imm &= 0xffffffffUL; + regs->nip = imm; + return 1; + case 19: + switch (instr & 0x7fe) { + case 0x20: /* bclr */ + case 0x420: /* bcctr */ + imm = (instr & 0x400)? regs->ctr: regs->link; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) { + regs->nip &= 0xffffffffUL; + imm &= 0xffffffffUL; + } + if (instr & 1) + regs->link = regs->nip; + if (branch_taken(instr, regs)) + regs->nip = imm; + return 1; + case 0x24: /* rfid, scary */ + return -1; + } + case 31: + rd = (instr >> 21) & 0x1f; + switch (instr & 0x7fe) { + case 0xa6: /* mfmsr */ + regs->gpr[rd] = regs->msr & MSR_MASK; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + return 1; + case 0x164: /* mtmsrd */ + /* only MSR_EE and MSR_RI get changed if bit 15 set */ + /* mtmsrd doesn't change MSR_HV and MSR_ME */ + imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL; + imm = (regs->msr & MSR_MASK & ~imm) + | (regs->gpr[rd] & imm); + if ((imm & MSR_RI) == 0) + /* can't step mtmsrd that would clear MSR_RI */ + return -1; + regs->msr = imm; + regs->nip += 4; + if ((imm & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + return 1; + } + } + return 0; +} diff --git a/arch/ppc64/lib/strcase.c b/arch/ppc64/lib/strcase.c new file mode 100644 index 00000000000..e84f243368c --- /dev/null +++ b/arch/ppc64/lib/strcase.c @@ -0,0 +1,31 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + +int strcasecmp(const char *s1, const char *s2) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while (c1 == c2 && c1 != 0); + return c1 - c2; +} + +int strncasecmp(const char *s1, const char *s2, int n) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while ((--n > 0) && c1 == c2 && c1 != 0); + return c1 - c2; +} diff --git a/arch/ppc64/lib/string.S b/arch/ppc64/lib/string.S new file mode 100644 index 00000000000..813587e5c2e --- /dev/null +++ b/arch/ppc64/lib/string.S @@ -0,0 +1,285 @@ +/* + * String handling functions for PowerPC. + * + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +_GLOBAL(strcpy) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + +_GLOBAL(strncpy) + cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + blr + +_GLOBAL(strcat) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r5) + cmpwi 0,r0,0 + bne 1b + addi r5,r5,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + +_GLOBAL(strcmp) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r5) + cmpwi 1,r3,0 + lbzu r0,1(r4) + subf. r3,r0,r3 + beqlr 1 + beq 1b + blr + +_GLOBAL(strlen) + addi r4,r3,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + bne 1b + subf r3,r3,r4 + blr + +_GLOBAL(memset) + neg r0,r3 + rlwimi r4,r4,8,16,23 + andi. r0,r0,7 /* # bytes to be 8-byte aligned */ + rlwimi r4,r4,16,0,15 + cmplw cr1,r5,r0 /* do we get that far? */ + rldimi r4,r4,32,0 + mtcrf 1,r0 + mr r6,r3 + blt cr1,8f + beq+ 3f /* if already 8-byte aligned */ + subf r5,r0,r5 + bf 31,1f + stb r4,0(r6) + addi r6,r6,1 +1: bf 30,2f + sth r4,0(r6) + addi r6,r6,2 +2: bf 29,3f + stw r4,0(r6) + addi r6,r6,4 +3: srdi. r0,r5,6 + clrldi r5,r5,58 + mtctr r0 + beq 5f +4: std r4,0(r6) + std r4,8(r6) + std r4,16(r6) + std r4,24(r6) + std r4,32(r6) + std r4,40(r6) + std r4,48(r6) + std r4,56(r6) + addi r6,r6,64 + bdnz 4b +5: srwi. r0,r5,3 + clrlwi r5,r5,29 + mtcrf 1,r0 + beq 8f + bf 29,6f + std r4,0(r6) + std r4,8(r6) + std r4,16(r6) + std r4,24(r6) + addi r6,r6,32 +6: bf 30,7f + std r4,0(r6) + std r4,8(r6) + addi r6,r6,16 +7: bf 31,8f + std r4,0(r6) + addi r6,r6,8 +8: cmpwi r5,0 + mtcrf 1,r5 + beqlr+ + bf 29,9f + stw r4,0(r6) + addi r6,r6,4 +9: bf 30,10f + sth r4,0(r6) + addi r6,r6,2 +10: bflr 31 + stb r4,0(r6) + blr + +_GLOBAL(memmove) + cmplw 0,r3,r4 + bgt .backwards_memcpy + b .memcpy + +_GLOBAL(backwards_memcpy) + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +_GLOBAL(memcmp) + cmpwi 0,r5,0 + ble- 2f + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r6) + lbzu r0,1(r4) + subf. r3,r0,r3 + bdnzt 2,1b + blr +2: li r3,0 + blr + +_GLOBAL(memchr) + cmpwi 0,r5,0 + ble- 2f + mtctr r5 + addi r3,r3,-1 +1: lbzu r0,1(r3) + cmpw 0,r0,r4 + bdnzf 2,1b + beqlr +2: li r3,0 + blr + +_GLOBAL(__clear_user) + addi r6,r3,-4 + li r3,0 + li r5,0 + cmplwi 0,r4,4 + blt 7f + /* clear a single word */ +11: stwu r5,4(r6) + beqlr + /* clear word sized chunks */ + andi. r0,r6,3 + add r4,r0,r4 + subf r6,r0,r6 + srwi r0,r4,2 + andi. r4,r4,3 + mtctr r0 + bdz 7f +1: stwu r5,4(r6) + bdnz 1b + /* clear byte sized chunks */ +7: cmpwi 0,r4,0 + beqlr + mtctr r4 + addi r6,r6,3 +8: stbu r5,1(r6) + bdnz 8b + blr +90: mr r3,r4 + blr +91: mfctr r3 + slwi r3,r3,2 + add r3,r3,r4 + blr +92: mfctr r3 + blr + + .section __ex_table,"a" + .align 3 + .llong 11b,90b + .llong 1b,91b + .llong 8b,92b + .text + +/* r3 = dst, r4 = src, r5 = count */ +_GLOBAL(__strncpy_from_user) + addi r6,r3,-1 + addi r4,r4,-1 + cmpwi 0,r5,0 + beq 2f + mtctr r5 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + beq 3f +2: addi r6,r6,1 +3: subf r3,r3,r6 + blr +99: li r3,-EFAULT + blr + + .section __ex_table,"a" + .align 3 + .llong 1b,99b + .text + +/* r3 = str, r4 = len (> 0) */ +_GLOBAL(__strnlen_user) + addi r7,r3,-1 + mtctr r4 /* ctr = len */ +1: lbzu r0,1(r7) /* get next byte */ + cmpwi 0,r0,0 + bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */ + addi r7,r7,1 + subf r3,r3,r7 /* number of bytes we have looked at */ + beqlr /* return if we found a 0 byte */ + cmpw 0,r3,r4 /* did we look at all len bytes? */ + blt 99f /* if not, must have hit top */ + addi r3,r4,1 /* return len + 1 to indicate no null found */ + blr +99: li r3,0 /* bad address, return 0 */ + blr + + .section __ex_table,"a" + .align 3 + .llong 1b,99b diff --git a/arch/ppc64/lib/usercopy.c b/arch/ppc64/lib/usercopy.c new file mode 100644 index 00000000000..5eea6f3c1e0 --- /dev/null +++ b/arch/ppc64/lib/usercopy.c @@ -0,0 +1,41 @@ +/* + * Functions which are too large to be inlined. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + +unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_READ, from, n))) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} + +unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_WRITE, to, n))) + n = __copy_to_user(to, from, n); + return n; +} + +unsigned long copy_in_user(void __user *to, const void __user *from, + unsigned long n) +{ + might_sleep(); + if (likely(access_ok(VERIFY_READ, from, n) && + access_ok(VERIFY_WRITE, to, n))) + n =__copy_tofrom_user(to, from, n); + return n; +} + +EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(copy_in_user); + diff --git a/arch/ppc64/mm/Makefile b/arch/ppc64/mm/Makefile new file mode 100644 index 00000000000..ac522d57b2a --- /dev/null +++ b/arch/ppc64/mm/Makefile @@ -0,0 +1,11 @@ +# +# Makefile for the linux ppc-specific parts of the memory manager. +# + +EXTRA_CFLAGS += -mno-minimal-toc + +obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \ + slb_low.o slb.o stab.o mmap.o +obj-$(CONFIG_DISCONTIGMEM) += numa.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_PPC_MULTIPLATFORM) += hash_native.o diff --git a/arch/ppc64/mm/fault.c b/arch/ppc64/mm/fault.c new file mode 100644 index 00000000000..20b0f37e8bf --- /dev/null +++ b/arch/ppc64/mm/fault.c @@ -0,0 +1,312 @@ +/* + * arch/ppc/mm/fault.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/mm/fault.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Modified by Cort Dougan and Paul Mackerras. + * + * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * Check whether the instruction at regs->nip is a store using + * an update addressing form which will update r1. + */ +static int store_updates_sp(struct pt_regs *regs) +{ + unsigned int inst; + + if (get_user(inst, (unsigned int __user *)regs->nip)) + return 0; + /* check for 1 in the rA field */ + if (((inst >> 16) & 0x1f) != 1) + return 0; + /* check major opcode */ + switch (inst >> 26) { + case 37: /* stwu */ + case 39: /* stbu */ + case 45: /* sthu */ + case 53: /* stfsu */ + case 55: /* stfdu */ + return 1; + case 62: /* std or stdu */ + return (inst & 3) == 1; + case 31: + /* check minor opcode */ + switch ((inst >> 1) & 0x3ff) { + case 181: /* stdux */ + case 183: /* stwux */ + case 247: /* stbux */ + case 439: /* sthux */ + case 695: /* stfsux */ + case 759: /* stfdux */ + return 1; + } + } + return 0; +} + +/* + * The error_code parameter is + * - DSISR for a non-SLB data access fault, + * - SRR1 & 0x08000000 for a non-SLB instruction access fault + * - 0 any SLB fault. + * The return value is 0 if the fault was handled, or the signal + * number if this is a kernel fault that can't be handled here. + */ +int do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + struct vm_area_struct * vma; + struct mm_struct *mm = current->mm; + siginfo_t info; + unsigned long code = SEGV_MAPERR; + unsigned long is_write = error_code & DSISR_ISSTORE; + unsigned long trap = TRAP(regs); + unsigned long is_exec = trap == 0x400; + + BUG_ON((trap == 0x380) || (trap == 0x480)); + + if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return 0; + + if (trap == 0x300) { + if (debugger_fault_handler(regs)) + return 0; + } + + /* On a kernel SLB miss we can only check for a valid exception entry */ + if (!user_mode(regs) && (address >= TASK_SIZE)) + return SIGSEGV; + + if (error_code & DSISR_DABRMATCH) { + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return 0; + if (debugger_dabr_match(regs)) + return 0; + } + + if (in_atomic() || mm == NULL) { + if (!user_mode(regs)) + return SIGSEGV; + /* in_atomic() in user mode is really bad, + as is current->mm == NULL. */ + printk(KERN_EMERG "Page fault in user mode with" + "in_atomic() = %d mm = %p\n", in_atomic(), mm); + printk(KERN_EMERG "NIP = %lx MSR = %lx\n", + regs->nip, regs->msr); + die("Weird page fault", regs, SIGSEGV); + } + + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunatly, in the case of an + * erroneous fault occuring in a code path which already holds mmap_sem + * we will deadlock attempting to validate the fault against the + * address space. Luckily the kernel only validly references user + * space from well defined areas of code, which are listed in the + * exceptions table. + * + * As the vast majority of faults will be valid we will only perform + * the source reference check when there is a possibilty of a deadlock. + * Attempt to lock the address space, if we cannot we then validate the + * source. If this is invalid we can skip the address space check, + * thus avoiding the deadlock. + */ + if (!down_read_trylock(&mm->mmap_sem)) { + if (!user_mode(regs) && !search_exception_tables(regs->nip)) + goto bad_area_nosemaphore; + + down_read(&mm->mmap_sem); + } + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + + if (vma->vm_start <= address) { + goto good_area; + } + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + + /* + * N.B. The POWER/Open ABI allows programs to access up to + * 288 bytes below the stack pointer. + * The kernel signal delivery code writes up to about 1.5kB + * below the stack pointer (r1) before decrementing it. + * The exec code can write slightly over 640kB to the stack + * before setting the user r1. Thus we allow the stack to + * expand to 1MB without further checks. + */ + if (address + 0x100000 < vma->vm_end) { + /* get user regs even if this fault is in kernel mode */ + struct pt_regs *uregs = current->thread.regs; + if (uregs == NULL) + goto bad_area; + + /* + * A user-mode access to an address a long way below + * the stack pointer is only valid if the instruction + * is one which would update the stack pointer to the + * address accessed if the instruction completed, + * i.e. either stwu rs,n(r1) or stwux rs,r1,rb + * (or the byte, halfword, float or double forms). + * + * If we don't check this then any write to the area + * between the last mapped region and the stack will + * expand the stack rather than segfaulting. + */ + if (address + 2048 < uregs->gpr[1] + && (!user_mode(regs) || !store_updates_sp(regs))) + goto bad_area; + } + + if (expand_stack(vma, address)) + goto bad_area; + +good_area: + code = SEGV_ACCERR; + + if (is_exec) { + /* protection fault */ + if (error_code & DSISR_PROTFAULT) + goto bad_area; + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; + /* a write */ + } else if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + /* a read */ + } else { + if (!(vma->vm_flags & VM_READ)) + goto bad_area; + } + + survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + switch (handle_mm_fault(mm, vma, address, is_write)) { + + case VM_FAULT_MINOR: + current->min_flt++; + break; + case VM_FAULT_MAJOR: + current->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto do_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + + up_read(&mm->mmap_sem); + return 0; + +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + /* User mode accesses cause a SIGSEGV */ + if (user_mode(regs)) { + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = code; + info.si_addr = (void __user *) address; + force_sig_info(SIGSEGV, &info, current); + return 0; + } + + if (trap == 0x400 && (error_code & DSISR_PROTFAULT) + && printk_ratelimit()) + printk(KERN_CRIT "kernel tried to execute NX-protected" + " page (%lx) - exploit attempt? (uid: %d)\n", + address, current->uid); + + return SIGSEGV; + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + up_read(&mm->mmap_sem); + if (current->pid == 1) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + printk("VM: killing process %s\n", current->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + return SIGKILL; + +do_sigbus: + up_read(&mm->mmap_sem); + if (user_mode(regs)) { + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return 0; + } + return SIGBUS; +} + +/* + * bad_page_fault is called when we have a bad access from the kernel. + * It is called from do_page_fault above and from some of the procedures + * in traps.c. + */ +void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault? */ + if ((entry = search_exception_tables(regs->nip)) != NULL) { + regs->nip = entry->fixup; + return; + } + + /* kernel has accessed a bad area */ + die("Kernel access of bad area", regs, sig); +} diff --git a/arch/ppc64/mm/hash_low.S b/arch/ppc64/mm/hash_low.S new file mode 100644 index 00000000000..8c0156a3700 --- /dev/null +++ b/arch/ppc64/mm/hash_low.S @@ -0,0 +1,287 @@ +/* + * ppc64 MMU hashtable management routines + * + * (c) Copyright IBM Corp. 2003 + * + * Maintained by: Benjamin Herrenschmidt + * + * + * This file is covered by the GNU Public Licence v2 as + * described in the kernel's COPYING file. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + .text + +/* + * Stackframe: + * + * +-> Back chain (SP + 256) + * | General register save area (SP + 112) + * | Parameter save area (SP + 48) + * | TOC save area (SP + 40) + * | link editor doubleword (SP + 32) + * | compiler doubleword (SP + 24) + * | LR save area (SP + 16) + * | CR save area (SP + 8) + * SP ---> +-- Back chain (SP + 0) + */ +#define STACKFRAMESIZE 256 + +/* Save parameters offsets */ +#define STK_PARM(i) (STACKFRAMESIZE + 48 + ((i)-3)*8) + +/* Save non-volatile offsets */ +#define STK_REG(i) (112 + ((i)-14)*8) + +/* + * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid, + * pte_t *ptep, unsigned long trap, int local) + * + * Adds a page to the hash table. This is the non-LPAR version for now + */ + +_GLOBAL(__hash_page) + mflr r0 + std r0,16(r1) + stdu r1,-STACKFRAMESIZE(r1) + /* Save all params that we need after a function call */ + std r6,STK_PARM(r6)(r1) + std r8,STK_PARM(r8)(r1) + + /* Add _PAGE_PRESENT to access */ + ori r4,r4,_PAGE_PRESENT + + /* Save non-volatile registers. + * r31 will hold "old PTE" + * r30 is "new PTE" + * r29 is "va" + * r28 is a hash value + * r27 is hashtab mask (maybe dynamic patched instead ?) + */ + std r27,STK_REG(r27)(r1) + std r28,STK_REG(r28)(r1) + std r29,STK_REG(r29)(r1) + std r30,STK_REG(r30)(r1) + std r31,STK_REG(r31)(r1) + + /* Step 1: + * + * Check permissions, atomically mark the linux PTE busy + * and hashed. + */ +1: + ldarx r31,0,r6 + /* Check access rights (access & ~(pte_val(*ptep))) */ + andc. r0,r4,r31 + bne- htab_wrong_access + /* Check if PTE is busy */ + andi. r0,r31,_PAGE_BUSY + bne- 1b + /* Prepare new PTE value (turn access RW into DIRTY, then + * add BUSY,HASHPTE and ACCESSED) + */ + rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ + or r30,r30,r31 + ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + /* Write the linux PTE atomically (setting busy) */ + stdcx. r30,0,r6 + bne- 1b + isync + + /* Step 2: + * + * Insert/Update the HPTE in the hash table. At this point, + * r4 (access) is re-useable, we use it for the new HPTE flags + */ + + /* Calc va and put it in r29 */ + rldicr r29,r5,28,63-28 + rldicl r3,r3,0,36 + or r29,r3,r29 + + /* Calculate hash value for primary slot and store it in r28 */ + rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ + rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */ + xor r28,r5,r0 + + /* Convert linux PTE bits into HW equivalents */ + andi. r3,r30,0x1fe /* Get basic set of flags */ + xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */ + rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ + rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ + and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */ + andc r0,r30,r0 /* r0 = pte & ~r0 */ + rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + + /* We eventually do the icache sync here (maybe inline that + * code rather than call a C function...) + */ +BEGIN_FTR_SECTION +BEGIN_FTR_SECTION + mr r4,r30 + mr r5,r7 + bl .hash_page_do_lazy_icache +END_FTR_SECTION_IFSET(CPU_FTR_NOEXECUTE) +END_FTR_SECTION_IFCLR(CPU_FTR_COHERENT_ICACHE) + + /* At this point, r3 contains new PP bits, save them in + * place of "access" in the param area (sic) + */ + std r3,STK_PARM(r4)(r1) + + /* Get htab_hash_mask */ + ld r4,htab_hash_mask@got(2) + ld r27,0(r4) /* htab_hash_mask -> r27 */ + + /* Check if we may already be in the hashtable, in this case, we + * go to out-of-line code to try to modify the HPTE + */ + andi. r0,r31,_PAGE_HASHPTE + bne htab_modify_pte + +htab_insert_pte: + /* Clear hpte bits in new pte (we also clear BUSY btw) and + * add _PAGE_HASHPTE + */ + lis r0,_PAGE_HPTEFLAGS@h + ori r0,r0,_PAGE_HPTEFLAGS@l + andc r30,r30,r0 + ori r30,r30,_PAGE_HASHPTE + + /* page number in r5 */ + rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT + + /* Calculate primary group hash */ + and r0,r28,r27 + rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + + /* Call ppc_md.hpte_insert */ + ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ + mr r4,r29 /* Retreive va */ + li r6,0 /* primary slot */ + li r8,0 /* not bolted and not large */ + li r9,0 +_GLOBAL(htab_call_hpte_insert1) + bl . /* Will be patched by htab_finish_init() */ + cmpdi 0,r3,0 + bge htab_pte_insert_ok /* Insertion successful */ + cmpdi 0,r3,-2 /* Critical failure */ + beq- htab_pte_insert_failure + + /* Now try secondary slot */ + + /* page number in r5 */ + rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT + + /* Calculate secondary group hash */ + andc r0,r27,r28 + rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ + + /* Call ppc_md.hpte_insert */ + ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ + mr r4,r29 /* Retreive va */ + li r6,1 /* secondary slot */ + li r8,0 /* not bolted and not large */ + li r9,0 +_GLOBAL(htab_call_hpte_insert2) + bl . /* Will be patched by htab_finish_init() */ + cmpdi 0,r3,0 + bge+ htab_pte_insert_ok /* Insertion successful */ + cmpdi 0,r3,-2 /* Critical failure */ + beq- htab_pte_insert_failure + + /* Both are full, we need to evict something */ + mftb r0 + /* Pick a random group based on TB */ + andi. r0,r0,1 + mr r5,r28 + bne 2f + not r5,r5 +2: and r0,r5,r27 + rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + /* Call ppc_md.hpte_remove */ +_GLOBAL(htab_call_hpte_remove) + bl . /* Will be patched by htab_finish_init() */ + + /* Try all again */ + b htab_insert_pte + +htab_pte_insert_ok: + /* Insert slot number & secondary bit in PTE */ + rldimi r30,r3,12,63-15 + + /* Write out the PTE with a normal write + * (maybe add eieio may be good still ?) + */ +htab_write_out_pte: + ld r6,STK_PARM(r6)(r1) + std r30,0(r6) + li r3, 0 +bail: + ld r27,STK_REG(r27)(r1) + ld r28,STK_REG(r28)(r1) + ld r29,STK_REG(r29)(r1) + ld r30,STK_REG(r30)(r1) + ld r31,STK_REG(r31)(r1) + addi r1,r1,STACKFRAMESIZE + ld r0,16(r1) + mtlr r0 + blr + +htab_modify_pte: + /* Keep PP bits in r4 and slot idx from the PTE around in r3 */ + mr r4,r3 + rlwinm r3,r31,32-12,29,31 + + /* Secondary group ? if yes, get a inverted hash value */ + mr r5,r28 + andi. r0,r31,_PAGE_SECONDARY + beq 1f + not r5,r5 +1: + /* Calculate proper slot value for ppc_md.hpte_updatepp */ + and r0,r5,r27 + rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + add r3,r0,r3 /* add slot idx */ + + /* Call ppc_md.hpte_updatepp */ + mr r5,r29 /* va */ + li r6,0 /* large is 0 */ + ld r7,STK_PARM(r8)(r1) /* get "local" param */ +_GLOBAL(htab_call_hpte_updatepp) + bl . /* Will be patched by htab_finish_init() */ + + /* if we failed because typically the HPTE wasn't really here + * we try an insertion. + */ + cmpdi 0,r3,-1 + beq- htab_insert_pte + + /* Clear the BUSY bit and Write out the PTE */ + li r0,_PAGE_BUSY + andc r30,r30,r0 + b htab_write_out_pte + +htab_wrong_access: + /* Bail out clearing reservation */ + stdcx. r31,0,r6 + li r3,1 + b bail + +htab_pte_insert_failure: + /* Bail out restoring old PTE */ + ld r6,STK_PARM(r6)(r1) + std r31,0(r6) + li r3,-1 + b bail + + diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c new file mode 100644 index 00000000000..144657e0c3d --- /dev/null +++ b/arch/ppc64/mm/hash_native.c @@ -0,0 +1,423 @@ +/* + * native hashtable management. + * + * SMP scalability work: + * Copyright (C) 2001 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define HPTE_LOCK_BIT 3 + +static DEFINE_SPINLOCK(native_tlbie_lock); + +static inline void native_lock_hpte(HPTE *hptep) +{ + unsigned long *word = &hptep->dw0.dword0; + + while (1) { + if (!test_and_set_bit(HPTE_LOCK_BIT, word)) + break; + while(test_bit(HPTE_LOCK_BIT, word)) + cpu_relax(); + } +} + +static inline void native_unlock_hpte(HPTE *hptep) +{ + unsigned long *word = &hptep->dw0.dword0; + + asm volatile("lwsync":::"memory"); + clear_bit(HPTE_LOCK_BIT, word); +} + +long native_hpte_insert(unsigned long hpte_group, unsigned long va, + unsigned long prpn, int secondary, + unsigned long hpteflags, int bolted, int large) +{ + unsigned long arpn = physRpn_to_absRpn(prpn); + HPTE *hptep = htab_address + hpte_group; + Hpte_dword0 dw0; + HPTE lhpte; + int i; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + dw0 = hptep->dw0.dw0; + + if (!dw0.v) { + /* retry with lock held */ + native_lock_hpte(hptep); + dw0 = hptep->dw0.dw0; + if (!dw0.v) + break; + native_unlock_hpte(hptep); + } + + hptep++; + } + + if (i == HPTES_PER_GROUP) + return -1; + + lhpte.dw1.dword1 = 0; + lhpte.dw1.dw1.rpn = arpn; + lhpte.dw1.flags.flags = hpteflags; + + lhpte.dw0.dword0 = 0; + lhpte.dw0.dw0.avpn = va >> 23; + lhpte.dw0.dw0.h = secondary; + lhpte.dw0.dw0.bolted = bolted; + lhpte.dw0.dw0.v = 1; + + if (large) { + lhpte.dw0.dw0.l = 1; + lhpte.dw0.dw0.avpn &= ~0x1UL; + } + + hptep->dw1.dword1 = lhpte.dw1.dword1; + + /* Guarantee the second dword is visible before the valid bit */ + __asm__ __volatile__ ("eieio" : : : "memory"); + + /* + * Now set the first dword including the valid bit + * NOTE: this also unlocks the hpte + */ + hptep->dw0.dword0 = lhpte.dw0.dword0; + + __asm__ __volatile__ ("ptesync" : : : "memory"); + + return i | (secondary << 3); +} + +static long native_hpte_remove(unsigned long hpte_group) +{ + HPTE *hptep; + Hpte_dword0 dw0; + int i; + int slot_offset; + + /* pick a random entry to start at */ + slot_offset = mftb() & 0x7; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + hptep = htab_address + hpte_group + slot_offset; + dw0 = hptep->dw0.dw0; + + if (dw0.v && !dw0.bolted) { + /* retry with lock held */ + native_lock_hpte(hptep); + dw0 = hptep->dw0.dw0; + if (dw0.v && !dw0.bolted) + break; + native_unlock_hpte(hptep); + } + + slot_offset++; + slot_offset &= 0x7; + } + + if (i == HPTES_PER_GROUP) + return -1; + + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->dw0.dword0 = 0; + + return i; +} + +static inline void set_pp_bit(unsigned long pp, HPTE *addr) +{ + unsigned long old; + unsigned long *p = &addr->dw1.dword1; + + __asm__ __volatile__( + "1: ldarx %0,0,%3\n\ + rldimi %0,%2,0,61\n\ + stdcx. %0,0,%3\n\ + bne 1b" + : "=&r" (old), "=m" (*p) + : "r" (pp), "r" (p), "m" (*p) + : "cc"); +} + +/* + * Only works on small pages. Yes its ugly to have to check each slot in + * the group but we only use this during bootup. + */ +static long native_hpte_find(unsigned long vpn) +{ + HPTE *hptep; + unsigned long hash; + unsigned long i, j; + long slot; + Hpte_dword0 dw0; + + hash = hpt_hash(vpn, 0); + + for (j = 0; j < 2; j++) { + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + for (i = 0; i < HPTES_PER_GROUP; i++) { + hptep = htab_address + slot; + dw0 = hptep->dw0.dw0; + + if ((dw0.avpn == (vpn >> 11)) && dw0.v && + (dw0.h == j)) { + /* HPTE matches */ + if (j) + slot = -slot; + return slot; + } + ++slot; + } + hash = ~hash; + } + + return -1; +} + +static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int large, int local) +{ + HPTE *hptep = htab_address + slot; + Hpte_dword0 dw0; + unsigned long avpn = va >> 23; + int ret = 0; + + if (large) + avpn &= ~0x1UL; + + native_lock_hpte(hptep); + + dw0 = hptep->dw0.dw0; + + /* Even if we miss, we need to invalidate the TLB */ + if ((dw0.avpn != avpn) || !dw0.v) { + native_unlock_hpte(hptep); + ret = -1; + } else { + set_pp_bit(newpp, hptep); + native_unlock_hpte(hptep); + } + + /* Ensure it is out of the tlb too */ + if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { + tlbiel(va); + } else { + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock(&native_tlbie_lock); + tlbie(va, large); + if (lock_tlbie) + spin_unlock(&native_tlbie_lock); + } + + return ret; +} + +/* + * Update the page protection bits. Intended to be used to create + * guard pages for kernel data structures on pages which are bolted + * in the HPT. Assumes pages being operated on will not be stolen. + * Does not work on large pages. + * + * No need to lock here because we should be the only user. + */ +static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) +{ + unsigned long vsid, va, vpn, flags = 0; + long slot; + HPTE *hptep; + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> PAGE_SHIFT; + + slot = native_hpte_find(vpn); + if (slot == -1) + panic("could not find page to bolt\n"); + hptep = htab_address + slot; + + set_pp_bit(newpp, hptep); + + /* Ensure it is out of the tlb too */ + if (lock_tlbie) + spin_lock_irqsave(&native_tlbie_lock, flags); + tlbie(va, 0); + if (lock_tlbie) + spin_unlock_irqrestore(&native_tlbie_lock, flags); +} + +static void native_hpte_invalidate(unsigned long slot, unsigned long va, + int large, int local) +{ + HPTE *hptep = htab_address + slot; + Hpte_dword0 dw0; + unsigned long avpn = va >> 23; + unsigned long flags; + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (large) + avpn &= ~0x1UL; + + local_irq_save(flags); + native_lock_hpte(hptep); + + dw0 = hptep->dw0.dw0; + + /* Even if we miss, we need to invalidate the TLB */ + if ((dw0.avpn != avpn) || !dw0.v) { + native_unlock_hpte(hptep); + } else { + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->dw0.dword0 = 0; + } + + /* Invalidate the tlb */ + if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { + tlbiel(va); + } else { + if (lock_tlbie) + spin_lock(&native_tlbie_lock); + tlbie(va, large); + if (lock_tlbie) + spin_unlock(&native_tlbie_lock); + } + local_irq_restore(flags); +} + +static void native_flush_hash_range(unsigned long context, + unsigned long number, int local) +{ + unsigned long vsid, vpn, va, hash, secondary, slot, flags, avpn; + int i, j; + HPTE *hptep; + Hpte_dword0 dw0; + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + + /* XXX fix for large ptes */ + unsigned long large = 0; + + local_irq_save(flags); + + j = 0; + for (i = 0; i < number; i++) { + if ((batch->addr[i] >= USER_START) && + (batch->addr[i] <= USER_END)) + vsid = get_vsid(context, batch->addr[i]); + else + vsid = get_kernel_vsid(batch->addr[i]); + + va = (vsid << 28) | (batch->addr[i] & 0x0fffffff); + batch->vaddr[j] = va; + if (large) + vpn = va >> HPAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + hash = hpt_hash(vpn, large); + secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15; + if (secondary) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12; + + hptep = htab_address + slot; + + avpn = va >> 23; + if (large) + avpn &= ~0x1UL; + + native_lock_hpte(hptep); + + dw0 = hptep->dw0.dw0; + + /* Even if we miss, we need to invalidate the TLB */ + if ((dw0.avpn != avpn) || !dw0.v) { + native_unlock_hpte(hptep); + } else { + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->dw0.dword0 = 0; + } + + j++; + } + + if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { + asm volatile("ptesync":::"memory"); + + for (i = 0; i < j; i++) + __tlbiel(batch->vaddr[i]); + + asm volatile("ptesync":::"memory"); + } else { + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock(&native_tlbie_lock); + + asm volatile("ptesync":::"memory"); + + for (i = 0; i < j; i++) + __tlbie(batch->vaddr[i], 0); + + asm volatile("eieio; tlbsync; ptesync":::"memory"); + + if (lock_tlbie) + spin_unlock(&native_tlbie_lock); + } + + local_irq_restore(flags); +} + +#ifdef CONFIG_PPC_PSERIES +/* Disable TLB batching on nighthawk */ +static inline int tlb_batching_enabled(void) +{ + struct device_node *root = of_find_node_by_path("/"); + int enabled = 1; + + if (root) { + const char *model = get_property(root, "model", NULL); + if (model && !strcmp(model, "IBM,9076-N81")) + enabled = 0; + of_node_put(root); + } + + return enabled; +} +#else +static inline int tlb_batching_enabled(void) +{ + return 1; +} +#endif + +void hpte_init_native(void) +{ + ppc_md.hpte_invalidate = native_hpte_invalidate; + ppc_md.hpte_updatepp = native_hpte_updatepp; + ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp; + ppc_md.hpte_insert = native_hpte_insert; + ppc_md.hpte_remove = native_hpte_remove; + if (tlb_batching_enabled()) + ppc_md.flush_hash_range = native_flush_hash_range; + htab_finish_init(); +} diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c new file mode 100644 index 00000000000..e48be12f518 --- /dev/null +++ b/arch/ppc64/mm/hash_utils.c @@ -0,0 +1,439 @@ +/* + * PowerPC64 port by Mike Corrigan and Dave Engebretsen + * {mikejc|engebret}@us.ibm.com + * + * Copyright (c) 2000 Mike Corrigan + * + * SMP scalability work: + * Copyright (C) 2001 Anton Blanchard , IBM + * + * Module name: htab.c + * + * Description: + * PowerPC Hashed Page Table functions + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * Note: pte --> Linux PTE + * HPTE --> PowerPC Hashed Page Table Entry + * + * Execution context: + * htab_initialize is called with the MMU off (of course), but + * the kernel has been copied down to zero so it can directly + * reference global data. At this point it is very difficult + * to print debug info. + * + */ + +#ifdef CONFIG_U3_DART +extern unsigned long dart_tablebase; +#endif /* CONFIG_U3_DART */ + +HPTE *htab_address; +unsigned long htab_hash_mask; + +extern unsigned long _SDR1; + +#define KB (1024) +#define MB (1024*KB) + +static inline void loop_forever(void) +{ + volatile unsigned long x = 1; + for(;x;x|=1) + ; +} + +#ifdef CONFIG_PPC_MULTIPLATFORM +static inline void create_pte_mapping(unsigned long start, unsigned long end, + unsigned long mode, int large) +{ + unsigned long addr; + unsigned int step; + unsigned long tmp_mode; + + if (large) + step = 16*MB; + else + step = 4*KB; + + for (addr = start; addr < end; addr += step) { + unsigned long vpn, hash, hpteg; + unsigned long vsid = get_kernel_vsid(addr); + unsigned long va = (vsid << 28) | (addr & 0xfffffff); + int ret; + + if (large) + vpn = va >> HPAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + + + tmp_mode = mode; + + /* Make non-kernel text non-executable */ + if (!in_kernel_text(addr)) + tmp_mode = mode | HW_NO_EXEC; + + hash = hpt_hash(vpn, large); + + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + +#ifdef CONFIG_PPC_PSERIES + if (systemcfg->platform & PLATFORM_LPAR) + ret = pSeries_lpar_hpte_insert(hpteg, va, + virt_to_abs(addr) >> PAGE_SHIFT, + 0, tmp_mode, 1, large); + else +#endif /* CONFIG_PPC_PSERIES */ + ret = native_hpte_insert(hpteg, va, + virt_to_abs(addr) >> PAGE_SHIFT, + 0, tmp_mode, 1, large); + + if (ret == -1) { + ppc64_terminate_msg(0x20, "create_pte_mapping"); + loop_forever(); + } + } +} + +void __init htab_initialize(void) +{ + unsigned long table, htab_size_bytes; + unsigned long pteg_count; + unsigned long mode_rw; + int i, use_largepages = 0; + unsigned long base = 0, size = 0; + extern unsigned long tce_alloc_start, tce_alloc_end; + + DBG(" -> htab_initialize()\n"); + + /* + * Calculate the required size of the htab. We want the number of + * PTEGs to equal one half the number of real pages. + */ + htab_size_bytes = 1UL << ppc64_pft_size; + pteg_count = htab_size_bytes >> 7; + + /* For debug, make the HTAB 1/8 as big as it normally would be. */ + ifppcdebug(PPCDBG_HTABSIZE) { + pteg_count >>= 3; + htab_size_bytes = pteg_count << 7; + } + + htab_hash_mask = pteg_count - 1; + + if (systemcfg->platform & PLATFORM_LPAR) { + /* Using a hypervisor which owns the htab */ + htab_address = NULL; + _SDR1 = 0; + } else { + /* Find storage for the HPT. Must be contiguous in + * the absolute address space. + */ + table = lmb_alloc(htab_size_bytes, htab_size_bytes); + + DBG("Hash table allocated at %lx, size: %lx\n", table, + htab_size_bytes); + + if ( !table ) { + ppc64_terminate_msg(0x20, "hpt space"); + loop_forever(); + } + htab_address = abs_to_virt(table); + + /* htab absolute addr + encoded htabsize */ + _SDR1 = table + __ilog2(pteg_count) - 11; + + /* Initialize the HPT with no entries */ + memset((void *)table, 0, htab_size_bytes); + } + + mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX; + + /* On U3 based machines, we need to reserve the DART area and + * _NOT_ map it to avoid cache paradoxes as it's remapped non + * cacheable later on + */ + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + use_largepages = 1; + + /* create bolted the linear mapping in the hash table */ + for (i=0; i < lmb.memory.cnt; i++) { + base = lmb.memory.region[i].physbase + KERNELBASE; + size = lmb.memory.region[i].size; + + DBG("creating mapping for region: %lx : %lx\n", base, size); + +#ifdef CONFIG_U3_DART + /* Do not map the DART space. Fortunately, it will be aligned + * in such a way that it will not cross two lmb regions and will + * fit within a single 16Mb page. + * The DART space is assumed to be a full 16Mb region even if we + * only use 2Mb of that space. We will use more of it later for + * AGP GART. We have to use a full 16Mb large page. + */ + DBG("DART base: %lx\n", dart_tablebase); + + if (dart_tablebase != 0 && dart_tablebase >= base + && dart_tablebase < (base + size)) { + if (base != dart_tablebase) + create_pte_mapping(base, dart_tablebase, mode_rw, + use_largepages); + if ((base + size) > (dart_tablebase + 16*MB)) + create_pte_mapping(dart_tablebase + 16*MB, base + size, + mode_rw, use_largepages); + continue; + } +#endif /* CONFIG_U3_DART */ + create_pte_mapping(base, base + size, mode_rw, use_largepages); + } + + /* + * If we have a memory_limit and we've allocated TCEs then we need to + * explicitly map the TCE area at the top of RAM. We also cope with the + * case that the TCEs start below memory_limit. + * tce_alloc_start/end are 16MB aligned so the mapping should work + * for either 4K or 16MB pages. + */ + if (tce_alloc_start) { + tce_alloc_start += KERNELBASE; + tce_alloc_end += KERNELBASE; + + if (base + size >= tce_alloc_start) + tce_alloc_start = base + size + 1; + + create_pte_mapping(tce_alloc_start, tce_alloc_end, + mode_rw, use_largepages); + } + + DBG(" <- htab_initialize()\n"); +} +#undef KB +#undef MB +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +/* + * Called by asm hashtable.S for doing lazy icache flush + */ +unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) +{ + struct page *page; + + if (!pfn_valid(pte_pfn(pte))) + return pp; + + page = pte_page(pte); + + /* page is dirty */ + if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { + if (trap == 0x400) { + __flush_dcache_icache(page_address(page)); + set_bit(PG_arch_1, &page->flags); + } else + pp |= HW_NO_EXEC; + } + return pp; +} + +/* Result code is: + * 0 - handled + * 1 - normal page fault + * -1 - critical hash insertion error + */ +int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +{ + void *pgdir; + unsigned long vsid; + struct mm_struct *mm; + pte_t *ptep; + int ret; + int user_region = 0; + int local = 0; + cpumask_t tmp; + + switch (REGION_ID(ea)) { + case USER_REGION_ID: + user_region = 1; + mm = current->mm; + if ((ea > USER_END) || (! mm)) + return 1; + + vsid = get_vsid(mm->context.id, ea); + break; + case IO_REGION_ID: + if (ea > IMALLOC_END) + return 1; + mm = &ioremap_mm; + vsid = get_kernel_vsid(ea); + break; + case VMALLOC_REGION_ID: + if (ea > VMALLOC_END) + return 1; + mm = &init_mm; + vsid = get_kernel_vsid(ea); + break; +#if 0 + case KERNEL_REGION_ID: + /* + * Should never get here - entire 0xC0... region is bolted. + * Send the problem up to do_page_fault + */ +#endif + default: + /* Not a valid range + * Send the problem up to do_page_fault + */ + return 1; + break; + } + + pgdir = mm->pgd; + + if (pgdir == NULL) + return 1; + + tmp = cpumask_of_cpu(smp_processor_id()); + if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) + local = 1; + + /* Is this a huge page ? */ + if (unlikely(in_hugepage_area(mm->context, ea))) + ret = hash_huge_page(mm, access, ea, vsid, local); + else { + ptep = find_linux_pte(pgdir, ea); + if (ptep == NULL) + return 1; + ret = __hash_page(ea, access, vsid, ptep, trap, local); + } + + return ret; +} + +void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte, + int local) +{ + unsigned long vsid, vpn, va, hash, secondary, slot; + unsigned long huge = pte_huge(pte); + + if ((ea >= USER_START) && (ea <= USER_END)) + vsid = get_vsid(context, ea); + else + vsid = get_kernel_vsid(ea); + + va = (vsid << 28) | (ea & 0x0fffffff); + if (huge) + vpn = va >> HPAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + hash = hpt_hash(vpn, huge); + secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; + if (secondary) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; + + ppc_md.hpte_invalidate(slot, va, huge, local); +} + +void flush_hash_range(unsigned long context, unsigned long number, int local) +{ + if (ppc_md.flush_hash_range) { + ppc_md.flush_hash_range(context, number, local); + } else { + int i; + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + + for (i = 0; i < number; i++) + flush_hash_page(context, batch->addr[i], batch->pte[i], + local); + } +} + +static inline void make_bl(unsigned int *insn_addr, void *func) +{ + unsigned long funcp = *((unsigned long *)func); + int offset = funcp - (unsigned long)insn_addr; + + *insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc)); + flush_icache_range((unsigned long)insn_addr, 4+ + (unsigned long)insn_addr); +} + +/* + * low_hash_fault is called when we the low level hash code failed + * to instert a PTE due to an hypervisor error + */ +void low_hash_fault(struct pt_regs *regs, unsigned long address) +{ + if (user_mode(regs)) { + siginfo_t info; + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return; + } + bad_page_fault(regs, address, SIGBUS); +} + +void __init htab_finish_init(void) +{ + extern unsigned int *htab_call_hpte_insert1; + extern unsigned int *htab_call_hpte_insert2; + extern unsigned int *htab_call_hpte_remove; + extern unsigned int *htab_call_hpte_updatepp; + + make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert); + make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert); + make_bl(htab_call_hpte_remove, ppc_md.hpte_remove); + make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp); +} diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c new file mode 100644 index 00000000000..c62ddaff072 --- /dev/null +++ b/arch/ppc64/mm/hugetlbpage.c @@ -0,0 +1,904 @@ +/* + * PPC64 (POWER4) Huge TLB Page Support for Kernel. + * + * Copyright (C) 2003 David Gibson, IBM Corporation. + * + * Based on the IA-32 version: + * Copyright (C) 2002, Rohit Seth + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define HUGEPGDIR_SHIFT (HPAGE_SHIFT + PAGE_SHIFT - 3) +#define HUGEPGDIR_SIZE (1UL << HUGEPGDIR_SHIFT) +#define HUGEPGDIR_MASK (~(HUGEPGDIR_SIZE-1)) + +#define HUGEPTE_INDEX_SIZE 9 +#define HUGEPGD_INDEX_SIZE 10 + +#define PTRS_PER_HUGEPTE (1 << HUGEPTE_INDEX_SIZE) +#define PTRS_PER_HUGEPGD (1 << HUGEPGD_INDEX_SIZE) + +static inline int hugepgd_index(unsigned long addr) +{ + return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT; +} + +static pgd_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr) +{ + int index; + + if (! mm->context.huge_pgdir) + return NULL; + + + index = hugepgd_index(addr); + BUG_ON(index >= PTRS_PER_HUGEPGD); + return mm->context.huge_pgdir + index; +} + +static inline pte_t *hugepte_offset(pgd_t *dir, unsigned long addr) +{ + int index; + + if (pgd_none(*dir)) + return NULL; + + index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE; + return (pte_t *)pgd_page(*dir) + index; +} + +static pgd_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr) +{ + BUG_ON(! in_hugepage_area(mm->context, addr)); + + if (! mm->context.huge_pgdir) { + pgd_t *new; + spin_unlock(&mm->page_table_lock); + /* Don't use pgd_alloc(), because we want __GFP_REPEAT */ + new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); + BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); + spin_lock(&mm->page_table_lock); + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (mm->context.huge_pgdir) + pgd_free(new); + else + mm->context.huge_pgdir = new; + } + return hugepgd_offset(mm, addr); +} + +static pte_t *hugepte_alloc(struct mm_struct *mm, pgd_t *dir, + unsigned long addr) +{ + if (! pgd_present(*dir)) { + pte_t *new; + + spin_unlock(&mm->page_table_lock); + new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); + BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); + spin_lock(&mm->page_table_lock); + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pgd_present(*dir)) { + if (new) + kmem_cache_free(zero_cache, new); + } else { + struct page *ptepage; + + if (! new) + return NULL; + ptepage = virt_to_page(new); + ptepage->mapping = (void *) mm; + ptepage->index = addr & HUGEPGDIR_MASK; + pgd_populate(mm, dir, new); + } + } + + return hugepte_offset(dir, addr); +} + +static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + + BUG_ON(! in_hugepage_area(mm->context, addr)); + + pgd = hugepgd_offset(mm, addr); + if (! pgd) + return NULL; + + return hugepte_offset(pgd, addr); +} + +static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + + BUG_ON(! in_hugepage_area(mm->context, addr)); + + pgd = hugepgd_alloc(mm, addr); + if (! pgd) + return NULL; + + return hugepte_alloc(mm, pgd, addr); +} + +static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, struct page *page, + pte_t *ptep, int write_access) +{ + pte_t entry; + + add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); + if (write_access) { + entry = + pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + } else { + entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); + } + entry = pte_mkyoung(entry); + entry = pte_mkhuge(entry); + + set_pte_at(mm, addr, ptep, entry); +} + +/* + * This function checks for proper alignment of input addr and len parameters. + */ +int is_aligned_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + if (! (within_hugepage_low_range(addr, len) + || within_hugepage_high_range(addr, len)) ) + return -EINVAL; + return 0; +} + +static void flush_segments(void *parm) +{ + u16 segs = (unsigned long) parm; + unsigned long i; + + asm volatile("isync" : : : "memory"); + + for (i = 0; i < 16; i++) { + if (! (segs & (1U << i))) + continue; + asm volatile("slbie %0" : : "r" (i << SID_SHIFT)); + } + + asm volatile("isync" : : : "memory"); +} + +static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) +{ + unsigned long start = seg << SID_SHIFT; + unsigned long end = (seg+1) << SID_SHIFT; + struct vm_area_struct *vma; + unsigned long addr; + struct mmu_gather *tlb; + + BUG_ON(seg >= 16); + + /* Check no VMAs are in the region */ + vma = find_vma(mm, start); + if (vma && (vma->vm_start < end)) + return -EBUSY; + + /* Clean up any leftover PTE pages in the region */ + spin_lock(&mm->page_table_lock); + tlb = tlb_gather_mmu(mm, 0); + for (addr = start; addr < end; addr += PMD_SIZE) { + pgd_t *pgd = pgd_offset(mm, addr); + pmd_t *pmd; + struct page *page; + pte_t *pte; + int i; + + if (pgd_none(*pgd)) + continue; + pmd = pmd_offset(pgd, addr); + if (!pmd || pmd_none(*pmd)) + continue; + if (pmd_bad(*pmd)) { + pmd_ERROR(*pmd); + pmd_clear(pmd); + continue; + } + pte = (pte_t *)pmd_page_kernel(*pmd); + /* No VMAs, so there should be no PTEs, check just in case. */ + for (i = 0; i < PTRS_PER_PTE; i++) { + BUG_ON(!pte_none(*pte)); + pte++; + } + page = pmd_page(*pmd); + pmd_clear(pmd); + mm->nr_ptes--; + dec_page_state(nr_page_table_pages); + pte_free_tlb(tlb, page); + } + tlb_finish_mmu(tlb, start, end); + spin_unlock(&mm->page_table_lock); + + return 0; +} + +static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) +{ + unsigned long i; + + newsegs &= ~(mm->context.htlb_segs); + if (! newsegs) + return 0; /* The segments we want are already open */ + + for (i = 0; i < 16; i++) + if ((1 << i) & newsegs) + if (prepare_low_seg_for_htlb(mm, i) != 0) + return -EBUSY; + + mm->context.htlb_segs |= newsegs; + + /* update the paca copy of the context struct */ + get_paca()->context = mm->context; + + /* the context change must make it to memory before the flush, + * so that further SLB misses do the right thing. */ + mb(); + on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1); + + return 0; +} + +int prepare_hugepage_range(unsigned long addr, unsigned long len) +{ + if (within_hugepage_high_range(addr, len)) + return 0; + else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) { + int err; + /* Yes, we need both tests, in case addr+len overflows + * 64-bit arithmetic */ + err = open_low_hpage_segs(current->mm, + LOW_ESID_MASK(addr, len)); + if (err) + printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" + " failed (segs: 0x%04hx)\n", addr, len, + LOW_ESID_MASK(addr, len)); + return err; + } + + return -EINVAL; +} + +int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma) +{ + pte_t *src_pte, *dst_pte, entry; + struct page *ptepage; + unsigned long addr = vma->vm_start; + unsigned long end = vma->vm_end; + int err = -ENOMEM; + + while (addr < end) { + dst_pte = huge_pte_alloc(dst, addr); + if (!dst_pte) + goto out; + + src_pte = huge_pte_offset(src, addr); + entry = *src_pte; + + ptepage = pte_page(entry); + get_page(ptepage); + add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); + set_pte_at(dst, addr, dst_pte, entry); + + addr += HPAGE_SIZE; + } + + err = 0; + out: + return err; +} + +int +follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, + struct page **pages, struct vm_area_struct **vmas, + unsigned long *position, int *length, int i) +{ + unsigned long vpfn, vaddr = *position; + int remainder = *length; + + WARN_ON(!is_vm_hugetlb_page(vma)); + + vpfn = vaddr/PAGE_SIZE; + while (vaddr < vma->vm_end && remainder) { + if (pages) { + pte_t *pte; + struct page *page; + + pte = huge_pte_offset(mm, vaddr); + + /* hugetlb should be locked, and hence, prefaulted */ + WARN_ON(!pte || pte_none(*pte)); + + page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; + + WARN_ON(!PageCompound(page)); + + get_page(page); + pages[i] = page; + } + + if (vmas) + vmas[i] = vma; + + vaddr += PAGE_SIZE; + ++vpfn; + --remainder; + ++i; + } + + *length = remainder; + *position = vaddr; + + return i; +} + +struct page * +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) +{ + pte_t *ptep; + struct page *page; + + if (! in_hugepage_area(mm->context, address)) + return ERR_PTR(-EINVAL); + + ptep = huge_pte_offset(mm, address); + page = pte_page(*ptep); + if (page) + page += (address % HPAGE_SIZE) / PAGE_SIZE; + + return page; +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + BUG(); + return NULL; +} + +void unmap_hugepage_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long addr; + pte_t *ptep; + struct page *page; + + WARN_ON(!is_vm_hugetlb_page(vma)); + BUG_ON((start % HPAGE_SIZE) != 0); + BUG_ON((end % HPAGE_SIZE) != 0); + + for (addr = start; addr < end; addr += HPAGE_SIZE) { + pte_t pte; + + ptep = huge_pte_offset(mm, addr); + if (!ptep || pte_none(*ptep)) + continue; + + pte = *ptep; + page = pte_page(pte); + pte_clear(mm, addr, ptep); + + put_page(page); + } + add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); + flush_tlb_pending(); +} + +void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev, + unsigned long start, unsigned long end) +{ + /* Because the huge pgtables are only 2 level, they can take + * at most around 4M, much less than one hugepage which the + * process is presumably entitled to use. So we don't bother + * freeing up the pagetables on unmap, and wait until + * destroy_context() to clean up the lot. */ +} + +int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) +{ + struct mm_struct *mm = current->mm; + unsigned long addr; + int ret = 0; + + WARN_ON(!is_vm_hugetlb_page(vma)); + BUG_ON((vma->vm_start % HPAGE_SIZE) != 0); + BUG_ON((vma->vm_end % HPAGE_SIZE) != 0); + + spin_lock(&mm->page_table_lock); + for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { + unsigned long idx; + pte_t *pte = huge_pte_alloc(mm, addr); + struct page *page; + + if (!pte) { + ret = -ENOMEM; + goto out; + } + if (! pte_none(*pte)) + continue; + + idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) + + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); + page = find_get_page(mapping, idx); + if (!page) { + /* charge the fs quota first */ + if (hugetlb_get_quota(mapping)) { + ret = -ENOMEM; + goto out; + } + page = alloc_huge_page(); + if (!page) { + hugetlb_put_quota(mapping); + ret = -ENOMEM; + goto out; + } + ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); + if (! ret) { + unlock_page(page); + } else { + hugetlb_put_quota(mapping); + free_huge_page(page); + goto out; + } + } + set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE); + } +out: + spin_unlock(&mm->page_table_lock); + return ret; +} + +/* Because we have an exclusive hugepage region which lies within the + * normal user address space, we have to take special measures to make + * non-huge mmap()s evade the hugepage reserved regions. */ +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr; + + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (((TASK_SIZE - len) >= addr) + && (!vma || (addr+len) <= vma->vm_start) + && !is_hugepage_only_range(mm, addr,len)) + return addr; + } + start_addr = addr = mm->free_area_cache; + +full_search: + vma = find_vma(mm, addr); + while (TASK_SIZE - len >= addr) { + BUG_ON(vma && (addr >= vma->vm_end)); + + if (touches_hugepage_low_range(mm, addr, len)) { + addr = ALIGN(addr+1, 1<vm_start) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + addr = vma->vm_end; + vma = vma->vm_next; + } + + /* Make sure we didn't miss any holes */ + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = addr = TASK_UNMAPPED_BASE; + goto full_search; + } + return -ENOMEM; +} + +/* + * This mmap-allocator allocates new areas top-down from below the + * stack's low limit (the base): + * + * Because we have an exclusive hugepage region which lies within the + * normal user address space, we have to take special measures to make + * non-huge mmap()s evade the hugepage reserved regions. + */ +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma, *prev_vma; + struct mm_struct *mm = current->mm; + unsigned long base = mm->mmap_base, addr = addr0; + int first_time = 1; + + /* requested length too big for entire address space */ + if (len > TASK_SIZE) + return -ENOMEM; + + /* dont allow allocations above current base */ + if (mm->free_area_cache > base) + mm->free_area_cache = base; + + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start) + && !is_hugepage_only_range(mm, addr,len)) + return addr; + } + +try_again: + /* make sure it can fit in the remaining address space */ + if (mm->free_area_cache < len) + goto fail; + + /* either no address requested or cant fit in requested address hole */ + addr = (mm->free_area_cache - len) & PAGE_MASK; + do { +hugepage_recheck: + if (touches_hugepage_low_range(mm, addr, len)) { + addr = (addr & ((~0) << SID_SHIFT)) - len; + goto hugepage_recheck; + } else if (touches_hugepage_high_range(addr, len)) { + addr = TASK_HPAGE_BASE - len; + } + + /* + * Lookup failure means no vma is above this address, + * i.e. return with success: + */ + if (!(vma = find_vma_prev(mm, addr, &prev_vma))) + return addr; + + /* + * new region fits between prev_vma->vm_end and + * vma->vm_start, use it: + */ + if (addr+len <= vma->vm_start && + (!prev_vma || (addr >= prev_vma->vm_end))) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + else + /* pull free_area_cache down to the first hole */ + if (mm->free_area_cache == vma->vm_end) + mm->free_area_cache = vma->vm_start; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + } while (len <= vma->vm_start); + +fail: + /* + * if hint left us with no space for the requested + * mapping then try again: + */ + if (first_time) { + mm->free_area_cache = base; + first_time = 0; + goto try_again; + } + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = base; + + return addr; +} + +static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) +{ + unsigned long addr = 0; + struct vm_area_struct *vma; + + vma = find_vma(current->mm, addr); + while (addr + len <= 0x100000000UL) { + BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ + + if (! __within_hugepage_low_range(addr, len, segmask)) { + addr = ALIGN(addr+1, 1<mm, addr); + continue; + } + + if (!vma || (addr + len) <= vma->vm_start) + return addr; + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + /* Depending on segmask this might not be a confirmed + * hugepage region, so the ALIGN could have skipped + * some VMAs */ + vma = find_vma(current->mm, addr); + } + + return -ENOMEM; +} + +static unsigned long htlb_get_high_area(unsigned long len) +{ + unsigned long addr = TASK_HPAGE_BASE; + struct vm_area_struct *vma; + + vma = find_vma(current->mm, addr); + for (vma = find_vma(current->mm, addr); + addr + len <= TASK_HPAGE_END; + vma = vma->vm_next) { + BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ + BUG_ON(! within_hugepage_high_range(addr, len)); + + if (!vma || (addr + len) <= vma->vm_start) + return addr; + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + /* Because we're in a hugepage region, this alignment + * should not skip us over any VMAs */ + } + + return -ENOMEM; +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + + if (!cpu_has_feature(CPU_FTR_16M_PAGE)) + return -EINVAL; + + if (test_thread_flag(TIF_32BIT)) { + int lastshift = 0; + u16 segmask, cursegs = current->mm->context.htlb_segs; + + /* First see if we can do the mapping in the existing + * low hpage segments */ + addr = htlb_get_low_area(len, cursegs); + if (addr != -ENOMEM) + return addr; + + for (segmask = LOW_ESID_MASK(0x100000000UL-len, len); + ! lastshift; segmask >>=1) { + if (segmask & 1) + lastshift = 1; + + addr = htlb_get_low_area(len, cursegs | segmask); + if ((addr != -ENOMEM) + && open_low_hpage_segs(current->mm, segmask) == 0) + return addr; + } + printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" + " enough segments\n"); + return -ENOMEM; + } else { + return htlb_get_high_area(len); + } +} + +void hugetlb_mm_free_pgd(struct mm_struct *mm) +{ + int i; + pgd_t *pgdir; + + spin_lock(&mm->page_table_lock); + + pgdir = mm->context.huge_pgdir; + if (! pgdir) + goto out; + + mm->context.huge_pgdir = NULL; + + /* cleanup any hugepte pages leftover */ + for (i = 0; i < PTRS_PER_HUGEPGD; i++) { + pgd_t *pgd = pgdir + i; + + if (! pgd_none(*pgd)) { + pte_t *pte = (pte_t *)pgd_page(*pgd); + struct page *ptepage = virt_to_page(pte); + + ptepage->mapping = NULL; + + BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE)); + kmem_cache_free(zero_cache, pte); + } + pgd_clear(pgd); + } + + BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE)); + kmem_cache_free(zero_cache, pgdir); + + out: + spin_unlock(&mm->page_table_lock); +} + +int hash_huge_page(struct mm_struct *mm, unsigned long access, + unsigned long ea, unsigned long vsid, int local) +{ + pte_t *ptep; + unsigned long va, vpn; + pte_t old_pte, new_pte; + unsigned long hpteflags, prpn; + long slot; + int err = 1; + + spin_lock(&mm->page_table_lock); + + ptep = huge_pte_offset(mm, ea); + + /* Search the Linux page table for a match with va */ + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> HPAGE_SHIFT; + + /* + * If no pte found or not present, send the problem up to + * do_page_fault + */ + if (unlikely(!ptep || pte_none(*ptep))) + goto out; + +/* BUG_ON(pte_bad(*ptep)); */ + + /* + * Check the user's access rights to the page. If access should be + * prevented then send the problem up to do_page_fault. + */ + if (unlikely(access & ~pte_val(*ptep))) + goto out; + /* + * At this point, we have a pte (old_pte) which can be used to build + * or update an HPTE. There are 2 cases: + * + * 1. There is a valid (present) pte with no associated HPTE (this is + * the most common case) + * 2. There is a valid (present) pte with an associated HPTE. The + * current values of the pp bits in the HPTE prevent access + * because we are doing software DIRTY bit management and the + * page is currently not DIRTY. + */ + + + old_pte = *ptep; + new_pte = old_pte; + + hpteflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); + /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ + hpteflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); + + /* Check if pte already has an hpte (case 2) */ + if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { + /* There MIGHT be an HPTE for this pte */ + unsigned long hash, slot; + + hash = hpt_hash(vpn, 1); + if (pte_val(old_pte) & _PAGE_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; + + if (ppc_md.hpte_updatepp(slot, hpteflags, va, 1, local) == -1) + pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; + } + + if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { + unsigned long hash = hpt_hash(vpn, 1); + unsigned long hpte_group; + + prpn = pte_pfn(old_pte); + +repeat: + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + + /* Update the linux pte with the HPTE slot */ + pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; + pte_val(new_pte) |= _PAGE_HASHPTE; + + /* Add in WIMG bits */ + /* XXX We should store these in the pte */ + hpteflags |= _PAGE_COHERENT; + + slot = ppc_md.hpte_insert(hpte_group, va, prpn, 0, + hpteflags, 0, 1); + + /* Primary is full, try the secondary */ + if (unlikely(slot == -1)) { + pte_val(new_pte) |= _PAGE_SECONDARY; + hpte_group = ((~hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + slot = ppc_md.hpte_insert(hpte_group, va, prpn, + 1, hpteflags, 0, 1); + if (slot == -1) { + if (mftb() & 0x1) + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + + ppc_md.hpte_remove(hpte_group); + goto repeat; + } + } + + if (unlikely(slot == -2)) + panic("hash_huge_page: pte_insert failed\n"); + + pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; + + /* + * No need to use ldarx/stdcx here because all who + * might be updating the pte will hold the + * page_table_lock + */ + *ptep = new_pte; + } + + err = 0; + + out: + spin_unlock(&mm->page_table_lock); + + return err; +} diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c new file mode 100644 index 00000000000..9d92b0d9cde --- /dev/null +++ b/arch/ppc64/mm/imalloc.c @@ -0,0 +1,312 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +#include +#include +#include +#include + +static DECLARE_MUTEX(imlist_sem); +struct vm_struct * imlist = NULL; + +static int get_free_im_addr(unsigned long size, unsigned long *im_addr) +{ + unsigned long addr; + struct vm_struct **p, *tmp; + + addr = IMALLOC_START; + for (p = &imlist; (tmp = *p) ; p = &tmp->next) { + if (size + addr < (unsigned long) tmp->addr) + break; + if ((unsigned long)tmp->addr >= IMALLOC_START) + addr = tmp->size + (unsigned long) tmp->addr; + if (addr > IMALLOC_END-size) + return 1; + } + *im_addr = addr; + + return 0; +} + +/* Return whether the region described by v_addr and size is a subset + * of the region described by parent + */ +static inline int im_region_is_subset(unsigned long v_addr, unsigned long size, + struct vm_struct *parent) +{ + return (int) (v_addr >= (unsigned long) parent->addr && + v_addr < (unsigned long) parent->addr + parent->size && + size < parent->size); +} + +/* Return whether the region described by v_addr and size is a superset + * of the region described by child + */ +static int im_region_is_superset(unsigned long v_addr, unsigned long size, + struct vm_struct *child) +{ + struct vm_struct parent; + + parent.addr = (void *) v_addr; + parent.size = size; + + return im_region_is_subset((unsigned long) child->addr, child->size, + &parent); +} + +/* Return whether the region described by v_addr and size overlaps + * the region described by vm. Overlapping regions meet the + * following conditions: + * 1) The regions share some part of the address space + * 2) The regions aren't identical + * 3) Neither region is a subset of the other + */ +static int im_region_overlaps(unsigned long v_addr, unsigned long size, + struct vm_struct *vm) +{ + if (im_region_is_superset(v_addr, size, vm)) + return 0; + + return (v_addr + size > (unsigned long) vm->addr + vm->size && + v_addr < (unsigned long) vm->addr + vm->size) || + (v_addr < (unsigned long) vm->addr && + v_addr + size > (unsigned long) vm->addr); +} + +/* Determine imalloc status of region described by v_addr and size. + * Can return one of the following: + * IM_REGION_UNUSED - Entire region is unallocated in imalloc space. + * IM_REGION_SUBSET - Region is a subset of a region that is already + * allocated in imalloc space. + * vm will be assigned to a ptr to the parent region. + * IM_REGION_EXISTS - Exact region already allocated in imalloc space. + * vm will be assigned to a ptr to the existing imlist + * member. + * IM_REGION_OVERLAPS - Region overlaps an allocated region in imalloc space. + * IM_REGION_SUPERSET - Region is a superset of a region that is already + * allocated in imalloc space. + */ +static int im_region_status(unsigned long v_addr, unsigned long size, + struct vm_struct **vm) +{ + struct vm_struct *tmp; + + for (tmp = imlist; tmp; tmp = tmp->next) + if (v_addr < (unsigned long) tmp->addr + tmp->size) + break; + + if (tmp) { + if (im_region_overlaps(v_addr, size, tmp)) + return IM_REGION_OVERLAP; + + *vm = tmp; + if (im_region_is_subset(v_addr, size, tmp)) { + /* Return with tmp pointing to superset */ + return IM_REGION_SUBSET; + } + if (im_region_is_superset(v_addr, size, tmp)) { + /* Return with tmp pointing to first subset */ + return IM_REGION_SUPERSET; + } + else if (v_addr == (unsigned long) tmp->addr && + size == tmp->size) { + /* Return with tmp pointing to exact region */ + return IM_REGION_EXISTS; + } + } + + *vm = NULL; + return IM_REGION_UNUSED; +} + +static struct vm_struct * split_im_region(unsigned long v_addr, + unsigned long size, struct vm_struct *parent) +{ + struct vm_struct *vm1 = NULL; + struct vm_struct *vm2 = NULL; + struct vm_struct *new_vm = NULL; + + vm1 = (struct vm_struct *) kmalloc(sizeof(*vm1), GFP_KERNEL); + if (vm1 == NULL) { + printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); + return NULL; + } + + if (v_addr == (unsigned long) parent->addr) { + /* Use existing parent vm_struct to represent child, allocate + * new one for the remainder of parent range + */ + vm1->size = parent->size - size; + vm1->addr = (void *) (v_addr + size); + vm1->next = parent->next; + + parent->size = size; + parent->next = vm1; + new_vm = parent; + } else if (v_addr + size == (unsigned long) parent->addr + + parent->size) { + /* Allocate new vm_struct to represent child, use existing + * parent one for remainder of parent range + */ + vm1->size = size; + vm1->addr = (void *) v_addr; + vm1->next = parent->next; + new_vm = vm1; + + parent->size -= size; + parent->next = vm1; + } else { + /* Allocate two new vm_structs for the new child and + * uppermost remainder, and use existing parent one for the + * lower remainder of parent range + */ + vm2 = (struct vm_struct *) kmalloc(sizeof(*vm2), GFP_KERNEL); + if (vm2 == NULL) { + printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); + kfree(vm1); + return NULL; + } + + vm1->size = size; + vm1->addr = (void *) v_addr; + vm1->next = vm2; + new_vm = vm1; + + vm2->size = ((unsigned long) parent->addr + parent->size) - + (v_addr + size); + vm2->addr = (void *) v_addr + size; + vm2->next = parent->next; + + parent->size = v_addr - (unsigned long) parent->addr; + parent->next = vm1; + } + + return new_vm; +} + +static struct vm_struct * __add_new_im_area(unsigned long req_addr, + unsigned long size) +{ + struct vm_struct **p, *tmp, *area; + + for (p = &imlist; (tmp = *p) ; p = &tmp->next) { + if (req_addr + size <= (unsigned long)tmp->addr) + break; + } + + area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL); + if (!area) + return NULL; + area->flags = 0; + area->addr = (void *)req_addr; + area->size = size; + area->next = *p; + *p = area; + + return area; +} + +static struct vm_struct * __im_get_area(unsigned long req_addr, + unsigned long size, + int criteria) +{ + struct vm_struct *tmp; + int status; + + status = im_region_status(req_addr, size, &tmp); + if ((criteria & status) == 0) { + return NULL; + } + + switch (status) { + case IM_REGION_UNUSED: + tmp = __add_new_im_area(req_addr, size); + break; + case IM_REGION_SUBSET: + tmp = split_im_region(req_addr, size, tmp); + break; + case IM_REGION_EXISTS: + /* Return requested region */ + break; + case IM_REGION_SUPERSET: + /* Return first existing subset of requested region */ + break; + default: + printk(KERN_ERR "%s() unexpected imalloc region status\n", + __FUNCTION__); + tmp = NULL; + } + + return tmp; +} + +struct vm_struct * im_get_free_area(unsigned long size) +{ + struct vm_struct *area; + unsigned long addr; + + down(&imlist_sem); + if (get_free_im_addr(size, &addr)) { + printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n", + __FUNCTION__, size); + area = NULL; + goto next_im_done; + } + + area = __im_get_area(addr, size, IM_REGION_UNUSED); + if (area == NULL) { + printk(KERN_ERR + "%s() cannot obtain area for addr 0x%lx size 0x%lx\n", + __FUNCTION__, addr, size); + } +next_im_done: + up(&imlist_sem); + return area; +} + +struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, + int criteria) +{ + struct vm_struct *area; + + down(&imlist_sem); + area = __im_get_area(v_addr, size, criteria); + up(&imlist_sem); + return area; +} + +unsigned long im_free(void * addr) +{ + struct vm_struct **p, *tmp; + unsigned long ret_size = 0; + + if (!addr) + return ret_size; + if ((PAGE_SIZE-1) & (unsigned long) addr) { + printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr); + return ret_size; + } + down(&imlist_sem); + for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { + if (tmp->addr == addr) { + ret_size = tmp->size; + *p = tmp->next; + kfree(tmp); + up(&imlist_sem); + return ret_size; + } + } + up(&imlist_sem); + printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__, + addr); + return ret_size; +} diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c new file mode 100644 index 00000000000..23813d03e1c --- /dev/null +++ b/arch/ppc64/mm/init.c @@ -0,0 +1,927 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int mem_init_done; +unsigned long ioremap_bot = IMALLOC_BASE; +static unsigned long phbs_io_bot = PHBS_IO_BASE; + +extern pgd_t swapper_pg_dir[]; +extern struct task_struct *current_set[NR_CPUS]; + +extern pgd_t ioremap_dir[]; +pgd_t * ioremap_pgd = (pgd_t *)&ioremap_dir; + +unsigned long klimit = (unsigned long)_end; + +unsigned long _SDR1=0; +unsigned long _ASR=0; + +/* max amount of RAM to use */ +unsigned long __max_memory; + +/* info on what we think the IO hole is */ +unsigned long io_hole_start; +unsigned long io_hole_size; + +void show_mem(void) +{ + unsigned long total = 0, reserved = 0; + unsigned long shared = 0, cached = 0; + struct page *page; + pg_data_t *pgdat; + unsigned long i; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + for_each_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { + page = pgdat->node_mem_map + i; + total++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (page_count(page)) + shared += page_count(page) - 1; + } + } + printk("%ld pages of RAM\n", total); + printk("%ld reserved pages\n", reserved); + printk("%ld pages shared\n", shared); + printk("%ld pages swap cached\n", cached); +} + +#ifdef CONFIG_PPC_ISERIES + +void __iomem *ioremap(unsigned long addr, unsigned long size) +{ + return (void __iomem *)addr; +} + +extern void __iomem *__ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + return (void __iomem *)addr; +} + +void iounmap(volatile void __iomem *addr) +{ + return; +} + +#else + +/* + * map_io_page currently only called by __ioremap + * map_io_page adds an entry to the ioremap page table + * and adds an entry to the HPT, possibly bolting it + */ +static void map_io_page(unsigned long ea, unsigned long pa, int flags) +{ + pgd_t *pgdp; + pmd_t *pmdp; + pte_t *ptep; + unsigned long vsid; + + if (mem_init_done) { + spin_lock(&ioremap_mm.page_table_lock); + pgdp = pgd_offset_i(ea); + pmdp = pmd_alloc(&ioremap_mm, pgdp, ea); + ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea); + + pa = abs_to_phys(pa); + set_pte_at(&ioremap_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); + spin_unlock(&ioremap_mm.page_table_lock); + } else { + unsigned long va, vpn, hash, hpteg; + + /* + * If the mm subsystem is not fully up, we cannot create a + * linux page table entry for this mapping. Simply bolt an + * entry in the hardware page table. + */ + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0xFFFFFFF); + vpn = va >> PAGE_SHIFT; + + hash = hpt_hash(vpn, 0); + + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + + /* Panic if a pte grpup is full */ + if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0, + _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX, + 1, 0) == -1) { + panic("map_io_page: could not insert mapping"); + } + } +} + + +static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, + unsigned long ea, unsigned long size, + unsigned long flags) +{ + unsigned long i; + + if ((flags & _PAGE_PRESENT) == 0) + flags |= pgprot_val(PAGE_KERNEL); + if (flags & (_PAGE_NO_CACHE | _PAGE_WRITETHRU)) + flags |= _PAGE_GUARDED; + + for (i = 0; i < size; i += PAGE_SIZE) { + map_io_page(ea+i, pa+i, flags); + } + + return (void __iomem *) (ea + (addr & ~PAGE_MASK)); +} + + +void __iomem * +ioremap(unsigned long addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} + +void __iomem * +__ioremap(unsigned long addr, unsigned long size, unsigned long flags) +{ + unsigned long pa, ea; + + /* + * Choose an address to map it to. + * Once the imalloc system is running, we use it. + * Before that, we map using addresses going + * up from ioremap_bot. imalloc will use + * the addresses from ioremap_bot through + * IMALLOC_END (0xE000001fffffffff) + * + */ + pa = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - pa; + + if (size == 0) + return NULL; + + if (mem_init_done) { + struct vm_struct *area; + area = im_get_free_area(size); + if (area == NULL) + return NULL; + ea = (unsigned long)(area->addr); + } else { + ea = ioremap_bot; + ioremap_bot += size; + } + + return __ioremap_com(addr, pa, ea, size, flags); +} + +#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) + +int __ioremap_explicit(unsigned long pa, unsigned long ea, + unsigned long size, unsigned long flags) +{ + struct vm_struct *area; + + /* For now, require page-aligned values for pa, ea, and size */ + if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || + !IS_PAGE_ALIGNED(size)) { + printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); + return 1; + } + + if (!mem_init_done) { + /* Two things to consider in this case: + * 1) No records will be kept (imalloc, etc) that the region + * has been remapped + * 2) It won't be easy to iounmap() the region later (because + * of 1) + */ + ; + } else { + area = im_get_area(ea, size, + IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); + if (area == NULL) { + /* Expected when PHB-dlpar is in play */ + return 1; + } + if (ea != (unsigned long) area->addr) { + printk(KERN_ERR "unexpected addr return from im_get_area\n"); + return 1; + } + } + + if (__ioremap_com(pa, pa, ea, size, flags) != (void *) ea) { + printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); + return 1; + } + + return 0; +} + +static void unmap_im_area_pte(pmd_t *pmd, unsigned long address, + unsigned long size) +{ + unsigned long base, end; + pte_t *pte; + + if (pmd_none(*pmd)) + return; + if (pmd_bad(*pmd)) { + pmd_ERROR(*pmd); + pmd_clear(pmd); + return; + } + + pte = pte_offset_kernel(pmd, address); + base = address & PMD_MASK; + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + + do { + pte_t page; + page = ptep_get_and_clear(&ioremap_mm, base + address, pte); + address += PAGE_SIZE; + pte++; + if (pte_none(page)) + continue; + if (pte_present(page)) + continue; + printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n"); + } while (address < end); +} + +static void unmap_im_area_pmd(pgd_t *dir, unsigned long address, + unsigned long size) +{ + unsigned long base, end; + pmd_t *pmd; + + if (pgd_none(*dir)) + return; + if (pgd_bad(*dir)) { + pgd_ERROR(*dir); + pgd_clear(dir); + return; + } + + pmd = pmd_offset(dir, address); + base = address & PGDIR_MASK; + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + + do { + unmap_im_area_pte(pmd, base + address, end - address); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address < end); +} + +/* + * Unmap an IO region and remove it from imalloc'd list. + * Access to IO memory should be serialized by driver. + * This code is modeled after vmalloc code - unmap_vm_area() + * + * XXX what about calls before mem_init_done (ie python_countermeasures()) + */ +void iounmap(volatile void __iomem *token) +{ + unsigned long address, start, end, size; + struct mm_struct *mm; + pgd_t *dir; + void *addr; + + if (!mem_init_done) { + return; + } + + addr = (void *) ((unsigned long __force) token & PAGE_MASK); + + if ((size = im_free(addr)) == 0) { + return; + } + + address = (unsigned long)addr; + start = address; + end = address + size; + + mm = &ioremap_mm; + spin_lock(&mm->page_table_lock); + + dir = pgd_offset_i(address); + flush_cache_vunmap(address, end); + do { + unmap_im_area_pmd(dir, address, end - address); + address = (address + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } while (address && (address < end)); + flush_tlb_kernel_range(start, end); + + spin_unlock(&mm->page_table_lock); + return; +} + +static int iounmap_subset_regions(unsigned long addr, unsigned long size) +{ + struct vm_struct *area; + + /* Check whether subsets of this region exist */ + area = im_get_area(addr, size, IM_REGION_SUPERSET); + if (area == NULL) + return 1; + + while (area) { + iounmap((void __iomem *) area->addr); + area = im_get_area(addr, size, + IM_REGION_SUPERSET); + } + + return 0; +} + +int iounmap_explicit(volatile void __iomem *start, unsigned long size) +{ + struct vm_struct *area; + unsigned long addr; + int rc; + + addr = (unsigned long __force) start & PAGE_MASK; + + /* Verify that the region either exists or is a subset of an existing + * region. In the latter case, split the parent region to create + * the exact region + */ + area = im_get_area(addr, size, + IM_REGION_EXISTS | IM_REGION_SUBSET); + if (area == NULL) { + /* Determine whether subset regions exist. If so, unmap */ + rc = iounmap_subset_regions(addr, size); + if (rc) { + printk(KERN_ERR + "%s() cannot unmap nonexistent range 0x%lx\n", + __FUNCTION__, addr); + return 1; + } + } else { + iounmap((void __iomem *) area->addr); + } + /* + * FIXME! This can't be right: + iounmap(area->addr); + * Maybe it should be "iounmap(area);" + */ + return 0; +} + +#endif + +EXPORT_SYMBOL(ioremap); +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); + +void free_initmem(void) +{ + unsigned long addr; + + addr = (unsigned long)__init_begin; + for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + set_page_count(virt_to_page(addr), 1); + free_page(addr); + totalram_pages++; + } + printk ("Freeing unused kernel memory: %luk freed\n", + ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif + +static DEFINE_SPINLOCK(mmu_context_lock); +static DEFINE_IDR(mmu_context_idr); + +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + int index; + int err; + +#ifdef CONFIG_HUGETLB_PAGE + /* We leave htlb_segs as it was, but for a fork, we need to + * clear the huge_pgdir. */ + mm->context.huge_pgdir = NULL; +#endif + +again: + if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&mmu_context_lock); + err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); + spin_unlock(&mmu_context_lock); + + if (err == -EAGAIN) + goto again; + else if (err) + return err; + + if (index > MAX_CONTEXT) { + idr_remove(&mmu_context_idr, index); + return -ENOMEM; + } + + mm->context.id = index; + + return 0; +} + +void destroy_context(struct mm_struct *mm) +{ + spin_lock(&mmu_context_lock); + idr_remove(&mmu_context_idr, mm->context.id); + spin_unlock(&mmu_context_lock); + + mm->context.id = NO_CONTEXT; + + hugetlb_mm_free_pgd(mm); +} + +/* + * Do very early mm setup. + */ +void __init mm_init_ppc64(void) +{ +#ifndef CONFIG_PPC_ISERIES + unsigned long i; +#endif + + ppc64_boot_msg(0x100, "MM Init"); + + /* This is the story of the IO hole... please, keep seated, + * unfortunately, we are out of oxygen masks at the moment. + * So we need some rough way to tell where your big IO hole + * is. On pmac, it's between 2G and 4G, on POWER3, it's around + * that area as well, on POWER4 we don't have one, etc... + * We need that as a "hint" when sizing the TCE table on POWER3 + * So far, the simplest way that seem work well enough for us it + * to just assume that the first discontinuity in our physical + * RAM layout is the IO hole. That may not be correct in the future + * (and isn't on iSeries but then we don't care ;) + */ + +#ifndef CONFIG_PPC_ISERIES + for (i = 1; i < lmb.memory.cnt; i++) { + unsigned long base, prevbase, prevsize; + + prevbase = lmb.memory.region[i-1].physbase; + prevsize = lmb.memory.region[i-1].size; + base = lmb.memory.region[i].physbase; + if (base > (prevbase + prevsize)) { + io_hole_start = prevbase + prevsize; + io_hole_size = base - (prevbase + prevsize); + break; + } + } +#endif /* CONFIG_PPC_ISERIES */ + if (io_hole_start) + printk("IO Hole assumed to be %lx -> %lx\n", + io_hole_start, io_hole_start + io_hole_size - 1); + + ppc64_boot_msg(0x100, "MM Init Done"); +} + +/* + * This is called by /dev/mem to know if a given address has to + * be mapped non-cacheable or not + */ +int page_is_ram(unsigned long pfn) +{ + int i; + unsigned long paddr = (pfn << PAGE_SHIFT); + + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long base; + +#ifdef CONFIG_MSCHUNKS + base = lmb.memory.region[i].physbase; +#else + base = lmb.memory.region[i].base; +#endif + if ((paddr >= base) && + (paddr < (base + lmb.memory.region[i].size))) { + return 1; + } + } + + return 0; +} +EXPORT_SYMBOL(page_is_ram); + +/* + * Initialize the bootmem system and give it all the memory we + * have available. + */ +#ifndef CONFIG_DISCONTIGMEM +void __init do_init_bootmem(void) +{ + unsigned long i; + unsigned long start, bootmap_pages; + unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT; + int boot_mapsize; + + /* + * Find an area to use for the bootmem bitmap. Calculate the size of + * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. + * Add 1 additional page in case the address isn't page-aligned. + */ + bootmap_pages = bootmem_bootmap_pages(total_pages); + + start = abs_to_phys(lmb_alloc(bootmap_pages<> PAGE_SHIFT, total_pages); + + max_pfn = max_low_pfn; + + /* add all physical memory to the bootmem map. Also find the first */ + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long physbase, size; + + physbase = lmb.memory.region[i].physbase; + size = lmb.memory.region[i].size; + free_bootmem(physbase, size); + } + + /* reserve the sections we're already using */ + for (i=0; i < lmb.reserved.cnt; i++) { + unsigned long physbase = lmb.reserved.region[i].physbase; + unsigned long size = lmb.reserved.region[i].size; + + reserve_bootmem(physbase, size); + } +} + +/* + * paging_init() sets up the page tables - in fact we've already done this. + */ +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long zholes_size[MAX_NR_ZONES]; + unsigned long total_ram = lmb_phys_mem_size(); + unsigned long top_of_ram = lmb_end_of_DRAM(); + + printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + top_of_ram, total_ram); + printk(KERN_INFO "Memory hole size: %ldMB\n", + (top_of_ram - total_ram) >> 20); + /* + * All pages are DMA-able so we put them all in the DMA zone. + */ + memset(zones_size, 0, sizeof(zones_size)); + memset(zholes_size, 0, sizeof(zholes_size)); + + zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; + + free_area_init_node(0, &contig_page_data, zones_size, + __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); +} +#endif /* CONFIG_DISCONTIGMEM */ + +static struct kcore_list kcore_vmem; + +static int __init setup_kcore(void) +{ + int i; + + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long physbase, size; + struct kcore_list *kcore_mem; + + physbase = lmb.memory.region[i].physbase; + size = lmb.memory.region[i].size; + + /* GFP_ATOMIC to avoid might_sleep warnings during boot */ + kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC); + if (!kcore_mem) + panic("mem_init: kmalloc failed\n"); + + kclist_add(kcore_mem, __va(physbase), size); + } + + kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); + + return 0; +} +module_init(setup_kcore); + +void __init mem_init(void) +{ +#ifdef CONFIG_DISCONTIGMEM + int nid; +#endif + pg_data_t *pgdat; + unsigned long i; + struct page *page; + unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; + + num_physpages = max_low_pfn; /* RAM is assumed contiguous */ + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + +#ifdef CONFIG_DISCONTIGMEM + for_each_online_node(nid) { + if (NODE_DATA(nid)->node_spanned_pages != 0) { + printk("freeing bootmem node %x\n", nid); + totalram_pages += + free_all_bootmem_node(NODE_DATA(nid)); + } + } +#else + max_mapnr = num_physpages; + totalram_pages += free_all_bootmem(); +#endif + + for_each_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { + page = pgdat->node_mem_map + i; + if (PageReserved(page)) + reservedpages++; + } + } + + codesize = (unsigned long)&_etext - (unsigned long)&_stext; + initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; + datasize = (unsigned long)&_edata - (unsigned long)&__init_end; + bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; + + printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " + "%luk reserved, %luk data, %luk bss, %luk init)\n", + (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), + num_physpages << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + bsssize >> 10, + initsize >> 10); + + mem_init_done = 1; + +#ifdef CONFIG_PPC_ISERIES + iommu_vio_init(); +#endif + /* Initialize the vDSO */ + vdso_init(); +} + +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. + */ +void flush_dcache_page(struct page *page) +{ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &page->flags)) + clear_bit(PG_arch_1, &page->flags); +} +EXPORT_SYMBOL(flush_dcache_page); + +void clear_user_page(void *page, unsigned long vaddr, struct page *pg) +{ + clear_page(page); + + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* + * We shouldnt have to do this, but some versions of glibc + * require it (ld.so assumes zero filled pages are icache clean) + * - Anton + */ + + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &pg->flags)) + clear_bit(PG_arch_1, &pg->flags); +} +EXPORT_SYMBOL(clear_user_page); + +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *pg) +{ + copy_page(vto, vfrom); + + /* + * We should be able to use the following optimisation, however + * there are two problems. + * Firstly a bug in some versions of binutils meant PLT sections + * were not marked executable. + * Secondly the first word in the GOT section is blrl, used + * to establish the GOT address. Until recently the GOT was + * not marked executable. + * - Anton + */ +#if 0 + if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) + return; +#endif + + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &pg->flags)) + clear_bit(PG_arch_1, &pg->flags); +} + +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + unsigned long maddr; + + maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK); + flush_icache_range(maddr, maddr + len); +} +EXPORT_SYMBOL(flush_icache_user_range); + +/* + * This is called at the end of handling a user page fault, when the + * fault has been handled by updating a PTE in the linux page tables. + * We use it to preload an HPTE into the hash table corresponding to + * the updated linux PTE. + * + * This must always be called with the mm->page_table_lock held + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea, + pte_t pte) +{ + unsigned long vsid; + void *pgdir; + pte_t *ptep; + int local = 0; + cpumask_t tmp; + unsigned long flags; + + /* handle i-cache coherency */ + if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && + !cpu_has_feature(CPU_FTR_NOEXECUTE)) { + unsigned long pfn = pte_pfn(pte); + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + if (!PageReserved(page) + && !test_bit(PG_arch_1, &page->flags)) { + __flush_dcache_icache(page_address(page)); + set_bit(PG_arch_1, &page->flags); + } + } + } + + /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ + if (!pte_young(pte)) + return; + + pgdir = vma->vm_mm->pgd; + if (pgdir == NULL) + return; + + ptep = find_linux_pte(pgdir, ea); + if (!ptep) + return; + + vsid = get_vsid(vma->vm_mm->context.id, ea); + + local_irq_save(flags); + tmp = cpumask_of_cpu(smp_processor_id()); + if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) + local = 1; + + __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, + 0x300, local); + local_irq_restore(flags); +} + +void __iomem * reserve_phb_iospace(unsigned long size) +{ + void __iomem *virt_addr; + + if (phbs_io_bot >= IMALLOC_BASE) + panic("reserve_phb_iospace(): phb io space overflow\n"); + + virt_addr = (void __iomem *) phbs_io_bot; + phbs_io_bot += size; + + return virt_addr; +} + +kmem_cache_t *zero_cache; + +static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags) +{ + memset(pte, 0, PAGE_SIZE); +} + +void pgtable_cache_init(void) +{ + zero_cache = kmem_cache_create("zero", + PAGE_SIZE, + 0, + SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (!zero_cache) + panic("pgtable_cache_init(): could not create zero_cache!\n"); +} + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, + unsigned long size, pgprot_t vma_prot) +{ + if (ppc_md.phys_mem_access_prot) + return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + + if (!page_is_ram(addr >> PAGE_SHIFT)) + vma_prot = __pgprot(pgprot_val(vma_prot) + | _PAGE_GUARDED | _PAGE_NO_CACHE); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); diff --git a/arch/ppc64/mm/mmap.c b/arch/ppc64/mm/mmap.c new file mode 100644 index 00000000000..fe65f522aff --- /dev/null +++ b/arch/ppc64/mm/mmap.c @@ -0,0 +1,86 @@ +/* + * linux/arch/ppc64/mm/mmap.c + * + * flexible mmap layout support + * + * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * Started by Ingo Molnar + */ + +#include +#include + +/* + * Top of mmap area (just below the process stack). + * + * Leave an at least ~128 MB hole. + */ +#define MIN_GAP (128*1024*1024) +#define MAX_GAP (TASK_SIZE/6*5) + +static inline unsigned long mmap_base(void) +{ + unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return TASK_SIZE - (gap & PAGE_MASK); +} + +static inline int mmap_is_legacy(void) +{ + /* + * Force standard allocation for 64 bit programs. + */ + if (!test_thread_flag(TIF_32BIT)) + return 1; + + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +/* + * This function, called very early during the creation of a new + * process VM image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c new file mode 100644 index 00000000000..ea862ec643d --- /dev/null +++ b/arch/ppc64/mm/numa.c @@ -0,0 +1,734 @@ +/* + * pSeries NUMA support + * + * Copyright (C) 2002 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int numa_enabled = 1; + +static int numa_debug; +#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } + +#ifdef DEBUG_NUMA +#define ARRAY_INITIALISER -1 +#else +#define ARRAY_INITIALISER 0 +#endif + +int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] = + ARRAY_INITIALISER}; +char *numa_memory_lookup_table; +cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; +int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0}; + +struct pglist_data *node_data[MAX_NUMNODES]; +bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; +static int min_common_depth; + +/* + * We need somewhere to store start/span for each node until we have + * allocated the real node_data structures. + */ +static struct { + unsigned long node_start_pfn; + unsigned long node_end_pfn; + unsigned long node_present_pages; +} init_node_data[MAX_NUMNODES] __initdata; + +EXPORT_SYMBOL(node_data); +EXPORT_SYMBOL(numa_cpu_lookup_table); +EXPORT_SYMBOL(numa_memory_lookup_table); +EXPORT_SYMBOL(numa_cpumask_lookup_table); +EXPORT_SYMBOL(nr_cpus_in_node); + +static inline void map_cpu_to_node(int cpu, int node) +{ + numa_cpu_lookup_table[cpu] = node; + if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) { + cpu_set(cpu, numa_cpumask_lookup_table[node]); + nr_cpus_in_node[node]++; + } +} + +#ifdef CONFIG_HOTPLUG_CPU +static void unmap_cpu_from_node(unsigned long cpu) +{ + int node = numa_cpu_lookup_table[cpu]; + + dbg("removing cpu %lu from node %d\n", cpu, node); + + if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { + cpu_clear(cpu, numa_cpumask_lookup_table[node]); + nr_cpus_in_node[node]--; + } else { + printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", + cpu, node); + } +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static struct device_node * __devinit find_cpu_node(unsigned int cpu) +{ + unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); + struct device_node *cpu_node = NULL; + unsigned int *interrupt_server, *reg; + int len; + + while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) { + /* Try interrupt server first */ + interrupt_server = (unsigned int *)get_property(cpu_node, + "ibm,ppc-interrupt-server#s", &len); + + len = len / sizeof(u32); + + if (interrupt_server && (len > 0)) { + while (len--) { + if (interrupt_server[len] == hw_cpuid) + return cpu_node; + } + } else { + reg = (unsigned int *)get_property(cpu_node, + "reg", &len); + if (reg && (len > 0) && (reg[0] == hw_cpuid)) + return cpu_node; + } + } + + return NULL; +} + +/* must hold reference to node during call */ +static int *of_get_associativity(struct device_node *dev) +{ + return (unsigned int *)get_property(dev, "ibm,associativity", NULL); +} + +static int of_node_numa_domain(struct device_node *device) +{ + int numa_domain; + unsigned int *tmp; + + if (min_common_depth == -1) + return 0; + + tmp = of_get_associativity(device); + if (tmp && (tmp[0] >= min_common_depth)) { + numa_domain = tmp[min_common_depth]; + } else { + dbg("WARNING: no NUMA information for %s\n", + device->full_name); + numa_domain = 0; + } + return numa_domain; +} + +/* + * In theory, the "ibm,associativity" property may contain multiple + * associativity lists because a resource may be multiply connected + * into the machine. This resource then has different associativity + * characteristics relative to its multiple connections. We ignore + * this for now. We also assume that all cpu and memory sets have + * their distances represented at a common level. This won't be + * true for heirarchical NUMA. + * + * In any case the ibm,associativity-reference-points should give + * the correct depth for a normal NUMA system. + * + * - Dave Hansen + */ +static int __init find_min_common_depth(void) +{ + int depth; + unsigned int *ref_points; + struct device_node *rtas_root; + unsigned int len; + + rtas_root = of_find_node_by_path("/rtas"); + + if (!rtas_root) + return -1; + + /* + * this property is 2 32-bit integers, each representing a level of + * depth in the associativity nodes. The first is for an SMP + * configuration (should be all 0's) and the second is for a normal + * NUMA configuration. + */ + ref_points = (unsigned int *)get_property(rtas_root, + "ibm,associativity-reference-points", &len); + + if ((len >= 1) && ref_points) { + depth = ref_points[1]; + } else { + dbg("WARNING: could not find NUMA " + "associativity reference point\n"); + depth = -1; + } + of_node_put(rtas_root); + + return depth; +} + +static int __init get_mem_addr_cells(void) +{ + struct device_node *memory = NULL; + int rc; + + memory = of_find_node_by_type(memory, "memory"); + if (!memory) + return 0; /* it won't matter */ + + rc = prom_n_addr_cells(memory); + return rc; +} + +static int __init get_mem_size_cells(void) +{ + struct device_node *memory = NULL; + int rc; + + memory = of_find_node_by_type(memory, "memory"); + if (!memory) + return 0; /* it won't matter */ + rc = prom_n_size_cells(memory); + return rc; +} + +static unsigned long read_n_cells(int n, unsigned int **buf) +{ + unsigned long result = 0; + + while (n--) { + result = (result << 32) | **buf; + (*buf)++; + } + return result; +} + +/* + * Figure out to which domain a cpu belongs and stick it there. + * Return the id of the domain used. + */ +static int numa_setup_cpu(unsigned long lcpu) +{ + int numa_domain = 0; + struct device_node *cpu = find_cpu_node(lcpu); + + if (!cpu) { + WARN_ON(1); + goto out; + } + + numa_domain = of_node_numa_domain(cpu); + + if (numa_domain >= num_online_nodes()) { + /* + * POWER4 LPAR uses 0xffff as invalid node, + * dont warn in this case. + */ + if (numa_domain != 0xffff) + printk(KERN_ERR "WARNING: cpu %ld " + "maps to invalid NUMA node %d\n", + lcpu, numa_domain); + numa_domain = 0; + } +out: + node_set_online(numa_domain); + + map_cpu_to_node(lcpu, numa_domain); + + of_node_put(cpu); + + return numa_domain; +} + +static int cpu_numa_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned long lcpu = (unsigned long)hcpu; + int ret = NOTIFY_DONE; + + switch (action) { + case CPU_UP_PREPARE: + if (min_common_depth == -1 || !numa_enabled) + map_cpu_to_node(lcpu, 0); + else + numa_setup_cpu(lcpu); + ret = NOTIFY_OK; + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + case CPU_UP_CANCELED: + unmap_cpu_from_node(lcpu); + break; + ret = NOTIFY_OK; +#endif + } + return ret; +} + +/* + * Check and possibly modify a memory region to enforce the memory limit. + * + * Returns the size the region should have to enforce the memory limit. + * This will either be the original value of size, a truncated value, + * or zero. If the returned value of size is 0 the region should be + * discarded as it lies wholy above the memory limit. + */ +static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size) +{ + /* + * We use lmb_end_of_DRAM() in here instead of memory_limit because + * we've already adjusted it for the limit and it takes care of + * having memory holes below the limit. + */ + extern unsigned long memory_limit; + + if (! memory_limit) + return size; + + if (start + size <= lmb_end_of_DRAM()) + return size; + + if (start >= lmb_end_of_DRAM()) + return 0; + + return lmb_end_of_DRAM() - start; +} + +static int __init parse_numa_properties(void) +{ + struct device_node *cpu = NULL; + struct device_node *memory = NULL; + int addr_cells, size_cells; + int max_domain = 0; + long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT; + unsigned long i; + + if (numa_enabled == 0) { + printk(KERN_WARNING "NUMA disabled by user\n"); + return -1; + } + + numa_memory_lookup_table = + (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); + memset(numa_memory_lookup_table, 0, entries * sizeof(char)); + + for (i = 0; i < entries ; i++) + numa_memory_lookup_table[i] = ARRAY_INITIALISER; + + min_common_depth = find_min_common_depth(); + + dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); + if (min_common_depth < 0) + return min_common_depth; + + max_domain = numa_setup_cpu(boot_cpuid); + + /* + * Even though we connect cpus to numa domains later in SMP init, + * we need to know the maximum node id now. This is because each + * node id must have NODE_DATA etc backing it. + * As a result of hotplug we could still have cpus appear later on + * with larger node ids. In that case we force the cpu into node 0. + */ + for_each_cpu(i) { + int numa_domain; + + cpu = find_cpu_node(i); + + if (cpu) { + numa_domain = of_node_numa_domain(cpu); + of_node_put(cpu); + + if (numa_domain < MAX_NUMNODES && + max_domain < numa_domain) + max_domain = numa_domain; + } + } + + addr_cells = get_mem_addr_cells(); + size_cells = get_mem_size_cells(); + memory = NULL; + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long start; + unsigned long size; + int numa_domain; + int ranges; + unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + ranges = memory->n_addrs; +new_range: + /* these are order-sensitive, and modify the buffer pointer */ + start = read_n_cells(addr_cells, &memcell_buf); + size = read_n_cells(size_cells, &memcell_buf); + + start = _ALIGN_DOWN(start, MEMORY_INCREMENT); + size = _ALIGN_UP(size, MEMORY_INCREMENT); + + numa_domain = of_node_numa_domain(memory); + + if (numa_domain >= MAX_NUMNODES) { + if (numa_domain != 0xffff) + printk(KERN_ERR "WARNING: memory at %lx maps " + "to invalid NUMA node %d\n", start, + numa_domain); + numa_domain = 0; + } + + if (max_domain < numa_domain) + max_domain = numa_domain; + + if (! (size = numa_enforce_memory_limit(start, size))) { + if (--ranges) + goto new_range; + else + continue; + } + + /* + * Initialize new node struct, or add to an existing one. + */ + if (init_node_data[numa_domain].node_end_pfn) { + if ((start / PAGE_SIZE) < + init_node_data[numa_domain].node_start_pfn) + init_node_data[numa_domain].node_start_pfn = + start / PAGE_SIZE; + if (((start / PAGE_SIZE) + (size / PAGE_SIZE)) > + init_node_data[numa_domain].node_end_pfn) + init_node_data[numa_domain].node_end_pfn = + (start / PAGE_SIZE) + + (size / PAGE_SIZE); + + init_node_data[numa_domain].node_present_pages += + size / PAGE_SIZE; + } else { + node_set_online(numa_domain); + + init_node_data[numa_domain].node_start_pfn = + start / PAGE_SIZE; + init_node_data[numa_domain].node_end_pfn = + init_node_data[numa_domain].node_start_pfn + + size / PAGE_SIZE; + init_node_data[numa_domain].node_present_pages = + size / PAGE_SIZE; + } + + for (i = start ; i < (start+size); i += MEMORY_INCREMENT) + numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = + numa_domain; + + if (--ranges) + goto new_range; + } + + for (i = 0; i <= max_domain; i++) + node_set_online(i); + + return 0; +} + +static void __init setup_nonnuma(void) +{ + unsigned long top_of_ram = lmb_end_of_DRAM(); + unsigned long total_ram = lmb_phys_mem_size(); + unsigned long i; + + printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + top_of_ram, total_ram); + printk(KERN_INFO "Memory hole size: %ldMB\n", + (top_of_ram - total_ram) >> 20); + + if (!numa_memory_lookup_table) { + long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT; + numa_memory_lookup_table = + (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); + memset(numa_memory_lookup_table, 0, entries * sizeof(char)); + for (i = 0; i < entries ; i++) + numa_memory_lookup_table[i] = ARRAY_INITIALISER; + } + + map_cpu_to_node(boot_cpuid, 0); + + node_set_online(0); + + init_node_data[0].node_start_pfn = 0; + init_node_data[0].node_end_pfn = lmb_end_of_DRAM() / PAGE_SIZE; + init_node_data[0].node_present_pages = total_ram / PAGE_SIZE; + + for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) + numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; +} + +static void __init dump_numa_topology(void) +{ + unsigned int node; + unsigned int count; + + if (min_common_depth == -1 || !numa_enabled) + return; + + for_each_online_node(node) { + unsigned long i; + + printk(KERN_INFO "Node %d Memory:", node); + + count = 0; + + for (i = 0; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) { + if (numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] == node) { + if (count == 0) + printk(" 0x%lx", i); + ++count; + } else { + if (count > 0) + printk("-0x%lx", i); + count = 0; + } + } + + if (count > 0) + printk("-0x%lx", i); + printk("\n"); + } + return; +} + +/* + * Allocate some memory, satisfying the lmb or bootmem allocator where + * required. nid is the preferred node and end is the physical address of + * the highest address in the node. + * + * Returns the physical address of the memory. + */ +static unsigned long careful_allocation(int nid, unsigned long size, + unsigned long align, unsigned long end) +{ + unsigned long ret = lmb_alloc_base(size, align, end); + + /* retry over all memory */ + if (!ret) + ret = lmb_alloc_base(size, align, lmb_end_of_DRAM()); + + if (!ret) + panic("numa.c: cannot allocate %lu bytes on node %d", + size, nid); + + /* + * If the memory came from a previously allocated node, we must + * retry with the bootmem allocator. + */ + if (pa_to_nid(ret) < nid) { + nid = pa_to_nid(ret); + ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(nid), + size, align, 0); + + if (!ret) + panic("numa.c: cannot allocate %lu bytes on node %d", + size, nid); + + ret = virt_to_abs(ret); + + dbg("alloc_bootmem %lx %lx\n", ret, size); + } + + return ret; +} + +void __init do_init_bootmem(void) +{ + int nid; + int addr_cells, size_cells; + struct device_node *memory = NULL; + static struct notifier_block ppc64_numa_nb = { + .notifier_call = cpu_numa_callback, + .priority = 1 /* Must run before sched domains notifier. */ + }; + + min_low_pfn = 0; + max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_pfn = max_low_pfn; + + if (parse_numa_properties()) + setup_nonnuma(); + else + dump_numa_topology(); + + register_cpu_notifier(&ppc64_numa_nb); + + for_each_online_node(nid) { + unsigned long start_paddr, end_paddr; + int i; + unsigned long bootmem_paddr; + unsigned long bootmap_pages; + + start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE; + end_paddr = init_node_data[nid].node_end_pfn * PAGE_SIZE; + + /* Allocate the node structure node local if possible */ + NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid, + sizeof(struct pglist_data), + SMP_CACHE_BYTES, end_paddr); + NODE_DATA(nid) = abs_to_virt(NODE_DATA(nid)); + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + + dbg("node %d\n", nid); + dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); + + NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; + NODE_DATA(nid)->node_start_pfn = + init_node_data[nid].node_start_pfn; + NODE_DATA(nid)->node_spanned_pages = + end_paddr - start_paddr; + + if (NODE_DATA(nid)->node_spanned_pages == 0) + continue; + + dbg("start_paddr = %lx\n", start_paddr); + dbg("end_paddr = %lx\n", end_paddr); + + bootmap_pages = bootmem_bootmap_pages((end_paddr - start_paddr) >> PAGE_SHIFT); + + bootmem_paddr = careful_allocation(nid, + bootmap_pages << PAGE_SHIFT, + PAGE_SIZE, end_paddr); + memset(abs_to_virt(bootmem_paddr), 0, + bootmap_pages << PAGE_SHIFT); + dbg("bootmap_paddr = %lx\n", bootmem_paddr); + + init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, + start_paddr >> PAGE_SHIFT, + end_paddr >> PAGE_SHIFT); + + /* + * We need to do another scan of all memory sections to + * associate memory with the correct node. + */ + addr_cells = get_mem_addr_cells(); + size_cells = get_mem_size_cells(); + memory = NULL; + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long mem_start, mem_size; + int numa_domain, ranges; + unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + ranges = memory->n_addrs; /* ranges in cell */ +new_range: + mem_start = read_n_cells(addr_cells, &memcell_buf); + mem_size = read_n_cells(size_cells, &memcell_buf); + numa_domain = numa_enabled ? of_node_numa_domain(memory) : 0; + + if (numa_domain != nid) + continue; + + mem_size = numa_enforce_memory_limit(mem_start, mem_size); + if (mem_size) { + dbg("free_bootmem %lx %lx\n", mem_start, mem_size); + free_bootmem_node(NODE_DATA(nid), mem_start, mem_size); + } + + if (--ranges) /* process all ranges in cell */ + goto new_range; + } + + /* + * Mark reserved regions on this node + */ + for (i = 0; i < lmb.reserved.cnt; i++) { + unsigned long physbase = lmb.reserved.region[i].physbase; + unsigned long size = lmb.reserved.region[i].size; + + if (pa_to_nid(physbase) != nid && + pa_to_nid(physbase+size-1) != nid) + continue; + + if (physbase < end_paddr && + (physbase+size) > start_paddr) { + /* overlaps */ + if (physbase < start_paddr) { + size -= start_paddr - physbase; + physbase = start_paddr; + } + + if (size > end_paddr - physbase) + size = end_paddr - physbase; + + dbg("reserve_bootmem %lx %lx\n", physbase, + size); + reserve_bootmem_node(NODE_DATA(nid), physbase, + size); + } + } + } +} + +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long zholes_size[MAX_NR_ZONES]; + int nid; + + memset(zones_size, 0, sizeof(zones_size)); + memset(zholes_size, 0, sizeof(zholes_size)); + + for_each_online_node(nid) { + unsigned long start_pfn; + unsigned long end_pfn; + + start_pfn = init_node_data[nid].node_start_pfn; + end_pfn = init_node_data[nid].node_end_pfn; + + zones_size[ZONE_DMA] = end_pfn - start_pfn; + zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - + init_node_data[nid].node_present_pages; + + dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid, + zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]); + + free_area_init_node(nid, NODE_DATA(nid), zones_size, + start_pfn, zholes_size); + } +} + +static int __init early_numa(char *p) +{ + if (!p) + return 0; + + if (strstr(p, "off")) + numa_enabled = 0; + + if (strstr(p, "debug")) + numa_debug = 1; + + return 0; +} +early_param("numa", early_numa); diff --git a/arch/ppc64/mm/slb.c b/arch/ppc64/mm/slb.c new file mode 100644 index 00000000000..6a20773f695 --- /dev/null +++ b/arch/ppc64/mm/slb.c @@ -0,0 +1,159 @@ +/* + * PowerPC64 SLB support. + * + * Copyright (C) 2004 David Gibson , IBM + * Based on earlier code writteh by: + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * Copyright (C) 2002 Anton Blanchard , IBM + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +extern void slb_allocate(unsigned long ea); + +static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot) +{ + return (ea & ESID_MASK) | SLB_ESID_V | slot; +} + +static inline unsigned long mk_vsid_data(unsigned long ea, unsigned long flags) +{ + return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags; +} + +static inline void create_slbe(unsigned long ea, unsigned long vsid, + unsigned long flags, unsigned long entry) +{ + asm volatile("slbmte %0,%1" : + : "r" (mk_vsid_data(ea, flags)), + "r" (mk_esid_data(ea, entry)) + : "memory" ); +} + +static void slb_flush_and_rebolt(void) +{ + /* If you change this make sure you change SLB_NUM_BOLTED + * appropriately too. */ + unsigned long ksp_flags = SLB_VSID_KERNEL; + unsigned long ksp_esid_data; + + WARN_ON(!irqs_disabled()); + + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + ksp_flags |= SLB_VSID_L; + + ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); + if ((ksp_esid_data & ESID_MASK) == KERNELBASE) + ksp_esid_data &= ~SLB_ESID_V; + + /* We need to do this all in asm, so we're sure we don't touch + * the stack between the slbia and rebolting it. */ + asm volatile("isync\n" + "slbia\n" + /* Slot 1 - first VMALLOC segment */ + "slbmte %0,%1\n" + /* Slot 2 - kernel stack */ + "slbmte %2,%3\n" + "isync" + :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)), + "r"(mk_esid_data(VMALLOCBASE, 1)), + "r"(mk_vsid_data(ksp_esid_data, ksp_flags)), + "r"(ksp_esid_data) + : "memory"); +} + +/* Flush all user entries from the segment table of the current processor. */ +void switch_slb(struct task_struct *tsk, struct mm_struct *mm) +{ + unsigned long offset = get_paca()->slb_cache_ptr; + unsigned long esid_data = 0; + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; + + if (offset <= SLB_CACHE_ENTRIES) { + int i; + asm volatile("isync" : : : "memory"); + for (i = 0; i < offset; i++) { + esid_data = (unsigned long)get_paca()->slb_cache[i] + << SID_SHIFT; + asm volatile("slbie %0" : : "r" (esid_data)); + } + asm volatile("isync" : : : "memory"); + } else { + slb_flush_and_rebolt(); + } + + /* Workaround POWER5 < DD2.1 issue */ + if (offset == 1 || offset > SLB_CACHE_ENTRIES) + asm volatile("slbie %0" : : "r" (esid_data)); + + get_paca()->slb_cache_ptr = 0; + get_paca()->context = mm->context; + + /* + * preload some userspace segments into the SLB. + */ + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + if (pc >= KERNELBASE) + return; + slb_allocate(pc); + + if (GET_ESID(pc) == GET_ESID(stack)) + return; + + if (stack >= KERNELBASE) + return; + slb_allocate(stack); + + if ((GET_ESID(pc) == GET_ESID(unmapped_base)) + || (GET_ESID(stack) == GET_ESID(unmapped_base))) + return; + + if (unmapped_base >= KERNELBASE) + return; + slb_allocate(unmapped_base); +} + +void slb_initialize(void) +{ + /* On iSeries the bolted entries have already been set up by + * the hypervisor from the lparMap data in head.S */ +#ifndef CONFIG_PPC_ISERIES + unsigned long flags = SLB_VSID_KERNEL; + + /* Invalidate the entire SLB (even slot 0) & all the ERATS */ + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + flags |= SLB_VSID_L; + + asm volatile("isync":::"memory"); + asm volatile("slbmte %0,%0"::"r" (0) : "memory"); + asm volatile("isync; slbia; isync":::"memory"); + create_slbe(KERNELBASE, get_kernel_vsid(KERNELBASE), flags, 0); + create_slbe(VMALLOCBASE, get_kernel_vsid(KERNELBASE), + SLB_VSID_KERNEL, 1); + /* We don't bolt the stack for the time being - we're in boot, + * so the stack is in the bolted segment. By the time it goes + * elsewhere, we'll call _switch() which will bolt in the new + * one. */ + asm volatile("isync":::"memory"); +#endif + + get_paca()->stab_rr = SLB_NUM_BOLTED; +} diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S new file mode 100644 index 00000000000..8379d678f70 --- /dev/null +++ b/arch/ppc64/mm/slb_low.S @@ -0,0 +1,154 @@ +/* + * arch/ppc64/mm/slb_low.S + * + * Low-level SLB routines + * + * Copyright (C) 2004 David Gibson , IBM + * + * Based on earlier C version: + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * Copyright (C) 2002 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* void slb_allocate(unsigned long ea); + * + * Create an SLB entry for the given EA (user or kernel). + * r3 = faulting address, r13 = PACA + * r9, r10, r11 are clobbered by this function + * No other registers are examined or changed. + */ +_GLOBAL(slb_allocate) + /* + * First find a slot, round robin. Previously we tried to find + * a free slot first but that took too long. Unfortunately we + * dont have any LRU information to help us choose a slot. + */ +#ifdef CONFIG_PPC_ISERIES + /* + * On iSeries, the "bolted" stack segment can be cast out on + * shared processor switch so we need to check for a miss on + * it and restore it to the right slot. + */ + ld r9,PACAKSAVE(r13) + clrrdi r9,r9,28 + clrrdi r11,r3,28 + li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */ + cmpld r9,r11 + beq 3f +#endif /* CONFIG_PPC_ISERIES */ + + ld r10,PACASTABRR(r13) + addi r10,r10,1 + /* use a cpu feature mask if we ever change our slb size */ + cmpldi r10,SLB_NUM_ENTRIES + + blt+ 4f + li r10,SLB_NUM_BOLTED + +4: + std r10,PACASTABRR(r13) +3: + /* r3 = faulting address, r10 = entry */ + + srdi r9,r3,60 /* get region */ + srdi r3,r3,28 /* get esid */ + cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ + + rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */ + oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */ + + /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */ + + blt cr7,0f /* user or kernel? */ + + /* kernel address: proto-VSID = ESID */ + /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but + * this code will generate the protoVSID 0xfffffffff for the + * top segment. That's ok, the scramble below will translate + * it to VSID 0, which is reserved as a bad VSID - one which + * will never have any pages in it. */ + li r11,SLB_VSID_KERNEL +BEGIN_FTR_SECTION + bne cr7,9f + li r11,(SLB_VSID_KERNEL|SLB_VSID_L) +END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) + b 9f + +0: /* user address: proto-VSID = context<<15 | ESID */ + li r11,SLB_VSID_USER + + srdi. r9,r3,13 + bne- 8f /* invalid ea bits set */ + +#ifdef CONFIG_HUGETLB_PAGE +BEGIN_FTR_SECTION + /* check against the hugepage ranges */ + cmpldi r3,(TASK_HPAGE_END>>SID_SHIFT) + bge 6f /* >= TASK_HPAGE_END */ + cmpldi r3,(TASK_HPAGE_BASE>>SID_SHIFT) + bge 5f /* TASK_HPAGE_BASE..TASK_HPAGE_END */ + cmpldi r3,16 + bge 6f /* 4GB..TASK_HPAGE_BASE */ + + lhz r9,PACAHTLBSEGS(r13) + srd r9,r9,r3 + andi. r9,r9,1 + beq 6f + +5: /* this is a hugepage user address */ + li r11,(SLB_VSID_USER|SLB_VSID_L) +END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) +#endif /* CONFIG_HUGETLB_PAGE */ + +6: ld r9,PACACONTEXTID(r13) + rldimi r3,r9,USER_ESID_BITS,0 + +9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */ + ASM_VSID_SCRAMBLE(r3,r9) + + rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */ + + /* + * No need for an isync before or after this slbmte. The exception + * we enter with and the rfid we exit with are context synchronizing. + */ + slbmte r11,r10 + + bgelr cr7 /* we're done for kernel addresses */ + + /* Update the slb cache */ + lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ + cmpldi r3,SLB_CACHE_ENTRIES + bge 1f + + /* still room in the slb cache */ + sldi r11,r3,1 /* r11 = offset * sizeof(u16) */ + rldicl r10,r10,36,28 /* get low 16 bits of the ESID */ + add r11,r11,r13 /* r11 = (u16 *)paca + offset */ + sth r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ + addi r3,r3,1 /* offset++ */ + b 2f +1: /* offset >= SLB_CACHE_ENTRIES */ + li r3,SLB_CACHE_ENTRIES+1 +2: + sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ + blr + +8: /* invalid EA */ + li r3,0 /* BAD_VSID */ + li r11,SLB_VSID_USER /* flags don't much matter */ + b 9b diff --git a/arch/ppc64/mm/stab.c b/arch/ppc64/mm/stab.c new file mode 100644 index 00000000000..31491131d5e --- /dev/null +++ b/arch/ppc64/mm/stab.c @@ -0,0 +1,239 @@ +/* + * PowerPC64 Segment Translation Support. + * + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * + * Copyright (C) 2002 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +/* Both the segment table and SLB code uses the following cache */ +#define NR_STAB_CACHE_ENTRIES 8 +DEFINE_PER_CPU(long, stab_cache_ptr); +DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); + +/* + * Create a segment table entry for the given esid/vsid pair. + */ +static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) +{ + unsigned long esid_data, vsid_data; + unsigned long entry, group, old_esid, castout_entry, i; + unsigned int global_entry; + struct stab_entry *ste, *castout_ste; + unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE; + + vsid_data = vsid << STE_VSID_SHIFT; + esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; + if (! kernel_segment) + esid_data |= STE_ESID_KS; + + /* Search the primary group first. */ + global_entry = (esid & 0x1f) << 3; + ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); + + /* Find an empty entry, if one exists. */ + for (group = 0; group < 2; group++) { + for (entry = 0; entry < 8; entry++, ste++) { + if (!(ste->esid_data & STE_ESID_V)) { + ste->vsid_data = vsid_data; + asm volatile("eieio":::"memory"); + ste->esid_data = esid_data; + return (global_entry | entry); + } + } + /* Now search the secondary group. */ + global_entry = ((~esid) & 0x1f) << 3; + ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); + } + + /* + * Could not find empty entry, pick one with a round robin selection. + * Search all entries in the two groups. + */ + castout_entry = get_paca()->stab_rr; + for (i = 0; i < 16; i++) { + if (castout_entry < 8) { + global_entry = (esid & 0x1f) << 3; + ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); + castout_ste = ste + castout_entry; + } else { + global_entry = ((~esid) & 0x1f) << 3; + ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); + castout_ste = ste + (castout_entry - 8); + } + + /* Dont cast out the first kernel segment */ + if ((castout_ste->esid_data & ESID_MASK) != KERNELBASE) + break; + + castout_entry = (castout_entry + 1) & 0xf; + } + + get_paca()->stab_rr = (castout_entry + 1) & 0xf; + + /* Modify the old entry to the new value. */ + + /* Force previous translations to complete. DRENG */ + asm volatile("isync" : : : "memory"); + + old_esid = castout_ste->esid_data >> SID_SHIFT; + castout_ste->esid_data = 0; /* Invalidate old entry */ + + asm volatile("sync" : : : "memory"); /* Order update */ + + castout_ste->vsid_data = vsid_data; + asm volatile("eieio" : : : "memory"); /* Order update */ + castout_ste->esid_data = esid_data; + + asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); + /* Ensure completion of slbie */ + asm volatile("sync" : : : "memory"); + + return (global_entry | (castout_entry & 0x7)); +} + +/* + * Allocate a segment table entry for the given ea and mm + */ +static int __ste_allocate(unsigned long ea, struct mm_struct *mm) +{ + unsigned long vsid; + unsigned char stab_entry; + unsigned long offset; + + /* Kernel or user address? */ + if (ea >= KERNELBASE) { + vsid = get_kernel_vsid(ea); + } else { + if ((ea >= TASK_SIZE_USER64) || (! mm)) + return 1; + + vsid = get_vsid(mm->context.id, ea); + } + + stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); + + if (ea < KERNELBASE) { + offset = __get_cpu_var(stab_cache_ptr); + if (offset < NR_STAB_CACHE_ENTRIES) + __get_cpu_var(stab_cache[offset++]) = stab_entry; + else + offset = NR_STAB_CACHE_ENTRIES+1; + __get_cpu_var(stab_cache_ptr) = offset; + + /* Order update */ + asm volatile("sync":::"memory"); + } + + return 0; +} + +int ste_allocate(unsigned long ea) +{ + return __ste_allocate(ea, current->mm); +} + +/* + * Do the segment table work for a context switch: flush all user + * entries from the table, then preload some probably useful entries + * for the new task + */ +void switch_stab(struct task_struct *tsk, struct mm_struct *mm) +{ + struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; + struct stab_entry *ste; + unsigned long offset = __get_cpu_var(stab_cache_ptr); + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; + + /* Force previous translations to complete. DRENG */ + asm volatile("isync" : : : "memory"); + + if (offset <= NR_STAB_CACHE_ENTRIES) { + int i; + + for (i = 0; i < offset; i++) { + ste = stab + __get_cpu_var(stab_cache[i]); + ste->esid_data = 0; /* invalidate entry */ + } + } else { + unsigned long entry; + + /* Invalidate all entries. */ + ste = stab; + + /* Never flush the first entry. */ + ste += 1; + for (entry = 1; + entry < (PAGE_SIZE / sizeof(struct stab_entry)); + entry++, ste++) { + unsigned long ea; + ea = ste->esid_data & ESID_MASK; + if (ea < KERNELBASE) { + ste->esid_data = 0; + } + } + } + + asm volatile("sync; slbia; sync":::"memory"); + + __get_cpu_var(stab_cache_ptr) = 0; + + /* Now preload some entries for the new task */ + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + __ste_allocate(pc, mm); + + if (GET_ESID(pc) == GET_ESID(stack)) + return; + + __ste_allocate(stack, mm); + + if ((GET_ESID(pc) == GET_ESID(unmapped_base)) + || (GET_ESID(stack) == GET_ESID(unmapped_base))) + return; + + __ste_allocate(unmapped_base, mm); + + /* Order update */ + asm volatile("sync" : : : "memory"); +} + +extern void slb_initialize(void); + +/* + * Build an entry for the base kernel segment and put it into + * the segment table or SLB. All other segment table or SLB + * entries are faulted in. + */ +void stab_initialize(unsigned long stab) +{ + unsigned long vsid = get_kernel_vsid(KERNELBASE); + + if (cpu_has_feature(CPU_FTR_SLB)) { + slb_initialize(); + } else { + asm volatile("isync; slbia; isync":::"memory"); + make_ste(stab, GET_ESID(KERNELBASE), vsid); + + /* Order update */ + asm volatile("sync":::"memory"); + } +} diff --git a/arch/ppc64/mm/tlb.c b/arch/ppc64/mm/tlb.c new file mode 100644 index 00000000000..26f0172c452 --- /dev/null +++ b/arch/ppc64/mm/tlb.c @@ -0,0 +1,180 @@ +/* + * This file contains the routines for flushing entries from the + * TLB and MMU hash table. + * + * Derived from arch/ppc64/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); + +/* This is declared as we are using the more or less generic + * include/asm-ppc64/tlb.h file -- tgall + */ +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); +unsigned long pte_freelist_forced_free; + +void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) +{ + /* This is safe as we are holding page_table_lock */ + cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); + struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + + if (atomic_read(&tlb->mm->mm_users) < 2 || + cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { + pte_free(ptepage); + return; + } + + if (*batchp == NULL) { + *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); + if (*batchp == NULL) { + pte_free_now(ptepage); + return; + } + (*batchp)->index = 0; + } + (*batchp)->pages[(*batchp)->index++] = ptepage; + if ((*batchp)->index == PTE_FREELIST_SIZE) { + pte_free_submit(*batchp); + *batchp = NULL; + } +} + +/* + * Update the MMU hash table to correspond with a change to + * a Linux PTE. If wrprot is true, it is permissible to + * change the existing HPTE to read-only rather than removing it + * (if we remove it we should clear the _PTE_HPTEFLAGS bits). + */ +void hpte_update(struct mm_struct *mm, unsigned long addr, + unsigned long pte, int wrprot) +{ + int i; + unsigned long context = 0; + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + + if (REGION_ID(addr) == USER_REGION_ID) + context = mm->context.id; + i = batch->index; + + /* + * This can happen when we are in the middle of a TLB batch and + * we encounter memory pressure (eg copy_page_range when it tries + * to allocate a new pte). If we have to reclaim memory and end + * up scanning and resetting referenced bits then our batch context + * will change mid stream. + */ + if (unlikely(i != 0 && context != batch->context)) { + flush_tlb_pending(); + i = 0; + } + + if (i == 0) { + batch->context = context; + batch->mm = mm; + } + batch->pte[i] = __pte(pte); + batch->addr[i] = addr; + batch->index = ++i; + if (i >= PPC64_TLB_BATCH_NR) + flush_tlb_pending(); +} + +void __flush_tlb_pending(struct ppc64_tlb_batch *batch) +{ + int i; + int cpu; + cpumask_t tmp; + int local = 0; + + BUG_ON(in_interrupt()); + + cpu = get_cpu(); + i = batch->index; + tmp = cpumask_of_cpu(cpu); + if (cpus_equal(batch->mm->cpu_vm_mask, tmp)) + local = 1; + + if (i == 1) + flush_hash_page(batch->context, batch->addr[0], batch->pte[0], + local); + else + flush_hash_range(batch->context, i, local); + batch->index = 0; + put_cpu(); +} + +#ifdef CONFIG_SMP +static void pte_free_smp_sync(void *arg) +{ + /* Do nothing, just ensure we sync with all CPUs */ +} +#endif + +/* This is only called when we are critically out of memory + * (and fail to get a page in pte_free_tlb). + */ +void pte_free_now(struct page *ptepage) +{ + pte_freelist_forced_free++; + + smp_call_function(pte_free_smp_sync, NULL, 0, 1); + + pte_free(ptepage); +} + +static void pte_free_rcu_callback(struct rcu_head *head) +{ + struct pte_freelist_batch *batch = + container_of(head, struct pte_freelist_batch, rcu); + unsigned int i; + + for (i = 0; i < batch->index; i++) + pte_free(batch->pages[i]); + free_page((unsigned long)batch); +} + +void pte_free_submit(struct pte_freelist_batch *batch) +{ + INIT_RCU_HEAD(&batch->rcu); + call_rcu(&batch->rcu, pte_free_rcu_callback); +} + +void pte_free_finish(void) +{ + /* This is safe as we are holding page_table_lock */ + struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + + if (*batchp == NULL) + return; + pte_free_submit(*batchp); + *batchp = NULL; +} diff --git a/arch/ppc64/oprofile/Kconfig b/arch/ppc64/oprofile/Kconfig new file mode 100644 index 00000000000..5ade19801b9 --- /dev/null +++ b/arch/ppc64/oprofile/Kconfig @@ -0,0 +1,23 @@ + +menu "Profiling support" + depends on EXPERIMENTAL + +config PROFILING + bool "Profiling support (EXPERIMENTAL)" + help + Say Y here to enable the extended profiling support mechanisms used + by profilers such as OProfile. + + +config OPROFILE + tristate "OProfile system profiling (EXPERIMENTAL)" + depends on PROFILING + help + OProfile is a profiling system capable of profiling the + whole system, include the kernel, kernel modules, libraries, + and applications. + + If unsure, say N. + +endmenu + diff --git a/arch/ppc64/oprofile/Makefile b/arch/ppc64/oprofile/Makefile new file mode 100644 index 00000000000..162dbf06c14 --- /dev/null +++ b/arch/ppc64/oprofile/Makefile @@ -0,0 +1,9 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) common.o op_model_rs64.o op_model_power4.o diff --git a/arch/ppc64/oprofile/common.c b/arch/ppc64/oprofile/common.c new file mode 100644 index 00000000000..b28bfda23d9 --- /dev/null +++ b/arch/ppc64/oprofile/common.c @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2004 Anton Blanchard , IBM + * + * Based on alpha version. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "op_impl.h" + +extern struct op_ppc64_model op_model_rs64; +extern struct op_ppc64_model op_model_power4; +static struct op_ppc64_model *model; + +static struct op_counter_config ctr[OP_MAX_COUNTER]; +static struct op_system_config sys; + +static void op_handle_interrupt(struct pt_regs *regs) +{ + model->handle_interrupt(regs, ctr); +} + +static int op_ppc64_setup(void) +{ + int err; + + /* Grab the hardware */ + err = reserve_pmc_hardware(op_handle_interrupt); + if (err) + return err; + + /* Pre-compute the values to stuff in the hardware registers. */ + model->reg_setup(ctr, &sys, model->num_counters); + + /* Configure the registers on all cpus. */ + on_each_cpu(model->cpu_setup, NULL, 0, 1); + + return 0; +} + +static void op_ppc64_shutdown(void) +{ + release_pmc_hardware(); +} + +static void op_ppc64_cpu_start(void *dummy) +{ + model->start(ctr); +} + +static int op_ppc64_start(void) +{ + on_each_cpu(op_ppc64_cpu_start, NULL, 0, 1); + return 0; +} + +static inline void op_ppc64_cpu_stop(void *dummy) +{ + model->stop(); +} + +static void op_ppc64_stop(void) +{ + on_each_cpu(op_ppc64_cpu_stop, NULL, 0, 1); +} + +static int op_ppc64_create_files(struct super_block *sb, struct dentry *root) +{ + int i; + + /* + * There is one mmcr0, mmcr1 and mmcra for setting the events for + * all of the counters. + */ + oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0); + oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1); + oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra); + + for (i = 0; i < model->num_counters; ++i) { + struct dentry *dir; + char buf[3]; + + snprintf(buf, sizeof buf, "%d", i); + dir = oprofilefs_mkdir(sb, root, buf); + + oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); + oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event); + oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count); + /* + * We dont support per counter user/kernel selection, but + * we leave the entries because userspace expects them + */ + oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel); + oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user); + oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask); + } + + oprofilefs_create_ulong(sb, root, "enable_kernel", &sys.enable_kernel); + oprofilefs_create_ulong(sb, root, "enable_user", &sys.enable_user); + oprofilefs_create_ulong(sb, root, "backtrace_spinlocks", + &sys.backtrace_spinlocks); + + /* Default to tracing both kernel and user */ + sys.enable_kernel = 1; + sys.enable_user = 1; + + /* Turn on backtracing through spinlocks by default */ + sys.backtrace_spinlocks = 1; + + return 0; +} + +int __init oprofile_arch_init(struct oprofile_operations *ops) +{ + unsigned int pvr; + + pvr = mfspr(SPRN_PVR); + + switch (PVR_VER(pvr)) { + case PV_630: + case PV_630p: + model = &op_model_rs64; + model->num_counters = 8; + ops->cpu_type = "ppc64/power3"; + break; + + case PV_NORTHSTAR: + case PV_PULSAR: + case PV_ICESTAR: + case PV_SSTAR: + model = &op_model_rs64; + model->num_counters = 8; + ops->cpu_type = "ppc64/rs64"; + break; + + case PV_POWER4: + case PV_POWER4p: + model = &op_model_power4; + model->num_counters = 8; + ops->cpu_type = "ppc64/power4"; + break; + + case PV_970: + case PV_970FX: + model = &op_model_power4; + model->num_counters = 8; + ops->cpu_type = "ppc64/970"; + break; + + case PV_POWER5: + case PV_POWER5p: + model = &op_model_power4; + model->num_counters = 6; + ops->cpu_type = "ppc64/power5"; + break; + + default: + return -ENODEV; + } + + ops->create_files = op_ppc64_create_files; + ops->setup = op_ppc64_setup; + ops->shutdown = op_ppc64_shutdown; + ops->start = op_ppc64_start; + ops->stop = op_ppc64_stop; + + printk(KERN_INFO "oprofile: using %s performance monitoring.\n", + ops->cpu_type); + + return 0; +} + +void oprofile_arch_exit(void) +{ +} diff --git a/arch/ppc64/oprofile/op_impl.h b/arch/ppc64/oprofile/op_impl.h new file mode 100644 index 00000000000..7fa7eaabc03 --- /dev/null +++ b/arch/ppc64/oprofile/op_impl.h @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2004 Anton Blanchard , IBM + * + * Based on alpha version. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef OP_IMPL_H +#define OP_IMPL_H 1 + +#define OP_MAX_COUNTER 8 + +/* Per-counter configuration as set via oprofilefs. */ +struct op_counter_config { + unsigned long valid; + unsigned long enabled; + unsigned long event; + unsigned long count; + unsigned long kernel; + /* We dont support per counter user/kernel selection */ + unsigned long user; + unsigned long unit_mask; +}; + +/* System-wide configuration as set via oprofilefs. */ +struct op_system_config { + unsigned long mmcr0; + unsigned long mmcr1; + unsigned long mmcra; + unsigned long enable_kernel; + unsigned long enable_user; + unsigned long backtrace_spinlocks; +}; + +/* Per-arch configuration */ +struct op_ppc64_model { + void (*reg_setup) (struct op_counter_config *, + struct op_system_config *, + int num_counters); + void (*cpu_setup) (void *); + void (*start) (struct op_counter_config *); + void (*stop) (void); + void (*handle_interrupt) (struct pt_regs *, + struct op_counter_config *); + int num_counters; +}; + +static inline unsigned int ctr_read(unsigned int i) +{ + switch(i) { + case 0: + return mfspr(SPRN_PMC1); + case 1: + return mfspr(SPRN_PMC2); + case 2: + return mfspr(SPRN_PMC3); + case 3: + return mfspr(SPRN_PMC4); + case 4: + return mfspr(SPRN_PMC5); + case 5: + return mfspr(SPRN_PMC6); + case 6: + return mfspr(SPRN_PMC7); + case 7: + return mfspr(SPRN_PMC8); + default: + return 0; + } +} + +static inline void ctr_write(unsigned int i, unsigned int val) +{ + switch(i) { + case 0: + mtspr(SPRN_PMC1, val); + break; + case 1: + mtspr(SPRN_PMC2, val); + break; + case 2: + mtspr(SPRN_PMC3, val); + break; + case 3: + mtspr(SPRN_PMC4, val); + break; + case 4: + mtspr(SPRN_PMC5, val); + break; + case 5: + mtspr(SPRN_PMC6, val); + break; + case 6: + mtspr(SPRN_PMC7, val); + break; + case 7: + mtspr(SPRN_PMC8, val); + break; + default: + break; + } +} + +#endif diff --git a/arch/ppc64/oprofile/op_model_power4.c b/arch/ppc64/oprofile/op_model_power4.c new file mode 100644 index 00000000000..3d103d66870 --- /dev/null +++ b/arch/ppc64/oprofile/op_model_power4.c @@ -0,0 +1,313 @@ +/* + * Copyright (C) 2004 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define dbg(args...) + +#include "op_impl.h" + +static unsigned long reset_value[OP_MAX_COUNTER]; + +static int num_counters; +static int oprofile_running; +static int mmcra_has_sihv; + +/* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */ +static u32 mmcr0_val; +static u64 mmcr1_val; +static u32 mmcra_val; + +/* + * Since we do not have an NMI, backtracing through spinlocks is + * only a best guess. In light of this, allow it to be disabled at + * runtime. + */ +static int backtrace_spinlocks; + +static void power4_reg_setup(struct op_counter_config *ctr, + struct op_system_config *sys, + int num_ctrs) +{ + int i; + + num_counters = num_ctrs; + + /* + * SIHV / SIPR bits are only implemented on POWER4+ (GQ) and above. + * However we disable it on all POWER4 until we verify it works + * (I was seeing some strange behaviour last time I tried). + * + * It has been verified to work on POWER5 so we enable it there. + */ + if (cpu_has_feature(CPU_FTR_MMCRA_SIHV)) + mmcra_has_sihv = 1; + + /* + * The performance counter event settings are given in the mmcr0, + * mmcr1 and mmcra values passed from the user in the + * op_system_config structure (sys variable). + */ + mmcr0_val = sys->mmcr0; + mmcr1_val = sys->mmcr1; + mmcra_val = sys->mmcra; + + backtrace_spinlocks = sys->backtrace_spinlocks; + + for (i = 0; i < num_counters; ++i) + reset_value[i] = 0x80000000UL - ctr[i].count; + + /* setup user and kernel profiling */ + if (sys->enable_kernel) + mmcr0_val &= ~MMCR0_KERNEL_DISABLE; + else + mmcr0_val |= MMCR0_KERNEL_DISABLE; + + if (sys->enable_user) + mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; + else + mmcr0_val |= MMCR0_PROBLEM_DISABLE; +} + +extern void ppc64_enable_pmcs(void); + +static void power4_cpu_setup(void *unused) +{ + unsigned int mmcr0 = mmcr0_val; + unsigned long mmcra = mmcra_val; + + ppc64_enable_pmcs(); + + /* set the freeze bit */ + mmcr0 |= MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE; + mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE; + mtspr(SPRN_MMCR0, mmcr0); + + mtspr(SPRN_MMCR1, mmcr1_val); + + mmcra |= MMCRA_SAMPLE_ENABLE; + mtspr(SPRN_MMCRA, mmcra); + + dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(), + mfspr(SPRN_MMCR0)); + dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), + mfspr(SPRN_MMCR1)); + dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(), + mfspr(SPRN_MMCRA)); +} + +static void power4_start(struct op_counter_config *ctr) +{ + int i; + unsigned int mmcr0; + + /* set the PMM bit (see comment below) */ + mtmsrd(mfmsr() | MSR_PMM); + + for (i = 0; i < num_counters; ++i) { + if (ctr[i].enabled) { + ctr_write(i, reset_value[i]); + } else { + ctr_write(i, 0); + } + } + + mmcr0 = mfspr(SPRN_MMCR0); + + /* + * We must clear the PMAO bit on some (GQ) chips. Just do it + * all the time + */ + mmcr0 &= ~MMCR0_PMAO; + + /* + * now clear the freeze bit, counting will not start until we + * rfid from this excetion, because only at that point will + * the PMM bit be cleared + */ + mmcr0 &= ~MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + oprofile_running = 1; + + dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); +} + +static void power4_stop(void) +{ + unsigned int mmcr0; + + /* freeze counters */ + mmcr0 = mfspr(SPRN_MMCR0); + mmcr0 |= MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + oprofile_running = 0; + + dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); + + mb(); +} + +/* Fake functions used by canonicalize_pc */ +static void __attribute_used__ hypervisor_bucket(void) +{ +} + +static void __attribute_used__ rtas_bucket(void) +{ +} + +static void __attribute_used__ kernel_unknown_bucket(void) +{ +} + +static unsigned long check_spinlock_pc(struct pt_regs *regs, + unsigned long profile_pc) +{ + unsigned long pc = instruction_pointer(regs); + + /* + * If both the SIAR (sampled instruction) and the perfmon exception + * occurred in a spinlock region then we account the sample to the + * calling function. This isnt 100% correct, we really need soft + * IRQ disable so we always get the perfmon exception at the + * point at which the SIAR is set. + */ + if (backtrace_spinlocks && in_lock_functions(pc) && + in_lock_functions(profile_pc)) + return regs->link; + else + return profile_pc; +} + +/* + * On GQ and newer the MMCRA stores the HV and PR bits at the time + * the SIAR was sampled. We use that to work out if the SIAR was sampled in + * the hypervisor, our exception vectors or RTAS. + */ +static unsigned long get_pc(struct pt_regs *regs) +{ + unsigned long pc = mfspr(SPRN_SIAR); + unsigned long mmcra; + + /* Cant do much about it */ + if (!mmcra_has_sihv) + return check_spinlock_pc(regs, pc); + + mmcra = mfspr(SPRN_MMCRA); + + /* Were we in the hypervisor? */ + if ((systemcfg->platform == PLATFORM_PSERIES_LPAR) && + (mmcra & MMCRA_SIHV)) + /* function descriptor madness */ + return *((unsigned long *)hypervisor_bucket); + + /* We were in userspace, nothing to do */ + if (mmcra & MMCRA_SIPR) + return pc; + +#ifdef CONFIG_PPC_RTAS + /* Were we in RTAS? */ + if (pc >= rtas.base && pc < (rtas.base + rtas.size)) + /* function descriptor madness */ + return *((unsigned long *)rtas_bucket); +#endif + + /* Were we in our exception vectors or SLB real mode miss handler? */ + if (pc < 0x1000000UL) + return (unsigned long)__va(pc); + + /* Not sure where we were */ + if (pc < KERNELBASE) + /* function descriptor madness */ + return *((unsigned long *)kernel_unknown_bucket); + + return check_spinlock_pc(regs, pc); +} + +static int get_kernel(unsigned long pc) +{ + int is_kernel; + + if (!mmcra_has_sihv) { + is_kernel = (pc >= KERNELBASE); + } else { + unsigned long mmcra = mfspr(SPRN_MMCRA); + is_kernel = ((mmcra & MMCRA_SIPR) == 0); + } + + return is_kernel; +} + +static void power4_handle_interrupt(struct pt_regs *regs, + struct op_counter_config *ctr) +{ + unsigned long pc; + int is_kernel; + int val; + int i; + unsigned int mmcr0; + + pc = get_pc(regs); + is_kernel = get_kernel(pc); + + /* set the PMM bit (see comment below) */ + mtmsrd(mfmsr() | MSR_PMM); + + for (i = 0; i < num_counters; ++i) { + val = ctr_read(i); + if (val < 0) { + if (oprofile_running && ctr[i].enabled) { + oprofile_add_pc(pc, is_kernel, i); + ctr_write(i, reset_value[i]); + } else { + ctr_write(i, 0); + } + } + } + + mmcr0 = mfspr(SPRN_MMCR0); + + /* reset the perfmon trigger */ + mmcr0 |= MMCR0_PMXE; + + /* + * We must clear the PMAO bit on some (GQ) chips. Just do it + * all the time + */ + mmcr0 &= ~MMCR0_PMAO; + + /* + * now clear the freeze bit, counting will not start until we + * rfid from this exception, because only at that point will + * the PMM bit be cleared + */ + mmcr0 &= ~MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); +} + +struct op_ppc64_model op_model_power4 = { + .reg_setup = power4_reg_setup, + .cpu_setup = power4_cpu_setup, + .start = power4_start, + .stop = power4_stop, + .handle_interrupt = power4_handle_interrupt, +}; diff --git a/arch/ppc64/oprofile/op_model_rs64.c b/arch/ppc64/oprofile/op_model_rs64.c new file mode 100644 index 00000000000..bcec506c266 --- /dev/null +++ b/arch/ppc64/oprofile/op_model_rs64.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2004 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define dbg(args...) + +#include "op_impl.h" + +static void ctrl_write(unsigned int i, unsigned int val) +{ + unsigned int tmp = 0; + unsigned long shift = 0, mask = 0; + + dbg("ctrl_write %d %x\n", i, val); + + switch(i) { + case 0: + tmp = mfspr(SPRN_MMCR0); + shift = 6; + mask = 0x7F; + break; + case 1: + tmp = mfspr(SPRN_MMCR0); + shift = 0; + mask = 0x3F; + break; + case 2: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 4; + mask = 0x1F; + break; + case 3: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 9; + mask = 0x1F; + break; + case 4: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 14; + mask = 0x1F; + break; + case 5: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 19; + mask = 0x1F; + break; + case 6: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 24; + mask = 0x1F; + break; + case 7: + tmp = mfspr(SPRN_MMCR1); + shift = 31 - 28; + mask = 0xF; + break; + } + + tmp = tmp & ~(mask << shift); + tmp |= val << shift; + + switch(i) { + case 0: + case 1: + mtspr(SPRN_MMCR0, tmp); + break; + default: + mtspr(SPRN_MMCR1, tmp); + } + + dbg("ctrl_write mmcr0 %lx mmcr1 %lx\n", mfspr(SPRN_MMCR0), + mfspr(SPRN_MMCR1)); +} + +static unsigned long reset_value[OP_MAX_COUNTER]; + +static int num_counters; + +static void rs64_reg_setup(struct op_counter_config *ctr, + struct op_system_config *sys, + int num_ctrs) +{ + int i; + + num_counters = num_ctrs; + + for (i = 0; i < num_counters; ++i) + reset_value[i] = 0x80000000UL - ctr[i].count; + + /* XXX setup user and kernel profiling */ +} + +static void rs64_cpu_setup(void *unused) +{ + unsigned int mmcr0; + + /* reset MMCR0 and set the freeze bit */ + mmcr0 = MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + /* reset MMCR1, MMCRA */ + mtspr(SPRN_MMCR1, 0); + + if (cpu_has_feature(CPU_FTR_MMCRA)) + mtspr(SPRN_MMCRA, 0); + + mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE; + /* Only applies to POWER3, but should be safe on RS64 */ + mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE; + mtspr(SPRN_MMCR0, mmcr0); + + dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(), + mfspr(SPRN_MMCR0)); + dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), + mfspr(SPRN_MMCR1)); +} + +static void rs64_start(struct op_counter_config *ctr) +{ + int i; + unsigned int mmcr0; + + /* set the PMM bit (see comment below) */ + mtmsrd(mfmsr() | MSR_PMM); + + for (i = 0; i < num_counters; ++i) { + if (ctr[i].enabled) { + ctr_write(i, reset_value[i]); + ctrl_write(i, ctr[i].event); + } else { + ctr_write(i, 0); + } + } + + mmcr0 = mfspr(SPRN_MMCR0); + + /* + * now clear the freeze bit, counting will not start until we + * rfid from this excetion, because only at that point will + * the PMM bit be cleared + */ + mmcr0 &= ~MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); +} + +static void rs64_stop(void) +{ + unsigned int mmcr0; + + /* freeze counters */ + mmcr0 = mfspr(SPRN_MMCR0); + mmcr0 |= MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); + + dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); + + mb(); +} + +static void rs64_handle_interrupt(struct pt_regs *regs, + struct op_counter_config *ctr) +{ + unsigned int mmcr0; + int val; + int i; + unsigned long pc = mfspr(SPRN_SIAR); + int is_kernel = (pc >= KERNELBASE); + + /* set the PMM bit (see comment below) */ + mtmsrd(mfmsr() | MSR_PMM); + + for (i = 0; i < num_counters; ++i) { + val = ctr_read(i); + if (val < 0) { + if (ctr[i].enabled) { + oprofile_add_pc(pc, is_kernel, i); + ctr_write(i, reset_value[i]); + } else { + ctr_write(i, 0); + } + } + } + + mmcr0 = mfspr(SPRN_MMCR0); + + /* reset the perfmon trigger */ + mmcr0 |= MMCR0_PMXE; + + /* + * now clear the freeze bit, counting will not start until we + * rfid from this exception, because only at that point will + * the PMM bit be cleared + */ + mmcr0 &= ~MMCR0_FC; + mtspr(SPRN_MMCR0, mmcr0); +} + +struct op_ppc64_model op_model_rs64 = { + .reg_setup = rs64_reg_setup, + .cpu_setup = rs64_cpu_setup, + .start = rs64_start, + .stop = rs64_stop, + .handle_interrupt = rs64_handle_interrupt, +}; diff --git a/arch/ppc64/xmon/Makefile b/arch/ppc64/xmon/Makefile new file mode 100644 index 00000000000..fb21a7088d3 --- /dev/null +++ b/arch/ppc64/xmon/Makefile @@ -0,0 +1,5 @@ +# Makefile for xmon + +EXTRA_CFLAGS += -mno-minimal-toc + +obj-y := start.o xmon.o ppc-dis.o ppc-opc.o subr_prf.o setjmp.o diff --git a/arch/ppc64/xmon/ansidecl.h b/arch/ppc64/xmon/ansidecl.h new file mode 100644 index 00000000000..c9b9f0929e9 --- /dev/null +++ b/arch/ppc64/xmon/ansidecl.h @@ -0,0 +1,141 @@ +/* ANSI and traditional C compatibility macros + Copyright 1991, 1992 Free Software Foundation, Inc. + This file is part of the GNU C Library. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* ANSI and traditional C compatibility macros + + ANSI C is assumed if __STDC__ is #defined. + + Macro ANSI C definition Traditional C definition + ----- ---- - ---------- ----------- - ---------- + PTR `void *' `char *' + LONG_DOUBLE `long double' `double' + VOLATILE `volatile' `' + SIGNED `signed' `' + PTRCONST `void *const' `char *' + ANSI_PROTOTYPES 1 not defined + + CONST is also defined, but is obsolete. Just use const. + + DEFUN (name, arglist, args) + + Defines function NAME. + + ARGLIST lists the arguments, separated by commas and enclosed in + parentheses. ARGLIST becomes the argument list in traditional C. + + ARGS list the arguments with their types. It becomes a prototype in + ANSI C, and the type declarations in traditional C. Arguments should + be separated with `AND'. For functions with a variable number of + arguments, the last thing listed should be `DOTS'. + + DEFUN_VOID (name) + + Defines a function NAME, which takes no arguments. + + obsolete -- EXFUN (name, (prototype)) -- obsolete. + + Replaced by PARAMS. Do not use; will disappear someday soon. + Was used in external function declarations. + In ANSI C it is `NAME PROTOTYPE' (so PROTOTYPE should be enclosed in + parentheses). In traditional C it is `NAME()'. + For a function that takes no arguments, PROTOTYPE should be `(void)'. + + PARAMS ((args)) + + We could use the EXFUN macro to handle prototype declarations, but + the name is misleading and the result is ugly. So we just define a + simple macro to handle the parameter lists, as in: + + static int foo PARAMS ((int, char)); + + This produces: `static int foo();' or `static int foo (int, char);' + + EXFUN would have done it like this: + + static int EXFUN (foo, (int, char)); + + but the function is not external...and it's hard to visually parse + the function name out of the mess. EXFUN should be considered + obsolete; new code should be written to use PARAMS. + + For example: + extern int printf PARAMS ((CONST char *format DOTS)); + int DEFUN(fprintf, (stream, format), + FILE *stream AND CONST char *format DOTS) { ... } + void DEFUN_VOID(abort) { ... } +*/ + +#ifndef _ANSIDECL_H + +#define _ANSIDECL_H 1 + + +/* Every source file includes this file, + so they will all get the switch for lint. */ +/* LINTLIBRARY */ + + +#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(WIN32) +/* All known AIX compilers implement these things (but don't always + define __STDC__). The RISC/OS MIPS compiler defines these things + in SVR4 mode, but does not define __STDC__. */ + +#define PTR void * +#define PTRCONST void *CONST +#define LONG_DOUBLE long double + +#define AND , +#define NOARGS void +#define CONST const +#define VOLATILE volatile +#define SIGNED signed +#define DOTS , ... + +#define EXFUN(name, proto) name proto +#define DEFUN(name, arglist, args) name(args) +#define DEFUN_VOID(name) name(void) + +#define PROTO(type, name, arglist) type name arglist +#define PARAMS(paramlist) paramlist +#define ANSI_PROTOTYPES 1 + +#else /* Not ANSI C. */ + +#define PTR char * +#define PTRCONST PTR +#define LONG_DOUBLE double + +#define AND ; +#define NOARGS +#define CONST +#ifndef const /* some systems define it in header files for non-ansi mode */ +#define const +#endif +#define VOLATILE +#define SIGNED +#define DOTS + +#define EXFUN(name, proto) name() +#define DEFUN(name, arglist, args) name arglist args; +#define DEFUN_VOID(name) name() +#define PROTO(type, name, arglist) type name () +#define PARAMS(paramlist) () + +#endif /* ANSI C. */ + +#endif /* ansidecl.h */ diff --git a/arch/ppc64/xmon/nonstdio.h b/arch/ppc64/xmon/nonstdio.h new file mode 100644 index 00000000000..84211a21c6f --- /dev/null +++ b/arch/ppc64/xmon/nonstdio.h @@ -0,0 +1,22 @@ +typedef int FILE; +extern FILE *xmon_stdin, *xmon_stdout; +#define EOF (-1) +#define stdin xmon_stdin +#define stdout xmon_stdout +#define printf xmon_printf +#define fprintf xmon_fprintf +#define fputs xmon_fputs +#define fgets xmon_fgets +#define putchar xmon_putchar +#define getchar xmon_getchar +#define putc xmon_putc +#define getc xmon_getc +#define fopen(n, m) NULL +#define fflush(f) do {} while (0) +#define fclose(f) do {} while (0) +extern char *fgets(char *, int, void *); +extern void xmon_printf(const char *, ...); +extern void xmon_fprintf(void *, const char *, ...); +extern void xmon_sprintf(char *, const char *, ...); + +#define perror(s) printf("%s: no files!\n", (s)) diff --git a/arch/ppc64/xmon/ppc-dis.c b/arch/ppc64/xmon/ppc-dis.c new file mode 100644 index 00000000000..ac0a9d2427e --- /dev/null +++ b/arch/ppc64/xmon/ppc-dis.c @@ -0,0 +1,184 @@ +/* ppc-dis.c -- Disassemble PowerPC instructions + Copyright 1994 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Cygnus Support + +This file is part of GDB, GAS, and the GNU binutils. + +GDB, GAS, and the GNU binutils are free software; you can redistribute +them and/or modify them under the terms of the GNU General Public +License as published by the Free Software Foundation; either version +2, or (at your option) any later version. + +GDB, GAS, and the GNU binutils are distributed in the hope that they +will be useful, but WITHOUT ANY WARRANTY; without even the implied +warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See +the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this file; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include "nonstdio.h" +#include "ansidecl.h" +#include "ppc.h" + +extern void print_address (unsigned long memaddr); + +/* Print a PowerPC or POWER instruction. */ + +int +print_insn_powerpc (unsigned long insn, unsigned long memaddr, int dialect) +{ + const struct powerpc_opcode *opcode; + const struct powerpc_opcode *opcode_end; + unsigned long op; + + if (dialect == 0) + dialect = PPC_OPCODE_PPC | PPC_OPCODE_CLASSIC | PPC_OPCODE_COMMON + | PPC_OPCODE_64 | PPC_OPCODE_POWER4 | PPC_OPCODE_ALTIVEC; + + /* Get the major opcode of the instruction. */ + op = PPC_OP (insn); + + /* Find the first match in the opcode table. We could speed this up + a bit by doing a binary search on the major opcode. */ + opcode_end = powerpc_opcodes + powerpc_num_opcodes; + again: + for (opcode = powerpc_opcodes; opcode < opcode_end; opcode++) + { + unsigned long table_op; + const unsigned char *opindex; + const struct powerpc_operand *operand; + int invalid; + int need_comma; + int need_paren; + + table_op = PPC_OP (opcode->opcode); + if (op < table_op) + break; + if (op > table_op) + continue; + + if ((insn & opcode->mask) != opcode->opcode + || (opcode->flags & dialect) == 0) + continue; + + /* Make two passes over the operands. First see if any of them + have extraction functions, and, if they do, make sure the + instruction is valid. */ + invalid = 0; + for (opindex = opcode->operands; *opindex != 0; opindex++) + { + operand = powerpc_operands + *opindex; + if (operand->extract) + (*operand->extract) (insn, dialect, &invalid); + } + if (invalid) + continue; + + /* The instruction is valid. */ + printf("%s", opcode->name); + if (opcode->operands[0] != 0) + printf("\t"); + + /* Now extract and print the operands. */ + need_comma = 0; + need_paren = 0; + for (opindex = opcode->operands; *opindex != 0; opindex++) + { + long value; + + operand = powerpc_operands + *opindex; + + /* Operands that are marked FAKE are simply ignored. We + already made sure that the extract function considered + the instruction to be valid. */ + if ((operand->flags & PPC_OPERAND_FAKE) != 0) + continue; + + /* Extract the value from the instruction. */ + if (operand->extract) + value = (*operand->extract) (insn, dialect, &invalid); + else + { + value = (insn >> operand->shift) & ((1 << operand->bits) - 1); + if ((operand->flags & PPC_OPERAND_SIGNED) != 0 + && (value & (1 << (operand->bits - 1))) != 0) + value -= 1 << operand->bits; + } + + /* If the operand is optional, and the value is zero, don't + print anything. */ + if ((operand->flags & PPC_OPERAND_OPTIONAL) != 0 + && (operand->flags & PPC_OPERAND_NEXT) == 0 + && value == 0) + continue; + + if (need_comma) + { + printf(","); + need_comma = 0; + } + + /* Print the operand as directed by the flags. */ + if ((operand->flags & PPC_OPERAND_GPR) != 0) + printf("r%ld", value); + else if ((operand->flags & PPC_OPERAND_FPR) != 0) + printf("f%ld", value); + else if ((operand->flags & PPC_OPERAND_VR) != 0) + printf("v%ld", value); + else if ((operand->flags & PPC_OPERAND_RELATIVE) != 0) + print_address (memaddr + value); + else if ((operand->flags & PPC_OPERAND_ABSOLUTE) != 0) + print_address (value & 0xffffffff); + else if ((operand->flags & PPC_OPERAND_CR) == 0 + || (dialect & PPC_OPCODE_PPC) == 0) + printf("%ld", value); + else + { + if (operand->bits == 3) + printf("cr%d", value); + else + { + static const char *cbnames[4] = { "lt", "gt", "eq", "so" }; + int cr; + int cc; + + cr = value >> 2; + if (cr != 0) + printf("4*cr%d+", cr); + cc = value & 3; + printf("%s", cbnames[cc]); + } + } + + if (need_paren) + { + printf(")"); + need_paren = 0; + } + + if ((operand->flags & PPC_OPERAND_PARENS) == 0) + need_comma = 1; + else + { + printf("("); + need_paren = 1; + } + } + + /* We have found and printed an instruction; return. */ + return 4; + } + + if ((dialect & PPC_OPCODE_ANY) != 0) + { + dialect = ~PPC_OPCODE_ANY; + goto again; + } + + /* We could not find a match. */ + printf(".long 0x%lx", insn); + + return 4; +} diff --git a/arch/ppc64/xmon/ppc-opc.c b/arch/ppc64/xmon/ppc-opc.c new file mode 100644 index 00000000000..1e4e7e31997 --- /dev/null +++ b/arch/ppc64/xmon/ppc-opc.c @@ -0,0 +1,4620 @@ +/* ppc-opc.c -- PowerPC opcode list + Copyright 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003 + Free Software Foundation, Inc. + Written by Ian Lance Taylor, Cygnus Support + + This file is part of GDB, GAS, and the GNU binutils. + + GDB, GAS, and the GNU binutils are free software; you can redistribute + them and/or modify them under the terms of the GNU General Public + License as published by the Free Software Foundation; either version + 2, or (at your option) any later version. + + GDB, GAS, and the GNU binutils are distributed in the hope that they + will be useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this file; see the file COPYING. If not, write to the Free + Software Foundation, 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. */ + +#include "nonstdio.h" +#include "ppc.h" + +#define ATTRIBUTE_UNUSED +#define _(x) x + +/* This file holds the PowerPC opcode table. The opcode table + includes almost all of the extended instruction mnemonics. This + permits the disassembler to use them, and simplifies the assembler + logic, at the cost of increasing the table size. The table is + strictly constant data, so the compiler should be able to put it in + the .text section. + + This file also holds the operand table. All knowledge about + inserting operands into instructions and vice-versa is kept in this + file. */ + +/* Local insertion and extraction functions. */ + +static unsigned long insert_bat (unsigned long, long, int, const char **); +static long extract_bat (unsigned long, int, int *); +static unsigned long insert_bba (unsigned long, long, int, const char **); +static long extract_bba (unsigned long, int, int *); +static unsigned long insert_bd (unsigned long, long, int, const char **); +static long extract_bd (unsigned long, int, int *); +static unsigned long insert_bdm (unsigned long, long, int, const char **); +static long extract_bdm (unsigned long, int, int *); +static unsigned long insert_bdp (unsigned long, long, int, const char **); +static long extract_bdp (unsigned long, int, int *); +static unsigned long insert_bo (unsigned long, long, int, const char **); +static long extract_bo (unsigned long, int, int *); +static unsigned long insert_boe (unsigned long, long, int, const char **); +static long extract_boe (unsigned long, int, int *); +static unsigned long insert_dq (unsigned long, long, int, const char **); +static long extract_dq (unsigned long, int, int *); +static unsigned long insert_ds (unsigned long, long, int, const char **); +static long extract_ds (unsigned long, int, int *); +static unsigned long insert_de (unsigned long, long, int, const char **); +static long extract_de (unsigned long, int, int *); +static unsigned long insert_des (unsigned long, long, int, const char **); +static long extract_des (unsigned long, int, int *); +static unsigned long insert_fxm (unsigned long, long, int, const char **); +static long extract_fxm (unsigned long, int, int *); +static unsigned long insert_li (unsigned long, long, int, const char **); +static long extract_li (unsigned long, int, int *); +static unsigned long insert_mbe (unsigned long, long, int, const char **); +static long extract_mbe (unsigned long, int, int *); +static unsigned long insert_mb6 (unsigned long, long, int, const char **); +static long extract_mb6 (unsigned long, int, int *); +static unsigned long insert_nb (unsigned long, long, int, const char **); +static long extract_nb (unsigned long, int, int *); +static unsigned long insert_nsi (unsigned long, long, int, const char **); +static long extract_nsi (unsigned long, int, int *); +static unsigned long insert_ral (unsigned long, long, int, const char **); +static unsigned long insert_ram (unsigned long, long, int, const char **); +static unsigned long insert_raq (unsigned long, long, int, const char **); +static unsigned long insert_ras (unsigned long, long, int, const char **); +static unsigned long insert_rbs (unsigned long, long, int, const char **); +static long extract_rbs (unsigned long, int, int *); +static unsigned long insert_rsq (unsigned long, long, int, const char **); +static unsigned long insert_rtq (unsigned long, long, int, const char **); +static unsigned long insert_sh6 (unsigned long, long, int, const char **); +static long extract_sh6 (unsigned long, int, int *); +static unsigned long insert_spr (unsigned long, long, int, const char **); +static long extract_spr (unsigned long, int, int *); +static unsigned long insert_tbr (unsigned long, long, int, const char **); +static long extract_tbr (unsigned long, int, int *); +static unsigned long insert_ev2 (unsigned long, long, int, const char **); +static long extract_ev2 (unsigned long, int, int *); +static unsigned long insert_ev4 (unsigned long, long, int, const char **); +static long extract_ev4 (unsigned long, int, int *); +static unsigned long insert_ev8 (unsigned long, long, int, const char **); +static long extract_ev8 (unsigned long, int, int *); + +/* The operands table. + + The fields are bits, shift, insert, extract, flags. + + We used to put parens around the various additions, like the one + for BA just below. However, that caused trouble with feeble + compilers with a limit on depth of a parenthesized expression, like + (reportedly) the compiler in Microsoft Developer Studio 5. So we + omit the parens, since the macros are never used in a context where + the addition will be ambiguous. */ + +const struct powerpc_operand powerpc_operands[] = +{ + /* The zero index is used to indicate the end of the list of + operands. */ +#define UNUSED 0 + { 0, 0, 0, 0, 0 }, + + /* The BA field in an XL form instruction. */ +#define BA UNUSED + 1 +#define BA_MASK (0x1f << 16) + { 5, 16, 0, 0, PPC_OPERAND_CR }, + + /* The BA field in an XL form instruction when it must be the same + as the BT field in the same instruction. */ +#define BAT BA + 1 + { 5, 16, insert_bat, extract_bat, PPC_OPERAND_FAKE }, + + /* The BB field in an XL form instruction. */ +#define BB BAT + 1 +#define BB_MASK (0x1f << 11) + { 5, 11, 0, 0, PPC_OPERAND_CR }, + + /* The BB field in an XL form instruction when it must be the same + as the BA field in the same instruction. */ +#define BBA BB + 1 + { 5, 11, insert_bba, extract_bba, PPC_OPERAND_FAKE }, + + /* The BD field in a B form instruction. The lower two bits are + forced to zero. */ +#define BD BBA + 1 + { 16, 0, insert_bd, extract_bd, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The BD field in a B form instruction when absolute addressing is + used. */ +#define BDA BD + 1 + { 16, 0, insert_bd, extract_bd, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + + /* The BD field in a B form instruction when the - modifier is used. + This sets the y bit of the BO field appropriately. */ +#define BDM BDA + 1 + { 16, 0, insert_bdm, extract_bdm, + PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The BD field in a B form instruction when the - modifier is used + and absolute address is used. */ +#define BDMA BDM + 1 + { 16, 0, insert_bdm, extract_bdm, + PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + + /* The BD field in a B form instruction when the + modifier is used. + This sets the y bit of the BO field appropriately. */ +#define BDP BDMA + 1 + { 16, 0, insert_bdp, extract_bdp, + PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The BD field in a B form instruction when the + modifier is used + and absolute addressing is used. */ +#define BDPA BDP + 1 + { 16, 0, insert_bdp, extract_bdp, + PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + + /* The BF field in an X or XL form instruction. */ +#define BF BDPA + 1 + { 3, 23, 0, 0, PPC_OPERAND_CR }, + + /* An optional BF field. This is used for comparison instructions, + in which an omitted BF field is taken as zero. */ +#define OBF BF + 1 + { 3, 23, 0, 0, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, + + /* The BFA field in an X or XL form instruction. */ +#define BFA OBF + 1 + { 3, 18, 0, 0, PPC_OPERAND_CR }, + + /* The BI field in a B form or XL form instruction. */ +#define BI BFA + 1 +#define BI_MASK (0x1f << 16) + { 5, 16, 0, 0, PPC_OPERAND_CR }, + + /* The BO field in a B form instruction. Certain values are + illegal. */ +#define BO BI + 1 +#define BO_MASK (0x1f << 21) + { 5, 21, insert_bo, extract_bo, 0 }, + + /* The BO field in a B form instruction when the + or - modifier is + used. This is like the BO field, but it must be even. */ +#define BOE BO + 1 + { 5, 21, insert_boe, extract_boe, 0 }, + + /* The BT field in an X or XL form instruction. */ +#define BT BOE + 1 + { 5, 21, 0, 0, PPC_OPERAND_CR }, + + /* The condition register number portion of the BI field in a B form + or XL form instruction. This is used for the extended + conditional branch mnemonics, which set the lower two bits of the + BI field. This field is optional. */ +#define CR BT + 1 + { 3, 18, 0, 0, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, + + /* The CRB field in an X form instruction. */ +#define CRB CR + 1 + { 5, 6, 0, 0, 0 }, + + /* The CRFD field in an X form instruction. */ +#define CRFD CRB + 1 + { 3, 23, 0, 0, PPC_OPERAND_CR }, + + /* The CRFS field in an X form instruction. */ +#define CRFS CRFD + 1 + { 3, 0, 0, 0, PPC_OPERAND_CR }, + + /* The CT field in an X form instruction. */ +#define CT CRFS + 1 + { 5, 21, 0, 0, PPC_OPERAND_OPTIONAL }, + + /* The D field in a D form instruction. This is a displacement off + a register, and implies that the next operand is a register in + parentheses. */ +#define D CT + 1 + { 16, 0, 0, 0, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + + /* The DE field in a DE form instruction. This is like D, but is 12 + bits only. */ +#define DE D + 1 + { 14, 0, insert_de, extract_de, PPC_OPERAND_PARENS }, + + /* The DES field in a DES form instruction. This is like DS, but is 14 + bits only (12 stored.) */ +#define DES DE + 1 + { 14, 0, insert_des, extract_des, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + + /* The DQ field in a DQ form instruction. This is like D, but the + lower four bits are forced to zero. */ +#define DQ DES + 1 + { 16, 0, insert_dq, extract_dq, + PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED | PPC_OPERAND_DQ }, + + /* The DS field in a DS form instruction. This is like D, but the + lower two bits are forced to zero. */ +#define DS DQ + 1 + { 16, 0, insert_ds, extract_ds, + PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED | PPC_OPERAND_DS }, + + /* The E field in a wrteei instruction. */ +#define E DS + 1 + { 1, 15, 0, 0, 0 }, + + /* The FL1 field in a POWER SC form instruction. */ +#define FL1 E + 1 + { 4, 12, 0, 0, 0 }, + + /* The FL2 field in a POWER SC form instruction. */ +#define FL2 FL1 + 1 + { 3, 2, 0, 0, 0 }, + + /* The FLM field in an XFL form instruction. */ +#define FLM FL2 + 1 + { 8, 17, 0, 0, 0 }, + + /* The FRA field in an X or A form instruction. */ +#define FRA FLM + 1 +#define FRA_MASK (0x1f << 16) + { 5, 16, 0, 0, PPC_OPERAND_FPR }, + + /* The FRB field in an X or A form instruction. */ +#define FRB FRA + 1 +#define FRB_MASK (0x1f << 11) + { 5, 11, 0, 0, PPC_OPERAND_FPR }, + + /* The FRC field in an A form instruction. */ +#define FRC FRB + 1 +#define FRC_MASK (0x1f << 6) + { 5, 6, 0, 0, PPC_OPERAND_FPR }, + + /* The FRS field in an X form instruction or the FRT field in a D, X + or A form instruction. */ +#define FRS FRC + 1 +#define FRT FRS + { 5, 21, 0, 0, PPC_OPERAND_FPR }, + + /* The FXM field in an XFX instruction. */ +#define FXM FRS + 1 +#define FXM_MASK (0xff << 12) + { 8, 12, insert_fxm, extract_fxm, 0 }, + + /* Power4 version for mfcr. */ +#define FXM4 FXM + 1 + { 8, 12, insert_fxm, extract_fxm, PPC_OPERAND_OPTIONAL }, + + /* The L field in a D or X form instruction. */ +#define L FXM4 + 1 + { 1, 21, 0, 0, PPC_OPERAND_OPTIONAL }, + + /* The LEV field in a POWER SC form instruction. */ +#define LEV L + 1 + { 7, 5, 0, 0, 0 }, + + /* The LI field in an I form instruction. The lower two bits are + forced to zero. */ +#define LI LEV + 1 + { 26, 0, insert_li, extract_li, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The LI field in an I form instruction when used as an absolute + address. */ +#define LIA LI + 1 + { 26, 0, insert_li, extract_li, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + + /* The LS field in an X (sync) form instruction. */ +#define LS LIA + 1 + { 2, 21, 0, 0, PPC_OPERAND_OPTIONAL }, + + /* The MB field in an M form instruction. */ +#define MB LS + 1 +#define MB_MASK (0x1f << 6) + { 5, 6, 0, 0, 0 }, + + /* The ME field in an M form instruction. */ +#define ME MB + 1 +#define ME_MASK (0x1f << 1) + { 5, 1, 0, 0, 0 }, + + /* The MB and ME fields in an M form instruction expressed a single + operand which is a bitmask indicating which bits to select. This + is a two operand form using PPC_OPERAND_NEXT. See the + description in opcode/ppc.h for what this means. */ +#define MBE ME + 1 + { 5, 6, 0, 0, PPC_OPERAND_OPTIONAL | PPC_OPERAND_NEXT }, + { 32, 0, insert_mbe, extract_mbe, 0 }, + + /* The MB or ME field in an MD or MDS form instruction. The high + bit is wrapped to the low end. */ +#define MB6 MBE + 2 +#define ME6 MB6 +#define MB6_MASK (0x3f << 5) + { 6, 5, insert_mb6, extract_mb6, 0 }, + + /* The MO field in an mbar instruction. */ +#define MO MB6 + 1 + { 5, 21, 0, 0, 0 }, + + /* The NB field in an X form instruction. The value 32 is stored as + 0. */ +#define NB MO + 1 + { 6, 11, insert_nb, extract_nb, 0 }, + + /* The NSI field in a D form instruction. This is the same as the + SI field, only negated. */ +#define NSI NB + 1 + { 16, 0, insert_nsi, extract_nsi, + PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED }, + + /* The RA field in an D, DS, DQ, X, XO, M, or MDS form instruction. */ +#define RA NSI + 1 +#define RA_MASK (0x1f << 16) + { 5, 16, 0, 0, PPC_OPERAND_GPR }, + + /* The RA field in the DQ form lq instruction, which has special + value restrictions. */ +#define RAQ RA + 1 + { 5, 16, insert_raq, 0, PPC_OPERAND_GPR }, + + /* The RA field in a D or X form instruction which is an updating + load, which means that the RA field may not be zero and may not + equal the RT field. */ +#define RAL RAQ + 1 + { 5, 16, insert_ral, 0, PPC_OPERAND_GPR }, + + /* The RA field in an lmw instruction, which has special value + restrictions. */ +#define RAM RAL + 1 + { 5, 16, insert_ram, 0, PPC_OPERAND_GPR }, + + /* The RA field in a D or X form instruction which is an updating + store or an updating floating point load, which means that the RA + field may not be zero. */ +#define RAS RAM + 1 + { 5, 16, insert_ras, 0, PPC_OPERAND_GPR }, + + /* The RB field in an X, XO, M, or MDS form instruction. */ +#define RB RAS + 1 +#define RB_MASK (0x1f << 11) + { 5, 11, 0, 0, PPC_OPERAND_GPR }, + + /* The RB field in an X form instruction when it must be the same as + the RS field in the instruction. This is used for extended + mnemonics like mr. */ +#define RBS RB + 1 + { 5, 1, insert_rbs, extract_rbs, PPC_OPERAND_FAKE }, + + /* The RS field in a D, DS, X, XFX, XS, M, MD or MDS form + instruction or the RT field in a D, DS, X, XFX or XO form + instruction. */ +#define RS RBS + 1 +#define RT RS +#define RT_MASK (0x1f << 21) + { 5, 21, 0, 0, PPC_OPERAND_GPR }, + + /* The RS field of the DS form stq instruction, which has special + value restrictions. */ +#define RSQ RS + 1 + { 5, 21, insert_rsq, 0, PPC_OPERAND_GPR }, + + /* The RT field of the DQ form lq instruction, which has special + value restrictions. */ +#define RTQ RSQ + 1 + { 5, 21, insert_rtq, 0, PPC_OPERAND_GPR }, + + /* The SH field in an X or M form instruction. */ +#define SH RTQ + 1 +#define SH_MASK (0x1f << 11) + { 5, 11, 0, 0, 0 }, + + /* The SH field in an MD form instruction. This is split. */ +#define SH6 SH + 1 +#define SH6_MASK ((0x1f << 11) | (1 << 1)) + { 6, 1, insert_sh6, extract_sh6, 0 }, + + /* The SI field in a D form instruction. */ +#define SI SH6 + 1 + { 16, 0, 0, 0, PPC_OPERAND_SIGNED }, + + /* The SI field in a D form instruction when we accept a wide range + of positive values. */ +#define SISIGNOPT SI + 1 + { 16, 0, 0, 0, PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT }, + + /* The SPR field in an XFX form instruction. This is flipped--the + lower 5 bits are stored in the upper 5 and vice- versa. */ +#define SPR SISIGNOPT + 1 +#define PMR SPR +#define SPR_MASK (0x3ff << 11) + { 10, 11, insert_spr, extract_spr, 0 }, + + /* The BAT index number in an XFX form m[ft]ibat[lu] instruction. */ +#define SPRBAT SPR + 1 +#define SPRBAT_MASK (0x3 << 17) + { 2, 17, 0, 0, 0 }, + + /* The SPRG register number in an XFX form m[ft]sprg instruction. */ +#define SPRG SPRBAT + 1 +#define SPRG_MASK (0x3 << 16) + { 2, 16, 0, 0, 0 }, + + /* The SR field in an X form instruction. */ +#define SR SPRG + 1 + { 4, 16, 0, 0, 0 }, + + /* The STRM field in an X AltiVec form instruction. */ +#define STRM SR + 1 +#define STRM_MASK (0x3 << 21) + { 2, 21, 0, 0, 0 }, + + /* The SV field in a POWER SC form instruction. */ +#define SV STRM + 1 + { 14, 2, 0, 0, 0 }, + + /* The TBR field in an XFX form instruction. This is like the SPR + field, but it is optional. */ +#define TBR SV + 1 + { 10, 11, insert_tbr, extract_tbr, PPC_OPERAND_OPTIONAL }, + + /* The TO field in a D or X form instruction. */ +#define TO TBR + 1 +#define TO_MASK (0x1f << 21) + { 5, 21, 0, 0, 0 }, + + /* The U field in an X form instruction. */ +#define U TO + 1 + { 4, 12, 0, 0, 0 }, + + /* The UI field in a D form instruction. */ +#define UI U + 1 + { 16, 0, 0, 0, 0 }, + + /* The VA field in a VA, VX or VXR form instruction. */ +#define VA UI + 1 +#define VA_MASK (0x1f << 16) + { 5, 16, 0, 0, PPC_OPERAND_VR }, + + /* The VB field in a VA, VX or VXR form instruction. */ +#define VB VA + 1 +#define VB_MASK (0x1f << 11) + { 5, 11, 0, 0, PPC_OPERAND_VR }, + + /* The VC field in a VA form instruction. */ +#define VC VB + 1 +#define VC_MASK (0x1f << 6) + { 5, 6, 0, 0, PPC_OPERAND_VR }, + + /* The VD or VS field in a VA, VX, VXR or X form instruction. */ +#define VD VC + 1 +#define VS VD +#define VD_MASK (0x1f << 21) + { 5, 21, 0, 0, PPC_OPERAND_VR }, + + /* The SIMM field in a VX form instruction. */ +#define SIMM VD + 1 + { 5, 16, 0, 0, PPC_OPERAND_SIGNED}, + + /* The UIMM field in a VX form instruction. */ +#define UIMM SIMM + 1 + { 5, 16, 0, 0, 0 }, + + /* The SHB field in a VA form instruction. */ +#define SHB UIMM + 1 + { 4, 6, 0, 0, 0 }, + + /* The other UIMM field in a EVX form instruction. */ +#define EVUIMM SHB + 1 + { 5, 11, 0, 0, 0 }, + + /* The other UIMM field in a half word EVX form instruction. */ +#define EVUIMM_2 EVUIMM + 1 + { 32, 11, insert_ev2, extract_ev2, PPC_OPERAND_PARENS }, + + /* The other UIMM field in a word EVX form instruction. */ +#define EVUIMM_4 EVUIMM_2 + 1 + { 32, 11, insert_ev4, extract_ev4, PPC_OPERAND_PARENS }, + + /* The other UIMM field in a double EVX form instruction. */ +#define EVUIMM_8 EVUIMM_4 + 1 + { 32, 11, insert_ev8, extract_ev8, PPC_OPERAND_PARENS }, + + /* The WS field. */ +#define WS EVUIMM_8 + 1 +#define WS_MASK (0x7 << 11) + { 3, 11, 0, 0, 0 }, + + /* The L field in an mtmsrd instruction */ +#define MTMSRD_L WS + 1 + { 1, 16, 0, 0, PPC_OPERAND_OPTIONAL }, + +}; + +/* The functions used to insert and extract complicated operands. */ + +/* The BA field in an XL form instruction when it must be the same as + the BT field in the same instruction. This operand is marked FAKE. + The insertion function just copies the BT field into the BA field, + and the extraction function just checks that the fields are the + same. */ + +/*ARGSUSED*/ +static unsigned long +insert_bat (unsigned long insn, + long value ATTRIBUTE_UNUSED, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | (((insn >> 21) & 0x1f) << 16); +} + +static long +extract_bat (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid) +{ + if (((insn >> 21) & 0x1f) != ((insn >> 16) & 0x1f)) + *invalid = 1; + return 0; +} + +/* The BB field in an XL form instruction when it must be the same as + the BA field in the same instruction. This operand is marked FAKE. + The insertion function just copies the BA field into the BB field, + and the extraction function just checks that the fields are the + same. */ + +/*ARGSUSED*/ +static unsigned long +insert_bba (unsigned long insn, + long value ATTRIBUTE_UNUSED, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | (((insn >> 16) & 0x1f) << 11); +} + +static long +extract_bba (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid) +{ + if (((insn >> 16) & 0x1f) != ((insn >> 11) & 0x1f)) + *invalid = 1; + return 0; +} + +/* The BD field in a B form instruction. The lower two bits are + forced to zero. */ + +/*ARGSUSED*/ +static unsigned long +insert_bd (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | (value & 0xfffc); +} + +/*ARGSUSED*/ +static long +extract_bd (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn & 0xfffc) ^ 0x8000) - 0x8000; +} + +/* The BD field in a B form instruction when the - modifier is used. + This modifier means that the branch is not expected to be taken. + For chips built to versions of the architecture prior to version 2 + (ie. not Power4 compatible), we set the y bit of the BO field to 1 + if the offset is negative. When extracting, we require that the y + bit be 1 and that the offset be positive, since if the y bit is 0 + we just want to print the normal form of the instruction. + Power4 compatible targets use two bits, "a", and "t", instead of + the "y" bit. "at" == 00 => no hint, "at" == 01 => unpredictable, + "at" == 10 => not taken, "at" == 11 => taken. The "t" bit is 00001 + in BO field, the "a" bit is 00010 for branch on CR(BI) and 01000 + for branch on CTR. We only handle the taken/not-taken hint here. */ + +/*ARGSUSED*/ +static unsigned long +insert_bdm (unsigned long insn, + long value, + int dialect, + const char **errmsg ATTRIBUTE_UNUSED) +{ + if ((dialect & PPC_OPCODE_POWER4) == 0) + { + if ((value & 0x8000) != 0) + insn |= 1 << 21; + } + else + { + if ((insn & (0x14 << 21)) == (0x04 << 21)) + insn |= 0x02 << 21; + else if ((insn & (0x14 << 21)) == (0x10 << 21)) + insn |= 0x08 << 21; + } + return insn | (value & 0xfffc); +} + +static long +extract_bdm (unsigned long insn, + int dialect, + int *invalid) +{ + if ((dialect & PPC_OPCODE_POWER4) == 0) + { + if (((insn & (1 << 21)) == 0) != ((insn & (1 << 15)) == 0)) + *invalid = 1; + } + else + { + if ((insn & (0x17 << 21)) != (0x06 << 21) + && (insn & (0x1d << 21)) != (0x18 << 21)) + *invalid = 1; + } + + return ((insn & 0xfffc) ^ 0x8000) - 0x8000; +} + +/* The BD field in a B form instruction when the + modifier is used. + This is like BDM, above, except that the branch is expected to be + taken. */ + +/*ARGSUSED*/ +static unsigned long +insert_bdp (unsigned long insn, + long value, + int dialect, + const char **errmsg ATTRIBUTE_UNUSED) +{ + if ((dialect & PPC_OPCODE_POWER4) == 0) + { + if ((value & 0x8000) == 0) + insn |= 1 << 21; + } + else + { + if ((insn & (0x14 << 21)) == (0x04 << 21)) + insn |= 0x03 << 21; + else if ((insn & (0x14 << 21)) == (0x10 << 21)) + insn |= 0x09 << 21; + } + return insn | (value & 0xfffc); +} + +static long +extract_bdp (unsigned long insn, + int dialect, + int *invalid) +{ + if ((dialect & PPC_OPCODE_POWER4) == 0) + { + if (((insn & (1 << 21)) == 0) == ((insn & (1 << 15)) == 0)) + *invalid = 1; + } + else + { + if ((insn & (0x17 << 21)) != (0x07 << 21) + && (insn & (0x1d << 21)) != (0x19 << 21)) + *invalid = 1; + } + + return ((insn & 0xfffc) ^ 0x8000) - 0x8000; +} + +/* Check for legal values of a BO field. */ + +static int +valid_bo (long value, int dialect) +{ + if ((dialect & PPC_OPCODE_POWER4) == 0) + { + /* Certain encodings have bits that are required to be zero. + These are (z must be zero, y may be anything): + 001zy + 011zy + 1z00y + 1z01y + 1z1zz + */ + switch (value & 0x14) + { + default: + case 0: + return 1; + case 0x4: + return (value & 0x2) == 0; + case 0x10: + return (value & 0x8) == 0; + case 0x14: + return value == 0x14; + } + } + else + { + /* Certain encodings have bits that are required to be zero. + These are (z must be zero, a & t may be anything): + 0000z + 0001z + 0100z + 0101z + 001at + 011at + 1a00t + 1a01t + 1z1zz + */ + if ((value & 0x14) == 0) + return (value & 0x1) == 0; + else if ((value & 0x14) == 0x14) + return value == 0x14; + else + return 1; + } +} + +/* The BO field in a B form instruction. Warn about attempts to set + the field to an illegal value. */ + +static unsigned long +insert_bo (unsigned long insn, + long value, + int dialect, + const char **errmsg) +{ + if (!valid_bo (value, dialect)) + *errmsg = _("invalid conditional option"); + return insn | ((value & 0x1f) << 21); +} + +static long +extract_bo (unsigned long insn, + int dialect, + int *invalid) +{ + long value; + + value = (insn >> 21) & 0x1f; + if (!valid_bo (value, dialect)) + *invalid = 1; + return value; +} + +/* The BO field in a B form instruction when the + or - modifier is + used. This is like the BO field, but it must be even. When + extracting it, we force it to be even. */ + +static unsigned long +insert_boe (unsigned long insn, + long value, + int dialect, + const char **errmsg) +{ + if (!valid_bo (value, dialect)) + *errmsg = _("invalid conditional option"); + else if ((value & 1) != 0) + *errmsg = _("attempt to set y bit when using + or - modifier"); + + return insn | ((value & 0x1f) << 21); +} + +static long +extract_boe (unsigned long insn, + int dialect, + int *invalid) +{ + long value; + + value = (insn >> 21) & 0x1f; + if (!valid_bo (value, dialect)) + *invalid = 1; + return value & 0x1e; +} + +/* The DQ field in a DQ form instruction. This is like D, but the + lower four bits are forced to zero. */ + +/*ARGSUSED*/ +static unsigned long +insert_dq (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if ((value & 0xf) != 0) + *errmsg = _("offset not a multiple of 16"); + return insn | (value & 0xfff0); +} + +/*ARGSUSED*/ +static long +extract_dq (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn & 0xfff0) ^ 0x8000) - 0x8000; +} + +static unsigned long +insert_ev2 (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if ((value & 1) != 0) + *errmsg = _("offset not a multiple of 2"); + if ((value > 62) != 0) + *errmsg = _("offset greater than 62"); + return insn | ((value & 0x3e) << 10); +} + +static long +extract_ev2 (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return (insn >> 10) & 0x3e; +} + +static unsigned long +insert_ev4 (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if ((value & 3) != 0) + *errmsg = _("offset not a multiple of 4"); + if ((value > 124) != 0) + *errmsg = _("offset greater than 124"); + return insn | ((value & 0x7c) << 9); +} + +static long +extract_ev4 (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return (insn >> 9) & 0x7c; +} + +static unsigned long +insert_ev8 (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if ((value & 7) != 0) + *errmsg = _("offset not a multiple of 8"); + if ((value > 248) != 0) + *errmsg = _("offset greater than 248"); + return insn | ((value & 0xf8) << 8); +} + +static long +extract_ev8 (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return (insn >> 8) & 0xf8; +} + +/* The DS field in a DS form instruction. This is like D, but the + lower two bits are forced to zero. */ + +/*ARGSUSED*/ +static unsigned long +insert_ds (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if ((value & 3) != 0) + *errmsg = _("offset not a multiple of 4"); + return insn | (value & 0xfffc); +} + +/*ARGSUSED*/ +static long +extract_ds (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn & 0xfffc) ^ 0x8000) - 0x8000; +} + +/* The DE field in a DE form instruction. */ + +/*ARGSUSED*/ +static unsigned long +insert_de (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if (value > 2047 || value < -2048) + *errmsg = _("offset not between -2048 and 2047"); + return insn | ((value << 4) & 0xfff0); +} + +/*ARGSUSED*/ +static long +extract_de (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return (insn & 0xfff0) >> 4; +} + +/* The DES field in a DES form instruction. */ + +/*ARGSUSED*/ +static unsigned long +insert_des (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if (value > 8191 || value < -8192) + *errmsg = _("offset not between -8192 and 8191"); + else if ((value & 3) != 0) + *errmsg = _("offset not a multiple of 4"); + return insn | ((value << 2) & 0xfff0); +} + +/*ARGSUSED*/ +static long +extract_des (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return (((insn >> 2) & 0x3ffc) ^ 0x2000) - 0x2000; +} + +/* FXM mask in mfcr and mtcrf instructions. */ + +static unsigned long +insert_fxm (unsigned long insn, + long value, + int dialect, + const char **errmsg) +{ + /* If the optional field on mfcr is missing that means we want to use + the old form of the instruction that moves the whole cr. In that + case we'll have VALUE zero. There doesn't seem to be a way to + distinguish this from the case where someone writes mfcr %r3,0. */ + if (value == 0) + ; + + /* If only one bit of the FXM field is set, we can use the new form + of the instruction, which is faster. Unlike the Power4 branch hint + encoding, this is not backward compatible. */ + else if ((dialect & PPC_OPCODE_POWER4) != 0 && (value & -value) == value) + insn |= 1 << 20; + + /* Any other value on mfcr is an error. */ + else if ((insn & (0x3ff << 1)) == 19 << 1) + { + *errmsg = _("ignoring invalid mfcr mask"); + value = 0; + } + + return insn | ((value & 0xff) << 12); +} + +static long +extract_fxm (unsigned long insn, + int dialect, + int *invalid) +{ + long mask = (insn >> 12) & 0xff; + + /* Is this a Power4 insn? */ + if ((insn & (1 << 20)) != 0) + { + if ((dialect & PPC_OPCODE_POWER4) == 0) + *invalid = 1; + else + { + /* Exactly one bit of MASK should be set. */ + if (mask == 0 || (mask & -mask) != mask) + *invalid = 1; + } + } + + /* Check that non-power4 form of mfcr has a zero MASK. */ + else if ((insn & (0x3ff << 1)) == 19 << 1) + { + if (mask != 0) + *invalid = 1; + } + + return mask; +} + +/* The LI field in an I form instruction. The lower two bits are + forced to zero. */ + +/*ARGSUSED*/ +static unsigned long +insert_li (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if ((value & 3) != 0) + *errmsg = _("ignoring least significant bits in branch offset"); + return insn | (value & 0x3fffffc); +} + +/*ARGSUSED*/ +static long +extract_li (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn & 0x3fffffc) ^ 0x2000000) - 0x2000000; +} + +/* The MB and ME fields in an M form instruction expressed as a single + operand which is itself a bitmask. The extraction function always + marks it as invalid, since we never want to recognize an + instruction which uses a field of this type. */ + +static unsigned long +insert_mbe (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + unsigned long uval, mask; + int mb, me, mx, count, last; + + uval = value; + + if (uval == 0) + { + *errmsg = _("illegal bitmask"); + return insn; + } + + mb = 0; + me = 32; + if ((uval & 1) != 0) + last = 1; + else + last = 0; + count = 0; + + /* mb: location of last 0->1 transition */ + /* me: location of last 1->0 transition */ + /* count: # transitions */ + + for (mx = 0, mask = 1L << 31; mx < 32; ++mx, mask >>= 1) + { + if ((uval & mask) && !last) + { + ++count; + mb = mx; + last = 1; + } + else if (!(uval & mask) && last) + { + ++count; + me = mx; + last = 0; + } + } + if (me == 0) + me = 32; + + if (count != 2 && (count != 0 || ! last)) + *errmsg = _("illegal bitmask"); + + return insn | (mb << 6) | ((me - 1) << 1); +} + +static long +extract_mbe (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid) +{ + long ret; + int mb, me; + int i; + + *invalid = 1; + + mb = (insn >> 6) & 0x1f; + me = (insn >> 1) & 0x1f; + if (mb < me + 1) + { + ret = 0; + for (i = mb; i <= me; i++) + ret |= 1L << (31 - i); + } + else if (mb == me + 1) + ret = ~0; + else /* (mb > me + 1) */ + { + ret = ~0; + for (i = me + 1; i < mb; i++) + ret &= ~(1L << (31 - i)); + } + return ret; +} + +/* The MB or ME field in an MD or MDS form instruction. The high bit + is wrapped to the low end. */ + +/*ARGSUSED*/ +static unsigned long +insert_mb6 (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x1f) << 6) | (value & 0x20); +} + +/*ARGSUSED*/ +static long +extract_mb6 (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn >> 6) & 0x1f) | (insn & 0x20); +} + +/* The NB field in an X form instruction. The value 32 is stored as + 0. */ + +static unsigned long +insert_nb (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if (value < 0 || value > 32) + *errmsg = _("value out of range"); + if (value == 32) + value = 0; + return insn | ((value & 0x1f) << 11); +} + +/*ARGSUSED*/ +static long +extract_nb (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + long ret; + + ret = (insn >> 11) & 0x1f; + if (ret == 0) + ret = 32; + return ret; +} + +/* The NSI field in a D form instruction. This is the same as the SI + field, only negated. The extraction function always marks it as + invalid, since we never want to recognize an instruction which uses + a field of this type. */ + +/*ARGSUSED*/ +static unsigned long +insert_nsi (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | (-value & 0xffff); +} + +static long +extract_nsi (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid) +{ + *invalid = 1; + return -(((insn & 0xffff) ^ 0x8000) - 0x8000); +} + +/* The RA field in a D or X form instruction which is an updating + load, which means that the RA field may not be zero and may not + equal the RT field. */ + +static unsigned long +insert_ral (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if (value == 0 + || (unsigned long) value == ((insn >> 21) & 0x1f)) + *errmsg = "invalid register operand when updating"; + return insn | ((value & 0x1f) << 16); +} + +/* The RA field in an lmw instruction, which has special value + restrictions. */ + +static unsigned long +insert_ram (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if ((unsigned long) value >= ((insn >> 21) & 0x1f)) + *errmsg = _("index register in load range"); + return insn | ((value & 0x1f) << 16); +} + +/* The RA field in the DQ form lq instruction, which has special + value restrictions. */ + +/*ARGSUSED*/ +static unsigned long +insert_raq (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + long rtvalue = (insn & RT_MASK) >> 21; + + if (value == rtvalue) + *errmsg = _("source and target register operands must be different"); + return insn | ((value & 0x1f) << 16); +} + +/* The RA field in a D or X form instruction which is an updating + store or an updating floating point load, which means that the RA + field may not be zero. */ + +static unsigned long +insert_ras (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if (value == 0) + *errmsg = _("invalid register operand when updating"); + return insn | ((value & 0x1f) << 16); +} + +/* The RB field in an X form instruction when it must be the same as + the RS field in the instruction. This is used for extended + mnemonics like mr. This operand is marked FAKE. The insertion + function just copies the BT field into the BA field, and the + extraction function just checks that the fields are the same. */ + +/*ARGSUSED*/ +static unsigned long +insert_rbs (unsigned long insn, + long value ATTRIBUTE_UNUSED, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | (((insn >> 21) & 0x1f) << 11); +} + +static long +extract_rbs (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid) +{ + if (((insn >> 21) & 0x1f) != ((insn >> 11) & 0x1f)) + *invalid = 1; + return 0; +} + +/* The RT field of the DQ form lq instruction, which has special + value restrictions. */ + +/*ARGSUSED*/ +static unsigned long +insert_rtq (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if ((value & 1) != 0) + *errmsg = _("target register operand must be even"); + return insn | ((value & 0x1f) << 21); +} + +/* The RS field of the DS form stq instruction, which has special + value restrictions. */ + +/*ARGSUSED*/ +static unsigned long +insert_rsq (unsigned long insn, + long value ATTRIBUTE_UNUSED, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if ((value & 1) != 0) + *errmsg = _("source register operand must be even"); + return insn | ((value & 0x1f) << 21); +} + +/* The SH field in an MD form instruction. This is split. */ + +/*ARGSUSED*/ +static unsigned long +insert_sh6 (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x1f) << 11) | ((value & 0x20) >> 4); +} + +/*ARGSUSED*/ +static long +extract_sh6 (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn >> 11) & 0x1f) | ((insn << 4) & 0x20); +} + +/* The SPR field in an XFX form instruction. This is flipped--the + lower 5 bits are stored in the upper 5 and vice- versa. */ + +static unsigned long +insert_spr (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x1f) << 16) | ((value & 0x3e0) << 6); +} + +static long +extract_spr (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn >> 16) & 0x1f) | ((insn >> 6) & 0x3e0); +} + +/* The TBR field in an XFX instruction. This is just like SPR, but it + is optional. When TBR is omitted, it must be inserted as 268 (the + magic number of the TB register). These functions treat 0 + (indicating an omitted optional operand) as 268. This means that + ``mftb 4,0'' is not handled correctly. This does not matter very + much, since the architecture manual does not define mftb as + accepting any values other than 268 or 269. */ + +#define TB (268) + +static unsigned long +insert_tbr (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + if (value == 0) + value = TB; + return insn | ((value & 0x1f) << 16) | ((value & 0x3e0) << 6); +} + +static long +extract_tbr (unsigned long insn, + int dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + long ret; + + ret = ((insn >> 16) & 0x1f) | ((insn >> 6) & 0x3e0); + if (ret == TB) + ret = 0; + return ret; +} + +/* Macros used to form opcodes. */ + +/* The main opcode. */ +#define OP(x) ((((unsigned long)(x)) & 0x3f) << 26) +#define OP_MASK OP (0x3f) + +/* The main opcode combined with a trap code in the TO field of a D + form instruction. Used for extended mnemonics for the trap + instructions. */ +#define OPTO(x,to) (OP (x) | ((((unsigned long)(to)) & 0x1f) << 21)) +#define OPTO_MASK (OP_MASK | TO_MASK) + +/* The main opcode combined with a comparison size bit in the L field + of a D form or X form instruction. Used for extended mnemonics for + the comparison instructions. */ +#define OPL(x,l) (OP (x) | ((((unsigned long)(l)) & 1) << 21)) +#define OPL_MASK OPL (0x3f,1) + +/* An A form instruction. */ +#define A(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1) | (((unsigned long)(rc)) & 1)) +#define A_MASK A (0x3f, 0x1f, 1) + +/* An A_MASK with the FRB field fixed. */ +#define AFRB_MASK (A_MASK | FRB_MASK) + +/* An A_MASK with the FRC field fixed. */ +#define AFRC_MASK (A_MASK | FRC_MASK) + +/* An A_MASK with the FRA and FRC fields fixed. */ +#define AFRAFRC_MASK (A_MASK | FRA_MASK | FRC_MASK) + +/* A B form instruction. */ +#define B(op, aa, lk) (OP (op) | ((((unsigned long)(aa)) & 1) << 1) | ((lk) & 1)) +#define B_MASK B (0x3f, 1, 1) + +/* A B form instruction setting the BO field. */ +#define BBO(op, bo, aa, lk) (B ((op), (aa), (lk)) | ((((unsigned long)(bo)) & 0x1f) << 21)) +#define BBO_MASK BBO (0x3f, 0x1f, 1, 1) + +/* A BBO_MASK with the y bit of the BO field removed. This permits + matching a conditional branch regardless of the setting of the y + bit. Similarly for the 'at' bits used for power4 branch hints. */ +#define Y_MASK (((unsigned long) 1) << 21) +#define AT1_MASK (((unsigned long) 3) << 21) +#define AT2_MASK (((unsigned long) 9) << 21) +#define BBOY_MASK (BBO_MASK &~ Y_MASK) +#define BBOAT_MASK (BBO_MASK &~ AT1_MASK) + +/* A B form instruction setting the BO field and the condition bits of + the BI field. */ +#define BBOCB(op, bo, cb, aa, lk) \ + (BBO ((op), (bo), (aa), (lk)) | ((((unsigned long)(cb)) & 0x3) << 16)) +#define BBOCB_MASK BBOCB (0x3f, 0x1f, 0x3, 1, 1) + +/* A BBOCB_MASK with the y bit of the BO field removed. */ +#define BBOYCB_MASK (BBOCB_MASK &~ Y_MASK) +#define BBOATCB_MASK (BBOCB_MASK &~ AT1_MASK) +#define BBOAT2CB_MASK (BBOCB_MASK &~ AT2_MASK) + +/* A BBOYCB_MASK in which the BI field is fixed. */ +#define BBOYBI_MASK (BBOYCB_MASK | BI_MASK) +#define BBOATBI_MASK (BBOAT2CB_MASK | BI_MASK) + +/* An Context form instruction. */ +#define CTX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x7)) +#define CTX_MASK CTX(0x3f, 0x7) + +/* An User Context form instruction. */ +#define UCTX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f)) +#define UCTX_MASK UCTX(0x3f, 0x1f) + +/* The main opcode mask with the RA field clear. */ +#define DRA_MASK (OP_MASK | RA_MASK) + +/* A DS form instruction. */ +#define DSO(op, xop) (OP (op) | ((xop) & 0x3)) +#define DS_MASK DSO (0x3f, 3) + +/* A DE form instruction. */ +#define DEO(op, xop) (OP (op) | ((xop) & 0xf)) +#define DE_MASK DEO (0x3e, 0xf) + +/* An EVSEL form instruction. */ +#define EVSEL(op, xop) (OP (op) | (((unsigned long)(xop)) & 0xff) << 3) +#define EVSEL_MASK EVSEL(0x3f, 0xff) + +/* An M form instruction. */ +#define M(op, rc) (OP (op) | ((rc) & 1)) +#define M_MASK M (0x3f, 1) + +/* An M form instruction with the ME field specified. */ +#define MME(op, me, rc) (M ((op), (rc)) | ((((unsigned long)(me)) & 0x1f) << 1)) + +/* An M_MASK with the MB and ME fields fixed. */ +#define MMBME_MASK (M_MASK | MB_MASK | ME_MASK) + +/* An M_MASK with the SH and ME fields fixed. */ +#define MSHME_MASK (M_MASK | SH_MASK | ME_MASK) + +/* An MD form instruction. */ +#define MD(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x7) << 2) | ((rc) & 1)) +#define MD_MASK MD (0x3f, 0x7, 1) + +/* An MD_MASK with the MB field fixed. */ +#define MDMB_MASK (MD_MASK | MB6_MASK) + +/* An MD_MASK with the SH field fixed. */ +#define MDSH_MASK (MD_MASK | SH6_MASK) + +/* An MDS form instruction. */ +#define MDS(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0xf) << 1) | ((rc) & 1)) +#define MDS_MASK MDS (0x3f, 0xf, 1) + +/* An MDS_MASK with the MB field fixed. */ +#define MDSMB_MASK (MDS_MASK | MB6_MASK) + +/* An SC form instruction. */ +#define SC(op, sa, lk) (OP (op) | ((((unsigned long)(sa)) & 1) << 1) | ((lk) & 1)) +#define SC_MASK (OP_MASK | (((unsigned long)0x3ff) << 16) | (((unsigned long)1) << 1) | 1) + +/* An VX form instruction. */ +#define VX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x7ff)) + +/* The mask for an VX form instruction. */ +#define VX_MASK VX(0x3f, 0x7ff) + +/* An VA form instruction. */ +#define VXA(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x03f)) + +/* The mask for an VA form instruction. */ +#define VXA_MASK VXA(0x3f, 0x3f) + +/* An VXR form instruction. */ +#define VXR(op, xop, rc) (OP (op) | (((rc) & 1) << 10) | (((unsigned long)(xop)) & 0x3ff)) + +/* The mask for a VXR form instruction. */ +#define VXR_MASK VXR(0x3f, 0x3ff, 1) + +/* An X form instruction. */ +#define X(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1)) + +/* An X form instruction with the RC bit specified. */ +#define XRC(op, xop, rc) (X ((op), (xop)) | ((rc) & 1)) + +/* The mask for an X form instruction. */ +#define X_MASK XRC (0x3f, 0x3ff, 1) + +/* An X_MASK with the RA field fixed. */ +#define XRA_MASK (X_MASK | RA_MASK) + +/* An X_MASK with the RB field fixed. */ +#define XRB_MASK (X_MASK | RB_MASK) + +/* An X_MASK with the RT field fixed. */ +#define XRT_MASK (X_MASK | RT_MASK) + +/* An X_MASK with the RA and RB fields fixed. */ +#define XRARB_MASK (X_MASK | RA_MASK | RB_MASK) + +/* An XRARB_MASK, but with the L bit clear. */ +#define XRLARB_MASK (XRARB_MASK & ~((unsigned long) 1 << 16)) + +/* An X_MASK with the RT and RA fields fixed. */ +#define XRTRA_MASK (X_MASK | RT_MASK | RA_MASK) + +/* An XRTRA_MASK, but with L bit clear. */ +#define XRTLRA_MASK (XRTRA_MASK & ~((unsigned long) 1 << 21)) + +/* An X form comparison instruction. */ +#define XCMPL(op, xop, l) (X ((op), (xop)) | ((((unsigned long)(l)) & 1) << 21)) + +/* The mask for an X form comparison instruction. */ +#define XCMP_MASK (X_MASK | (((unsigned long)1) << 22)) + +/* The mask for an X form comparison instruction with the L field + fixed. */ +#define XCMPL_MASK (XCMP_MASK | (((unsigned long)1) << 21)) + +/* An X form trap instruction with the TO field specified. */ +#define XTO(op, xop, to) (X ((op), (xop)) | ((((unsigned long)(to)) & 0x1f) << 21)) +#define XTO_MASK (X_MASK | TO_MASK) + +/* An X form tlb instruction with the SH field specified. */ +#define XTLB(op, xop, sh) (X ((op), (xop)) | ((((unsigned long)(sh)) & 0x1f) << 11)) +#define XTLB_MASK (X_MASK | SH_MASK) + +/* An X form sync instruction. */ +#define XSYNC(op, xop, l) (X ((op), (xop)) | ((((unsigned long)(l)) & 3) << 21)) + +/* An X form sync instruction with everything filled in except the LS field. */ +#define XSYNC_MASK (0xff9fffff) + +/* An X form AltiVec dss instruction. */ +#define XDSS(op, xop, a) (X ((op), (xop)) | ((((unsigned long)(a)) & 1) << 25)) +#define XDSS_MASK XDSS(0x3f, 0x3ff, 1) + +/* An XFL form instruction. */ +#define XFL(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1) | (((unsigned long)(rc)) & 1)) +#define XFL_MASK (XFL (0x3f, 0x3ff, 1) | (((unsigned long)1) << 25) | (((unsigned long)1) << 16)) + +/* An X form isel instruction. */ +#define XISEL(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1)) +#define XISEL_MASK XISEL(0x3f, 0x1f) + +/* An XL form instruction with the LK field set to 0. */ +#define XL(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1)) + +/* An XL form instruction which uses the LK field. */ +#define XLLK(op, xop, lk) (XL ((op), (xop)) | ((lk) & 1)) + +/* The mask for an XL form instruction. */ +#define XL_MASK XLLK (0x3f, 0x3ff, 1) + +/* An XL form instruction which explicitly sets the BO field. */ +#define XLO(op, bo, xop, lk) \ + (XLLK ((op), (xop), (lk)) | ((((unsigned long)(bo)) & 0x1f) << 21)) +#define XLO_MASK (XL_MASK | BO_MASK) + +/* An XL form instruction which explicitly sets the y bit of the BO + field. */ +#define XLYLK(op, xop, y, lk) (XLLK ((op), (xop), (lk)) | ((((unsigned long)(y)) & 1) << 21)) +#define XLYLK_MASK (XL_MASK | Y_MASK) + +/* An XL form instruction which sets the BO field and the condition + bits of the BI field. */ +#define XLOCB(op, bo, cb, xop, lk) \ + (XLO ((op), (bo), (xop), (lk)) | ((((unsigned long)(cb)) & 3) << 16)) +#define XLOCB_MASK XLOCB (0x3f, 0x1f, 0x3, 0x3ff, 1) + +/* An XL_MASK or XLYLK_MASK or XLOCB_MASK with the BB field fixed. */ +#define XLBB_MASK (XL_MASK | BB_MASK) +#define XLYBB_MASK (XLYLK_MASK | BB_MASK) +#define XLBOCBBB_MASK (XLOCB_MASK | BB_MASK) + +/* An XL_MASK with the BO and BB fields fixed. */ +#define XLBOBB_MASK (XL_MASK | BO_MASK | BB_MASK) + +/* An XL_MASK with the BO, BI and BB fields fixed. */ +#define XLBOBIBB_MASK (XL_MASK | BO_MASK | BI_MASK | BB_MASK) + +/* An XO form instruction. */ +#define XO(op, xop, oe, rc) \ + (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 1) | ((((unsigned long)(oe)) & 1) << 10) | (((unsigned long)(rc)) & 1)) +#define XO_MASK XO (0x3f, 0x1ff, 1, 1) + +/* An XO_MASK with the RB field fixed. */ +#define XORB_MASK (XO_MASK | RB_MASK) + +/* An XS form instruction. */ +#define XS(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 2) | (((unsigned long)(rc)) & 1)) +#define XS_MASK XS (0x3f, 0x1ff, 1) + +/* A mask for the FXM version of an XFX form instruction. */ +#define XFXFXM_MASK (X_MASK | (1 << 11)) + +/* An XFX form instruction with the FXM field filled in. */ +#define XFXM(op, xop, fxm) \ + (X ((op), (xop)) | ((((unsigned long)(fxm)) & 0xff) << 12)) + +/* An XFX form instruction with the SPR field filled in. */ +#define XSPR(op, xop, spr) \ + (X ((op), (xop)) | ((((unsigned long)(spr)) & 0x1f) << 16) | ((((unsigned long)(spr)) & 0x3e0) << 6)) +#define XSPR_MASK (X_MASK | SPR_MASK) + +/* An XFX form instruction with the SPR field filled in except for the + SPRBAT field. */ +#define XSPRBAT_MASK (XSPR_MASK &~ SPRBAT_MASK) + +/* An XFX form instruction with the SPR field filled in except for the + SPRG field. */ +#define XSPRG_MASK (XSPR_MASK &~ SPRG_MASK) + +/* An X form instruction with everything filled in except the E field. */ +#define XE_MASK (0xffff7fff) + +/* An X form user context instruction. */ +#define XUC(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f)) +#define XUC_MASK XUC(0x3f, 0x1f) + +/* The BO encodings used in extended conditional branch mnemonics. */ +#define BODNZF (0x0) +#define BODNZFP (0x1) +#define BODZF (0x2) +#define BODZFP (0x3) +#define BODNZT (0x8) +#define BODNZTP (0x9) +#define BODZT (0xa) +#define BODZTP (0xb) + +#define BOF (0x4) +#define BOFP (0x5) +#define BOFM4 (0x6) +#define BOFP4 (0x7) +#define BOT (0xc) +#define BOTP (0xd) +#define BOTM4 (0xe) +#define BOTP4 (0xf) + +#define BODNZ (0x10) +#define BODNZP (0x11) +#define BODZ (0x12) +#define BODZP (0x13) +#define BODNZM4 (0x18) +#define BODNZP4 (0x19) +#define BODZM4 (0x1a) +#define BODZP4 (0x1b) + +#define BOU (0x14) + +/* The BI condition bit encodings used in extended conditional branch + mnemonics. */ +#define CBLT (0) +#define CBGT (1) +#define CBEQ (2) +#define CBSO (3) + +/* The TO encodings used in extended trap mnemonics. */ +#define TOLGT (0x1) +#define TOLLT (0x2) +#define TOEQ (0x4) +#define TOLGE (0x5) +#define TOLNL (0x5) +#define TOLLE (0x6) +#define TOLNG (0x6) +#define TOGT (0x8) +#define TOGE (0xc) +#define TONL (0xc) +#define TOLT (0x10) +#define TOLE (0x14) +#define TONG (0x14) +#define TONE (0x18) +#define TOU (0x1f) + +/* Smaller names for the flags so each entry in the opcodes table will + fit on a single line. */ +#undef PPC +#define PPC PPC_OPCODE_PPC +#define PPCCOM PPC_OPCODE_PPC | PPC_OPCODE_COMMON +#define NOPOWER4 PPC_OPCODE_NOPOWER4 | PPCCOM +#define POWER4 PPC_OPCODE_POWER4 +#define PPC32 PPC_OPCODE_32 | PPC_OPCODE_PPC +#define PPC64 PPC_OPCODE_64 | PPC_OPCODE_PPC +#define PPC403 PPC_OPCODE_403 +#define PPC405 PPC403 +#define PPC440 PPC_OPCODE_440 +#define PPC750 PPC +#define PPC860 PPC +#define PPCVEC PPC_OPCODE_ALTIVEC | PPC_OPCODE_PPC +#define POWER PPC_OPCODE_POWER +#define POWER2 PPC_OPCODE_POWER | PPC_OPCODE_POWER2 +#define PPCPWR2 PPC_OPCODE_PPC | PPC_OPCODE_POWER | PPC_OPCODE_POWER2 +#define POWER32 PPC_OPCODE_POWER | PPC_OPCODE_32 +#define COM PPC_OPCODE_POWER | PPC_OPCODE_PPC | PPC_OPCODE_COMMON +#define COM32 PPC_OPCODE_POWER | PPC_OPCODE_PPC | PPC_OPCODE_COMMON | PPC_OPCODE_32 +#define M601 PPC_OPCODE_POWER | PPC_OPCODE_601 +#define PWRCOM PPC_OPCODE_POWER | PPC_OPCODE_601 | PPC_OPCODE_COMMON +#define MFDEC1 PPC_OPCODE_POWER +#define MFDEC2 PPC_OPCODE_PPC | PPC_OPCODE_601 | PPC_OPCODE_BOOKE +#define BOOKE PPC_OPCODE_BOOKE +#define BOOKE64 PPC_OPCODE_BOOKE64 +#define CLASSIC PPC_OPCODE_CLASSIC +#define PPCSPE PPC_OPCODE_SPE +#define PPCISEL PPC_OPCODE_ISEL +#define PPCEFS PPC_OPCODE_EFS +#define PPCBRLK PPC_OPCODE_BRLOCK +#define PPCPMR PPC_OPCODE_PMR +#define PPCCHLK PPC_OPCODE_CACHELCK +#define PPCCHLK64 PPC_OPCODE_CACHELCK | PPC_OPCODE_BOOKE64 +#define PPCRFMCI PPC_OPCODE_RFMCI + +/* The opcode table. + + The format of the opcode table is: + + NAME OPCODE MASK FLAGS { OPERANDS } + + NAME is the name of the instruction. + OPCODE is the instruction opcode. + MASK is the opcode mask; this is used to tell the disassembler + which bits in the actual opcode must match OPCODE. + FLAGS are flags indicated what processors support the instruction. + OPERANDS is the list of operands. + + The disassembler reads the table in order and prints the first + instruction which matches, so this table is sorted to put more + specific instructions before more general instructions. It is also + sorted by major opcode. */ + +const struct powerpc_opcode powerpc_opcodes[] = { +{ "attn", X(0,256), X_MASK, POWER4, { 0 } }, +{ "tdlgti", OPTO(2,TOLGT), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdllti", OPTO(2,TOLLT), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdeqi", OPTO(2,TOEQ), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdlgei", OPTO(2,TOLGE), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdlnli", OPTO(2,TOLNL), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdllei", OPTO(2,TOLLE), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdlngi", OPTO(2,TOLNG), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdgti", OPTO(2,TOGT), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdgei", OPTO(2,TOGE), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdnli", OPTO(2,TONL), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdlti", OPTO(2,TOLT), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdlei", OPTO(2,TOLE), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdngi", OPTO(2,TONG), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdnei", OPTO(2,TONE), OPTO_MASK, PPC64, { RA, SI } }, +{ "tdi", OP(2), OP_MASK, PPC64, { TO, RA, SI } }, + +{ "twlgti", OPTO(3,TOLGT), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tlgti", OPTO(3,TOLGT), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twllti", OPTO(3,TOLLT), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tllti", OPTO(3,TOLLT), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "tweqi", OPTO(3,TOEQ), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "teqi", OPTO(3,TOEQ), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twlgei", OPTO(3,TOLGE), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tlgei", OPTO(3,TOLGE), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twlnli", OPTO(3,TOLNL), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tlnli", OPTO(3,TOLNL), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twllei", OPTO(3,TOLLE), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tllei", OPTO(3,TOLLE), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twlngi", OPTO(3,TOLNG), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tlngi", OPTO(3,TOLNG), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twgti", OPTO(3,TOGT), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tgti", OPTO(3,TOGT), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twgei", OPTO(3,TOGE), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tgei", OPTO(3,TOGE), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twnli", OPTO(3,TONL), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tnli", OPTO(3,TONL), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twlti", OPTO(3,TOLT), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tlti", OPTO(3,TOLT), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twlei", OPTO(3,TOLE), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tlei", OPTO(3,TOLE), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twngi", OPTO(3,TONG), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tngi", OPTO(3,TONG), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twnei", OPTO(3,TONE), OPTO_MASK, PPCCOM, { RA, SI } }, +{ "tnei", OPTO(3,TONE), OPTO_MASK, PWRCOM, { RA, SI } }, +{ "twi", OP(3), OP_MASK, PPCCOM, { TO, RA, SI } }, +{ "ti", OP(3), OP_MASK, PWRCOM, { TO, RA, SI } }, + +{ "macchw", XO(4,172,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchw.", XO(4,172,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwo", XO(4,172,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwo.", XO(4,172,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchws", XO(4,236,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchws.", XO(4,236,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwso", XO(4,236,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwso.", XO(4,236,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwsu", XO(4,204,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwsu.", XO(4,204,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwsuo", XO(4,204,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwsuo.", XO(4,204,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwu", XO(4,140,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwu.", XO(4,140,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwuo", XO(4,140,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "macchwuo.", XO(4,140,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhw", XO(4,44,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhw.", XO(4,44,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwo", XO(4,44,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwo.", XO(4,44,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhws", XO(4,108,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhws.", XO(4,108,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwso", XO(4,108,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwso.", XO(4,108,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwsu", XO(4,76,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwsu.", XO(4,76,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwsuo", XO(4,76,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwsuo.", XO(4,76,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwu", XO(4,12,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwu.", XO(4,12,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwuo", XO(4,12,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "machhwuo.", XO(4,12,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhw", XO(4,428,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhw.", XO(4,428,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwo", XO(4,428,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwo.", XO(4,428,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhws", XO(4,492,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhws.", XO(4,492,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwso", XO(4,492,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwso.", XO(4,492,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwsu", XO(4,460,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwsu.", XO(4,460,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwsuo", XO(4,460,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwsuo.", XO(4,460,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwu", XO(4,396,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwu.", XO(4,396,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwuo", XO(4,396,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "maclhwuo.", XO(4,396,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mulchw", XRC(4,168,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mulchw.", XRC(4,168,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mulchwu", XRC(4,136,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mulchwu.", XRC(4,136,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mulhhw", XRC(4,40,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mulhhw.", XRC(4,40,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mulhhwu", XRC(4,8,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mulhhwu.", XRC(4,8,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mullhw", XRC(4,424,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mullhw.", XRC(4,424,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mullhwu", XRC(4,392,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mullhwu.", XRC(4,392,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmacchw", XO(4,174,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmacchw.", XO(4,174,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmacchwo", XO(4,174,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmacchwo.", XO(4,174,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmacchws", XO(4,238,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmacchws.", XO(4,238,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmacchwso", XO(4,238,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmacchwso.", XO(4,238,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmachhw", XO(4,46,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmachhw.", XO(4,46,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmachhwo", XO(4,46,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmachhwo.", XO(4,46,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmachhws", XO(4,110,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmachhws.", XO(4,110,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmachhwso", XO(4,110,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmachhwso.", XO(4,110,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmaclhw", XO(4,430,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmaclhw.", XO(4,430,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmaclhwo", XO(4,430,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmaclhwo.", XO(4,430,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmaclhws", XO(4,494,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmaclhws.", XO(4,494,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmaclhwso", XO(4,494,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "nmaclhwso.", XO(4,494,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, +{ "mfvscr", VX(4, 1540), VX_MASK, PPCVEC, { VD } }, +{ "mtvscr", VX(4, 1604), VX_MASK, PPCVEC, { VB } }, +{ "vaddcuw", VX(4, 384), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vaddfp", VX(4, 10), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vaddsbs", VX(4, 768), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vaddshs", VX(4, 832), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vaddsws", VX(4, 896), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vaddubm", VX(4, 0), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vaddubs", VX(4, 512), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vadduhm", VX(4, 64), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vadduhs", VX(4, 576), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vadduwm", VX(4, 128), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vadduws", VX(4, 640), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vand", VX(4, 1028), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vandc", VX(4, 1092), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vavgsb", VX(4, 1282), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vavgsh", VX(4, 1346), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vavgsw", VX(4, 1410), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vavgub", VX(4, 1026), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vavguh", VX(4, 1090), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vavguw", VX(4, 1154), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcfsx", VX(4, 842), VX_MASK, PPCVEC, { VD, VB, UIMM } }, +{ "vcfux", VX(4, 778), VX_MASK, PPCVEC, { VD, VB, UIMM } }, +{ "vcmpbfp", VXR(4, 966, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpbfp.", VXR(4, 966, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpeqfp", VXR(4, 198, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpeqfp.", VXR(4, 198, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpequb", VXR(4, 6, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpequb.", VXR(4, 6, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpequh", VXR(4, 70, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpequh.", VXR(4, 70, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpequw", VXR(4, 134, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpequw.", VXR(4, 134, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgefp", VXR(4, 454, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgefp.", VXR(4, 454, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtfp", VXR(4, 710, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtfp.", VXR(4, 710, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtsb", VXR(4, 774, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtsb.", VXR(4, 774, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtsh", VXR(4, 838, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtsh.", VXR(4, 838, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtsw", VXR(4, 902, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtsw.", VXR(4, 902, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtub", VXR(4, 518, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtub.", VXR(4, 518, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtuh", VXR(4, 582, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtuh.", VXR(4, 582, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtuw", VXR(4, 646, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vcmpgtuw.", VXR(4, 646, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, +{ "vctsxs", VX(4, 970), VX_MASK, PPCVEC, { VD, VB, UIMM } }, +{ "vctuxs", VX(4, 906), VX_MASK, PPCVEC, { VD, VB, UIMM } }, +{ "vexptefp", VX(4, 394), VX_MASK, PPCVEC, { VD, VB } }, +{ "vlogefp", VX(4, 458), VX_MASK, PPCVEC, { VD, VB } }, +{ "vmaddfp", VXA(4, 46), VXA_MASK, PPCVEC, { VD, VA, VC, VB } }, +{ "vmaxfp", VX(4, 1034), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmaxsb", VX(4, 258), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmaxsh", VX(4, 322), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmaxsw", VX(4, 386), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmaxub", VX(4, 2), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmaxuh", VX(4, 66), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmaxuw", VX(4, 130), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmhaddshs", VXA(4, 32), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, +{ "vmhraddshs", VXA(4, 33), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, +{ "vminfp", VX(4, 1098), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vminsb", VX(4, 770), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vminsh", VX(4, 834), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vminsw", VX(4, 898), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vminub", VX(4, 514), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vminuh", VX(4, 578), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vminuw", VX(4, 642), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmladduhm", VXA(4, 34), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, +{ "vmrghb", VX(4, 12), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmrghh", VX(4, 76), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmrghw", VX(4, 140), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmrglb", VX(4, 268), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmrglh", VX(4, 332), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmrglw", VX(4, 396), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmsummbm", VXA(4, 37), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, +{ "vmsumshm", VXA(4, 40), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, +{ "vmsumshs", VXA(4, 41), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, +{ "vmsumubm", VXA(4, 36), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, +{ "vmsumuhm", VXA(4, 38), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, +{ "vmsumuhs", VXA(4, 39), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, +{ "vmulesb", VX(4, 776), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmulesh", VX(4, 840), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmuleub", VX(4, 520), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmuleuh", VX(4, 584), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmulosb", VX(4, 264), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmulosh", VX(4, 328), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmuloub", VX(4, 8), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vmulouh", VX(4, 72), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vnmsubfp", VXA(4, 47), VXA_MASK, PPCVEC, { VD, VA, VC, VB } }, +{ "vnor", VX(4, 1284), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vor", VX(4, 1156), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vperm", VXA(4, 43), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, +{ "vpkpx", VX(4, 782), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vpkshss", VX(4, 398), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vpkshus", VX(4, 270), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vpkswss", VX(4, 462), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vpkswus", VX(4, 334), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vpkuhum", VX(4, 14), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vpkuhus", VX(4, 142), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vpkuwum", VX(4, 78), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vpkuwus", VX(4, 206), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vrefp", VX(4, 266), VX_MASK, PPCVEC, { VD, VB } }, +{ "vrfim", VX(4, 714), VX_MASK, PPCVEC, { VD, VB } }, +{ "vrfin", VX(4, 522), VX_MASK, PPCVEC, { VD, VB } }, +{ "vrfip", VX(4, 650), VX_MASK, PPCVEC, { VD, VB } }, +{ "vrfiz", VX(4, 586), VX_MASK, PPCVEC, { VD, VB } }, +{ "vrlb", VX(4, 4), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vrlh", VX(4, 68), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vrlw", VX(4, 132), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vrsqrtefp", VX(4, 330), VX_MASK, PPCVEC, { VD, VB } }, +{ "vsel", VXA(4, 42), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, +{ "vsl", VX(4, 452), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vslb", VX(4, 260), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsldoi", VXA(4, 44), VXA_MASK, PPCVEC, { VD, VA, VB, SHB } }, +{ "vslh", VX(4, 324), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vslo", VX(4, 1036), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vslw", VX(4, 388), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vspltb", VX(4, 524), VX_MASK, PPCVEC, { VD, VB, UIMM } }, +{ "vsplth", VX(4, 588), VX_MASK, PPCVEC, { VD, VB, UIMM } }, +{ "vspltisb", VX(4, 780), VX_MASK, PPCVEC, { VD, SIMM } }, +{ "vspltish", VX(4, 844), VX_MASK, PPCVEC, { VD, SIMM } }, +{ "vspltisw", VX(4, 908), VX_MASK, PPCVEC, { VD, SIMM } }, +{ "vspltw", VX(4, 652), VX_MASK, PPCVEC, { VD, VB, UIMM } }, +{ "vsr", VX(4, 708), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsrab", VX(4, 772), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsrah", VX(4, 836), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsraw", VX(4, 900), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsrb", VX(4, 516), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsrh", VX(4, 580), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsro", VX(4, 1100), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsrw", VX(4, 644), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsubcuw", VX(4, 1408), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsubfp", VX(4, 74), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsubsbs", VX(4, 1792), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsubshs", VX(4, 1856), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsubsws", VX(4, 1920), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsububm", VX(4, 1024), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsububs", VX(4, 1536), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsubuhm", VX(4, 1088), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsubuhs", VX(4, 1600), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsubuwm", VX(4, 1152), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsubuws", VX(4, 1664), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsumsws", VX(4, 1928), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsum2sws", VX(4, 1672), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsum4sbs", VX(4, 1800), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsum4shs", VX(4, 1608), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vsum4ubs", VX(4, 1544), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vupkhpx", VX(4, 846), VX_MASK, PPCVEC, { VD, VB } }, +{ "vupkhsb", VX(4, 526), VX_MASK, PPCVEC, { VD, VB } }, +{ "vupkhsh", VX(4, 590), VX_MASK, PPCVEC, { VD, VB } }, +{ "vupklpx", VX(4, 974), VX_MASK, PPCVEC, { VD, VB } }, +{ "vupklsb", VX(4, 654), VX_MASK, PPCVEC, { VD, VB } }, +{ "vupklsh", VX(4, 718), VX_MASK, PPCVEC, { VD, VB } }, +{ "vxor", VX(4, 1220), VX_MASK, PPCVEC, { VD, VA, VB } }, + +{ "evaddw", VX(4, 512), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evaddiw", VX(4, 514), VX_MASK, PPCSPE, { RS, RB, UIMM } }, +{ "evsubfw", VX(4, 516), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evsubw", VX(4, 516), VX_MASK, PPCSPE, { RS, RB, RA } }, +{ "evsubifw", VX(4, 518), VX_MASK, PPCSPE, { RS, UIMM, RB } }, +{ "evsubiw", VX(4, 518), VX_MASK, PPCSPE, { RS, RB, UIMM } }, +{ "evabs", VX(4, 520), VX_MASK, PPCSPE, { RS, RA } }, +{ "evneg", VX(4, 521), VX_MASK, PPCSPE, { RS, RA } }, +{ "evextsb", VX(4, 522), VX_MASK, PPCSPE, { RS, RA } }, +{ "evextsh", VX(4, 523), VX_MASK, PPCSPE, { RS, RA } }, +{ "evrndw", VX(4, 524), VX_MASK, PPCSPE, { RS, RA } }, +{ "evcntlzw", VX(4, 525), VX_MASK, PPCSPE, { RS, RA } }, +{ "evcntlsw", VX(4, 526), VX_MASK, PPCSPE, { RS, RA } }, + +{ "brinc", VX(4, 527), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evand", VX(4, 529), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evandc", VX(4, 530), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmr", VX(4, 535), VX_MASK, PPCSPE, { RS, RA, BBA } }, +{ "evor", VX(4, 535), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evorc", VX(4, 539), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evxor", VX(4, 534), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "eveqv", VX(4, 537), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evnand", VX(4, 542), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evnot", VX(4, 536), VX_MASK, PPCSPE, { RS, RA, BBA } }, +{ "evnor", VX(4, 536), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evrlw", VX(4, 552), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evrlwi", VX(4, 554), VX_MASK, PPCSPE, { RS, RA, EVUIMM } }, +{ "evslw", VX(4, 548), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evslwi", VX(4, 550), VX_MASK, PPCSPE, { RS, RA, EVUIMM } }, +{ "evsrws", VX(4, 545), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evsrwu", VX(4, 544), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evsrwis", VX(4, 547), VX_MASK, PPCSPE, { RS, RA, EVUIMM } }, +{ "evsrwiu", VX(4, 546), VX_MASK, PPCSPE, { RS, RA, EVUIMM } }, +{ "evsplati", VX(4, 553), VX_MASK, PPCSPE, { RS, SIMM } }, +{ "evsplatfi", VX(4, 555), VX_MASK, PPCSPE, { RS, SIMM } }, +{ "evmergehi", VX(4, 556), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmergelo", VX(4, 557), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmergehilo",VX(4,558), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmergelohi",VX(4,559), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evcmpgts", VX(4, 561), VX_MASK, PPCSPE, { CRFD, RA, RB } }, +{ "evcmpgtu", VX(4, 560), VX_MASK, PPCSPE, { CRFD, RA, RB } }, +{ "evcmplts", VX(4, 563), VX_MASK, PPCSPE, { CRFD, RA, RB } }, +{ "evcmpltu", VX(4, 562), VX_MASK, PPCSPE, { CRFD, RA, RB } }, +{ "evcmpeq", VX(4, 564), VX_MASK, PPCSPE, { CRFD, RA, RB } }, +{ "evsel", EVSEL(4,79),EVSEL_MASK, PPCSPE, { RS, RA, RB, CRFS } }, + +{ "evldd", VX(4, 769), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, +{ "evlddx", VX(4, 768), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evldw", VX(4, 771), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, +{ "evldwx", VX(4, 770), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evldh", VX(4, 773), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, +{ "evldhx", VX(4, 772), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evlwhe", VX(4, 785), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, +{ "evlwhex", VX(4, 784), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evlwhou", VX(4, 789), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, +{ "evlwhoux", VX(4, 788), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evlwhos", VX(4, 791), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, +{ "evlwhosx", VX(4, 790), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evlwwsplat",VX(4, 793), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, +{ "evlwwsplatx",VX(4, 792), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evlwhsplat",VX(4, 797), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, +{ "evlwhsplatx",VX(4, 796), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evlhhesplat",VX(4, 777), VX_MASK, PPCSPE, { RS, EVUIMM_2, RA } }, +{ "evlhhesplatx",VX(4, 776), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evlhhousplat",VX(4, 781), VX_MASK, PPCSPE, { RS, EVUIMM_2, RA } }, +{ "evlhhousplatx",VX(4, 780), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evlhhossplat",VX(4, 783), VX_MASK, PPCSPE, { RS, EVUIMM_2, RA } }, +{ "evlhhossplatx",VX(4, 782), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evstdd", VX(4, 801), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, +{ "evstddx", VX(4, 800), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evstdw", VX(4, 803), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, +{ "evstdwx", VX(4, 802), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evstdh", VX(4, 805), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, +{ "evstdhx", VX(4, 804), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evstwwe", VX(4, 825), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, +{ "evstwwex", VX(4, 824), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evstwwo", VX(4, 829), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, +{ "evstwwox", VX(4, 828), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evstwhe", VX(4, 817), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, +{ "evstwhex", VX(4, 816), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evstwho", VX(4, 821), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, +{ "evstwhox", VX(4, 820), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evfsabs", VX(4, 644), VX_MASK, PPCSPE, { RS, RA } }, +{ "evfsnabs", VX(4, 645), VX_MASK, PPCSPE, { RS, RA } }, +{ "evfsneg", VX(4, 646), VX_MASK, PPCSPE, { RS, RA } }, +{ "evfsadd", VX(4, 640), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evfssub", VX(4, 641), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evfsmul", VX(4, 648), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evfsdiv", VX(4, 649), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evfscmpgt", VX(4, 652), VX_MASK, PPCSPE, { CRFD, RA, RB } }, +{ "evfscmplt", VX(4, 653), VX_MASK, PPCSPE, { CRFD, RA, RB } }, +{ "evfscmpeq", VX(4, 654), VX_MASK, PPCSPE, { CRFD, RA, RB } }, +{ "evfststgt", VX(4, 668), VX_MASK, PPCSPE, { CRFD, RA, RB } }, +{ "evfststlt", VX(4, 669), VX_MASK, PPCSPE, { CRFD, RA, RB } }, +{ "evfststeq", VX(4, 670), VX_MASK, PPCSPE, { CRFD, RA, RB } }, +{ "evfscfui", VX(4, 656), VX_MASK, PPCSPE, { RS, RB } }, +{ "evfsctuiz", VX(4, 664), VX_MASK, PPCSPE, { RS, RB } }, +{ "evfscfsi", VX(4, 657), VX_MASK, PPCSPE, { RS, RB } }, +{ "evfscfuf", VX(4, 658), VX_MASK, PPCSPE, { RS, RB } }, +{ "evfscfsf", VX(4, 659), VX_MASK, PPCSPE, { RS, RB } }, +{ "evfsctui", VX(4, 660), VX_MASK, PPCSPE, { RS, RB } }, +{ "evfsctsi", VX(4, 661), VX_MASK, PPCSPE, { RS, RB } }, +{ "evfsctsiz", VX(4, 666), VX_MASK, PPCSPE, { RS, RB } }, +{ "evfsctuf", VX(4, 662), VX_MASK, PPCSPE, { RS, RB } }, +{ "evfsctsf", VX(4, 663), VX_MASK, PPCSPE, { RS, RB } }, + +{ "efsabs", VX(4, 708), VX_MASK, PPCEFS, { RS, RA } }, +{ "efsnabs", VX(4, 709), VX_MASK, PPCEFS, { RS, RA } }, +{ "efsneg", VX(4, 710), VX_MASK, PPCEFS, { RS, RA } }, +{ "efsadd", VX(4, 704), VX_MASK, PPCEFS, { RS, RA, RB } }, +{ "efssub", VX(4, 705), VX_MASK, PPCEFS, { RS, RA, RB } }, +{ "efsmul", VX(4, 712), VX_MASK, PPCEFS, { RS, RA, RB } }, +{ "efsdiv", VX(4, 713), VX_MASK, PPCEFS, { RS, RA, RB } }, +{ "efscmpgt", VX(4, 716), VX_MASK, PPCEFS, { CRFD, RA, RB } }, +{ "efscmplt", VX(4, 717), VX_MASK, PPCEFS, { CRFD, RA, RB } }, +{ "efscmpeq", VX(4, 718), VX_MASK, PPCEFS, { CRFD, RA, RB } }, +{ "efststgt", VX(4, 732), VX_MASK, PPCEFS, { CRFD, RA, RB } }, +{ "efststlt", VX(4, 733), VX_MASK, PPCEFS, { CRFD, RA, RB } }, +{ "efststeq", VX(4, 734), VX_MASK, PPCEFS, { CRFD, RA, RB } }, +{ "efscfui", VX(4, 720), VX_MASK, PPCEFS, { RS, RB } }, +{ "efsctuiz", VX(4, 728), VX_MASK, PPCEFS, { RS, RB } }, +{ "efscfsi", VX(4, 721), VX_MASK, PPCEFS, { RS, RB } }, +{ "efscfuf", VX(4, 722), VX_MASK, PPCEFS, { RS, RB } }, +{ "efscfsf", VX(4, 723), VX_MASK, PPCEFS, { RS, RB } }, +{ "efsctui", VX(4, 724), VX_MASK, PPCEFS, { RS, RB } }, +{ "efsctsi", VX(4, 725), VX_MASK, PPCEFS, { RS, RB } }, +{ "efsctsiz", VX(4, 730), VX_MASK, PPCEFS, { RS, RB } }, +{ "efsctuf", VX(4, 726), VX_MASK, PPCEFS, { RS, RB } }, +{ "efsctsf", VX(4, 727), VX_MASK, PPCEFS, { RS, RB } }, + +{ "evmhossf", VX(4, 1031), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhossfa", VX(4, 1063), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhosmf", VX(4, 1039), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhosmfa", VX(4, 1071), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhosmi", VX(4, 1037), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhosmia", VX(4, 1069), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhoumi", VX(4, 1036), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhoumia", VX(4, 1068), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhessf", VX(4, 1027), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhessfa", VX(4, 1059), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhesmf", VX(4, 1035), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhesmfa", VX(4, 1067), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhesmi", VX(4, 1033), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhesmia", VX(4, 1065), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmheumi", VX(4, 1032), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmheumia", VX(4, 1064), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evmhossfaaw",VX(4, 1287), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhossiaaw",VX(4, 1285), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhosmfaaw",VX(4, 1295), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhosmiaaw",VX(4, 1293), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhousiaaw",VX(4, 1284), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhoumiaaw",VX(4, 1292), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhessfaaw",VX(4, 1283), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhessiaaw",VX(4, 1281), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhesmfaaw",VX(4, 1291), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhesmiaaw",VX(4, 1289), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmheusiaaw",VX(4, 1280), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmheumiaaw",VX(4, 1288), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evmhossfanw",VX(4, 1415), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhossianw",VX(4, 1413), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhosmfanw",VX(4, 1423), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhosmianw",VX(4, 1421), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhousianw",VX(4, 1412), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhoumianw",VX(4, 1420), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhessfanw",VX(4, 1411), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhessianw",VX(4, 1409), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhesmfanw",VX(4, 1419), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhesmianw",VX(4, 1417), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmheusianw",VX(4, 1408), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmheumianw",VX(4, 1416), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evmhogsmfaa",VX(4, 1327), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhogsmiaa",VX(4, 1325), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhogumiaa",VX(4, 1324), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhegsmfaa",VX(4, 1323), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhegsmiaa",VX(4, 1321), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhegumiaa",VX(4, 1320), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evmhogsmfan",VX(4, 1455), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhogsmian",VX(4, 1453), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhogumian",VX(4, 1452), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhegsmfan",VX(4, 1451), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhegsmian",VX(4, 1449), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmhegumian",VX(4, 1448), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evmwhssf", VX(4, 1095), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwhssfa", VX(4, 1127), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwhsmf", VX(4, 1103), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwhsmfa", VX(4, 1135), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwhsmi", VX(4, 1101), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwhsmia", VX(4, 1133), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwhumi", VX(4, 1100), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwhumia", VX(4, 1132), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evmwlumi", VX(4, 1096), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwlumia", VX(4, 1128), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evmwlssiaaw",VX(4, 1345), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwlsmiaaw",VX(4, 1353), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwlusiaaw",VX(4, 1344), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwlumiaaw",VX(4, 1352), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evmwlssianw",VX(4, 1473), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwlsmianw",VX(4, 1481), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwlusianw",VX(4, 1472), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwlumianw",VX(4, 1480), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evmwssf", VX(4, 1107), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwssfa", VX(4, 1139), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwsmf", VX(4, 1115), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwsmfa", VX(4, 1147), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwsmi", VX(4, 1113), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwsmia", VX(4, 1145), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwumi", VX(4, 1112), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwumia", VX(4, 1144), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evmwssfaa", VX(4, 1363), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwsmfaa", VX(4, 1371), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwsmiaa", VX(4, 1369), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwumiaa", VX(4, 1368), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evmwssfan", VX(4, 1491), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwsmfan", VX(4, 1499), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwsmian", VX(4, 1497), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evmwumian", VX(4, 1496), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "evaddssiaaw",VX(4, 1217), VX_MASK, PPCSPE, { RS, RA } }, +{ "evaddsmiaaw",VX(4, 1225), VX_MASK, PPCSPE, { RS, RA } }, +{ "evaddusiaaw",VX(4, 1216), VX_MASK, PPCSPE, { RS, RA } }, +{ "evaddumiaaw",VX(4, 1224), VX_MASK, PPCSPE, { RS, RA } }, + +{ "evsubfssiaaw",VX(4, 1219), VX_MASK, PPCSPE, { RS, RA } }, +{ "evsubfsmiaaw",VX(4, 1227), VX_MASK, PPCSPE, { RS, RA } }, +{ "evsubfusiaaw",VX(4, 1218), VX_MASK, PPCSPE, { RS, RA } }, +{ "evsubfumiaaw",VX(4, 1226), VX_MASK, PPCSPE, { RS, RA } }, + +{ "evmra", VX(4, 1220), VX_MASK, PPCSPE, { RS, RA } }, + +{ "evdivws", VX(4, 1222), VX_MASK, PPCSPE, { RS, RA, RB } }, +{ "evdivwu", VX(4, 1223), VX_MASK, PPCSPE, { RS, RA, RB } }, + +{ "mulli", OP(7), OP_MASK, PPCCOM, { RT, RA, SI } }, +{ "muli", OP(7), OP_MASK, PWRCOM, { RT, RA, SI } }, + +{ "subfic", OP(8), OP_MASK, PPCCOM, { RT, RA, SI } }, +{ "sfi", OP(8), OP_MASK, PWRCOM, { RT, RA, SI } }, + +{ "dozi", OP(9), OP_MASK, M601, { RT, RA, SI } }, + +{ "bce", B(9,0,0), B_MASK, BOOKE64, { BO, BI, BD } }, +{ "bcel", B(9,0,1), B_MASK, BOOKE64, { BO, BI, BD } }, +{ "bcea", B(9,1,0), B_MASK, BOOKE64, { BO, BI, BDA } }, +{ "bcela", B(9,1,1), B_MASK, BOOKE64, { BO, BI, BDA } }, + +{ "cmplwi", OPL(10,0), OPL_MASK, PPCCOM, { OBF, RA, UI } }, +{ "cmpldi", OPL(10,1), OPL_MASK, PPC64, { OBF, RA, UI } }, +{ "cmpli", OP(10), OP_MASK, PPC, { BF, L, RA, UI } }, +{ "cmpli", OP(10), OP_MASK, PWRCOM, { BF, RA, UI } }, + +{ "cmpwi", OPL(11,0), OPL_MASK, PPCCOM, { OBF, RA, SI } }, +{ "cmpdi", OPL(11,1), OPL_MASK, PPC64, { OBF, RA, SI } }, +{ "cmpi", OP(11), OP_MASK, PPC, { BF, L, RA, SI } }, +{ "cmpi", OP(11), OP_MASK, PWRCOM, { BF, RA, SI } }, + +{ "addic", OP(12), OP_MASK, PPCCOM, { RT, RA, SI } }, +{ "ai", OP(12), OP_MASK, PWRCOM, { RT, RA, SI } }, +{ "subic", OP(12), OP_MASK, PPCCOM, { RT, RA, NSI } }, + +{ "addic.", OP(13), OP_MASK, PPCCOM, { RT, RA, SI } }, +{ "ai.", OP(13), OP_MASK, PWRCOM, { RT, RA, SI } }, +{ "subic.", OP(13), OP_MASK, PPCCOM, { RT, RA, NSI } }, + +{ "li", OP(14), DRA_MASK, PPCCOM, { RT, SI } }, +{ "lil", OP(14), DRA_MASK, PWRCOM, { RT, SI } }, +{ "addi", OP(14), OP_MASK, PPCCOM, { RT, RA, SI } }, +{ "cal", OP(14), OP_MASK, PWRCOM, { RT, D, RA } }, +{ "subi", OP(14), OP_MASK, PPCCOM, { RT, RA, NSI } }, +{ "la", OP(14), OP_MASK, PPCCOM, { RT, D, RA } }, + +{ "lis", OP(15), DRA_MASK, PPCCOM, { RT, SISIGNOPT } }, +{ "liu", OP(15), DRA_MASK, PWRCOM, { RT, SISIGNOPT } }, +{ "addis", OP(15), OP_MASK, PPCCOM, { RT,RA,SISIGNOPT } }, +{ "cau", OP(15), OP_MASK, PWRCOM, { RT,RA,SISIGNOPT } }, +{ "subis", OP(15), OP_MASK, PPCCOM, { RT, RA, NSI } }, + +{ "bdnz-", BBO(16,BODNZ,0,0), BBOATBI_MASK, PPCCOM, { BDM } }, +{ "bdnz+", BBO(16,BODNZ,0,0), BBOATBI_MASK, PPCCOM, { BDP } }, +{ "bdnz", BBO(16,BODNZ,0,0), BBOATBI_MASK, PPCCOM, { BD } }, +{ "bdn", BBO(16,BODNZ,0,0), BBOATBI_MASK, PWRCOM, { BD } }, +{ "bdnzl-", BBO(16,BODNZ,0,1), BBOATBI_MASK, PPCCOM, { BDM } }, +{ "bdnzl+", BBO(16,BODNZ,0,1), BBOATBI_MASK, PPCCOM, { BDP } }, +{ "bdnzl", BBO(16,BODNZ,0,1), BBOATBI_MASK, PPCCOM, { BD } }, +{ "bdnl", BBO(16,BODNZ,0,1), BBOATBI_MASK, PWRCOM, { BD } }, +{ "bdnza-", BBO(16,BODNZ,1,0), BBOATBI_MASK, PPCCOM, { BDMA } }, +{ "bdnza+", BBO(16,BODNZ,1,0), BBOATBI_MASK, PPCCOM, { BDPA } }, +{ "bdnza", BBO(16,BODNZ,1,0), BBOATBI_MASK, PPCCOM, { BDA } }, +{ "bdna", BBO(16,BODNZ,1,0), BBOATBI_MASK, PWRCOM, { BDA } }, +{ "bdnzla-", BBO(16,BODNZ,1,1), BBOATBI_MASK, PPCCOM, { BDMA } }, +{ "bdnzla+", BBO(16,BODNZ,1,1), BBOATBI_MASK, PPCCOM, { BDPA } }, +{ "bdnzla", BBO(16,BODNZ,1,1), BBOATBI_MASK, PPCCOM, { BDA } }, +{ "bdnla", BBO(16,BODNZ,1,1), BBOATBI_MASK, PWRCOM, { BDA } }, +{ "bdz-", BBO(16,BODZ,0,0), BBOATBI_MASK, PPCCOM, { BDM } }, +{ "bdz+", BBO(16,BODZ,0,0), BBOATBI_MASK, PPCCOM, { BDP } }, +{ "bdz", BBO(16,BODZ,0,0), BBOATBI_MASK, COM, { BD } }, +{ "bdzl-", BBO(16,BODZ,0,1), BBOATBI_MASK, PPCCOM, { BDM } }, +{ "bdzl+", BBO(16,BODZ,0,1), BBOATBI_MASK, PPCCOM, { BDP } }, +{ "bdzl", BBO(16,BODZ,0,1), BBOATBI_MASK, COM, { BD } }, +{ "bdza-", BBO(16,BODZ,1,0), BBOATBI_MASK, PPCCOM, { BDMA } }, +{ "bdza+", BBO(16,BODZ,1,0), BBOATBI_MASK, PPCCOM, { BDPA } }, +{ "bdza", BBO(16,BODZ,1,0), BBOATBI_MASK, COM, { BDA } }, +{ "bdzla-", BBO(16,BODZ,1,1), BBOATBI_MASK, PPCCOM, { BDMA } }, +{ "bdzla+", BBO(16,BODZ,1,1), BBOATBI_MASK, PPCCOM, { BDPA } }, +{ "bdzla", BBO(16,BODZ,1,1), BBOATBI_MASK, COM, { BDA } }, +{ "blt-", BBOCB(16,BOT,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "blt+", BBOCB(16,BOT,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "blt", BBOCB(16,BOT,CBLT,0,0), BBOATCB_MASK, COM, { CR, BD } }, +{ "bltl-", BBOCB(16,BOT,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bltl+", BBOCB(16,BOT,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bltl", BBOCB(16,BOT,CBLT,0,1), BBOATCB_MASK, COM, { CR, BD } }, +{ "blta-", BBOCB(16,BOT,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "blta+", BBOCB(16,BOT,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "blta", BBOCB(16,BOT,CBLT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bltla-", BBOCB(16,BOT,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bltla+", BBOCB(16,BOT,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bltla", BBOCB(16,BOT,CBLT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bgt-", BBOCB(16,BOT,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bgt+", BBOCB(16,BOT,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bgt", BBOCB(16,BOT,CBGT,0,0), BBOATCB_MASK, COM, { CR, BD } }, +{ "bgtl-", BBOCB(16,BOT,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bgtl+", BBOCB(16,BOT,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bgtl", BBOCB(16,BOT,CBGT,0,1), BBOATCB_MASK, COM, { CR, BD } }, +{ "bgta-", BBOCB(16,BOT,CBGT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bgta+", BBOCB(16,BOT,CBGT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bgta", BBOCB(16,BOT,CBGT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bgtla-", BBOCB(16,BOT,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bgtla+", BBOCB(16,BOT,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bgtla", BBOCB(16,BOT,CBGT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, +{ "beq-", BBOCB(16,BOT,CBEQ,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "beq+", BBOCB(16,BOT,CBEQ,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "beq", BBOCB(16,BOT,CBEQ,0,0), BBOATCB_MASK, COM, { CR, BD } }, +{ "beql-", BBOCB(16,BOT,CBEQ,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "beql+", BBOCB(16,BOT,CBEQ,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "beql", BBOCB(16,BOT,CBEQ,0,1), BBOATCB_MASK, COM, { CR, BD } }, +{ "beqa-", BBOCB(16,BOT,CBEQ,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "beqa+", BBOCB(16,BOT,CBEQ,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "beqa", BBOCB(16,BOT,CBEQ,1,0), BBOATCB_MASK, COM, { CR, BDA } }, +{ "beqla-", BBOCB(16,BOT,CBEQ,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "beqla+", BBOCB(16,BOT,CBEQ,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "beqla", BBOCB(16,BOT,CBEQ,1,1), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bso-", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bso+", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bso", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, COM, { CR, BD } }, +{ "bsol-", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bsol+", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bsol", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, COM, { CR, BD } }, +{ "bsoa-", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bsoa+", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bsoa", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bsola-", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bsola+", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bsola", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bun-", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bun+", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bun", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BD } }, +{ "bunl-", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bunl+", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bunl", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BD } }, +{ "buna-", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "buna+", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "buna", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDA } }, +{ "bunla-", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bunla+", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bunla", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDA } }, +{ "bge-", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bge+", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bge", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, COM, { CR, BD } }, +{ "bgel-", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bgel+", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bgel", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, COM, { CR, BD } }, +{ "bgea-", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bgea+", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bgea", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bgela-", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bgela+", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bgela", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bnl-", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bnl+", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bnl", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, COM, { CR, BD } }, +{ "bnll-", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bnll+", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bnll", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, COM, { CR, BD } }, +{ "bnla-", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bnla+", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bnla", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bnlla-", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bnlla+", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bnlla", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, +{ "ble-", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "ble+", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "ble", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, COM, { CR, BD } }, +{ "blel-", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "blel+", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "blel", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, COM, { CR, BD } }, +{ "blea-", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "blea+", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "blea", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, +{ "blela-", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "blela+", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "blela", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bng-", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bng+", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bng", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, COM, { CR, BD } }, +{ "bngl-", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bngl+", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bngl", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, COM, { CR, BD } }, +{ "bnga-", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bnga+", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bnga", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bngla-", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bngla+", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bngla", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bne-", BBOCB(16,BOF,CBEQ,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bne+", BBOCB(16,BOF,CBEQ,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bne", BBOCB(16,BOF,CBEQ,0,0), BBOATCB_MASK, COM, { CR, BD } }, +{ "bnel-", BBOCB(16,BOF,CBEQ,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bnel+", BBOCB(16,BOF,CBEQ,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bnel", BBOCB(16,BOF,CBEQ,0,1), BBOATCB_MASK, COM, { CR, BD } }, +{ "bnea-", BBOCB(16,BOF,CBEQ,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bnea+", BBOCB(16,BOF,CBEQ,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bnea", BBOCB(16,BOF,CBEQ,1,0), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bnela-", BBOCB(16,BOF,CBEQ,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bnela+", BBOCB(16,BOF,CBEQ,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bnela", BBOCB(16,BOF,CBEQ,1,1), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bns-", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bns+", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bns", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, COM, { CR, BD } }, +{ "bnsl-", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bnsl+", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bnsl", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, COM, { CR, BD } }, +{ "bnsa-", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bnsa+", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bnsa", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bnsla-", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bnsla+", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bnsla", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, COM, { CR, BDA } }, +{ "bnu-", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bnu+", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bnu", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BD } }, +{ "bnul-", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bnul+", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, +{ "bnul", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BD } }, +{ "bnua-", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bnua+", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bnua", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDA } }, +{ "bnula-", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, +{ "bnula+", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, +{ "bnula", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDA } }, +{ "bdnzt-", BBO(16,BODNZT,0,0), BBOY_MASK, NOPOWER4, { BI, BDM } }, +{ "bdnzt+", BBO(16,BODNZT,0,0), BBOY_MASK, NOPOWER4, { BI, BDP } }, +{ "bdnzt", BBO(16,BODNZT,0,0), BBOY_MASK, PPCCOM, { BI, BD } }, +{ "bdnztl-", BBO(16,BODNZT,0,1), BBOY_MASK, NOPOWER4, { BI, BDM } }, +{ "bdnztl+", BBO(16,BODNZT,0,1), BBOY_MASK, NOPOWER4, { BI, BDP } }, +{ "bdnztl", BBO(16,BODNZT,0,1), BBOY_MASK, PPCCOM, { BI, BD } }, +{ "bdnzta-", BBO(16,BODNZT,1,0), BBOY_MASK, NOPOWER4, { BI, BDMA } }, +{ "bdnzta+", BBO(16,BODNZT,1,0), BBOY_MASK, NOPOWER4, { BI, BDPA } }, +{ "bdnzta", BBO(16,BODNZT,1,0), BBOY_MASK, PPCCOM, { BI, BDA } }, +{ "bdnztla-",BBO(16,BODNZT,1,1), BBOY_MASK, NOPOWER4, { BI, BDMA } }, +{ "bdnztla+",BBO(16,BODNZT,1,1), BBOY_MASK, NOPOWER4, { BI, BDPA } }, +{ "bdnztla", BBO(16,BODNZT,1,1), BBOY_MASK, PPCCOM, { BI, BDA } }, +{ "bdnzf-", BBO(16,BODNZF,0,0), BBOY_MASK, NOPOWER4, { BI, BDM } }, +{ "bdnzf+", BBO(16,BODNZF,0,0), BBOY_MASK, NOPOWER4, { BI, BDP } }, +{ "bdnzf", BBO(16,BODNZF,0,0), BBOY_MASK, PPCCOM, { BI, BD } }, +{ "bdnzfl-", BBO(16,BODNZF,0,1), BBOY_MASK, NOPOWER4, { BI, BDM } }, +{ "bdnzfl+", BBO(16,BODNZF,0,1), BBOY_MASK, NOPOWER4, { BI, BDP } }, +{ "bdnzfl", BBO(16,BODNZF,0,1), BBOY_MASK, PPCCOM, { BI, BD } }, +{ "bdnzfa-", BBO(16,BODNZF,1,0), BBOY_MASK, NOPOWER4, { BI, BDMA } }, +{ "bdnzfa+", BBO(16,BODNZF,1,0), BBOY_MASK, NOPOWER4, { BI, BDPA } }, +{ "bdnzfa", BBO(16,BODNZF,1,0), BBOY_MASK, PPCCOM, { BI, BDA } }, +{ "bdnzfla-",BBO(16,BODNZF,1,1), BBOY_MASK, NOPOWER4, { BI, BDMA } }, +{ "bdnzfla+",BBO(16,BODNZF,1,1), BBOY_MASK, NOPOWER4, { BI, BDPA } }, +{ "bdnzfla", BBO(16,BODNZF,1,1), BBOY_MASK, PPCCOM, { BI, BDA } }, +{ "bt-", BBO(16,BOT,0,0), BBOAT_MASK, PPCCOM, { BI, BDM } }, +{ "bt+", BBO(16,BOT,0,0), BBOAT_MASK, PPCCOM, { BI, BDP } }, +{ "bt", BBO(16,BOT,0,0), BBOAT_MASK, PPCCOM, { BI, BD } }, +{ "bbt", BBO(16,BOT,0,0), BBOAT_MASK, PWRCOM, { BI, BD } }, +{ "btl-", BBO(16,BOT,0,1), BBOAT_MASK, PPCCOM, { BI, BDM } }, +{ "btl+", BBO(16,BOT,0,1), BBOAT_MASK, PPCCOM, { BI, BDP } }, +{ "btl", BBO(16,BOT,0,1), BBOAT_MASK, PPCCOM, { BI, BD } }, +{ "bbtl", BBO(16,BOT,0,1), BBOAT_MASK, PWRCOM, { BI, BD } }, +{ "bta-", BBO(16,BOT,1,0), BBOAT_MASK, PPCCOM, { BI, BDMA } }, +{ "bta+", BBO(16,BOT,1,0), BBOAT_MASK, PPCCOM, { BI, BDPA } }, +{ "bta", BBO(16,BOT,1,0), BBOAT_MASK, PPCCOM, { BI, BDA } }, +{ "bbta", BBO(16,BOT,1,0), BBOAT_MASK, PWRCOM, { BI, BDA } }, +{ "btla-", BBO(16,BOT,1,1), BBOAT_MASK, PPCCOM, { BI, BDMA } }, +{ "btla+", BBO(16,BOT,1,1), BBOAT_MASK, PPCCOM, { BI, BDPA } }, +{ "btla", BBO(16,BOT,1,1), BBOAT_MASK, PPCCOM, { BI, BDA } }, +{ "bbtla", BBO(16,BOT,1,1), BBOAT_MASK, PWRCOM, { BI, BDA } }, +{ "bf-", BBO(16,BOF,0,0), BBOAT_MASK, PPCCOM, { BI, BDM } }, +{ "bf+", BBO(16,BOF,0,0), BBOAT_MASK, PPCCOM, { BI, BDP } }, +{ "bf", BBO(16,BOF,0,0), BBOAT_MASK, PPCCOM, { BI, BD } }, +{ "bbf", BBO(16,BOF,0,0), BBOAT_MASK, PWRCOM, { BI, BD } }, +{ "bfl-", BBO(16,BOF,0,1), BBOAT_MASK, PPCCOM, { BI, BDM } }, +{ "bfl+", BBO(16,BOF,0,1), BBOAT_MASK, PPCCOM, { BI, BDP } }, +{ "bfl", BBO(16,BOF,0,1), BBOAT_MASK, PPCCOM, { BI, BD } }, +{ "bbfl", BBO(16,BOF,0,1), BBOAT_MASK, PWRCOM, { BI, BD } }, +{ "bfa-", BBO(16,BOF,1,0), BBOAT_MASK, PPCCOM, { BI, BDMA } }, +{ "bfa+", BBO(16,BOF,1,0), BBOAT_MASK, PPCCOM, { BI, BDPA } }, +{ "bfa", BBO(16,BOF,1,0), BBOAT_MASK, PPCCOM, { BI, BDA } }, +{ "bbfa", BBO(16,BOF,1,0), BBOAT_MASK, PWRCOM, { BI, BDA } }, +{ "bfla-", BBO(16,BOF,1,1), BBOAT_MASK, PPCCOM, { BI, BDMA } }, +{ "bfla+", BBO(16,BOF,1,1), BBOAT_MASK, PPCCOM, { BI, BDPA } }, +{ "bfla", BBO(16,BOF,1,1), BBOAT_MASK, PPCCOM, { BI, BDA } }, +{ "bbfla", BBO(16,BOF,1,1), BBOAT_MASK, PWRCOM, { BI, BDA } }, +{ "bdzt-", BBO(16,BODZT,0,0), BBOY_MASK, NOPOWER4, { BI, BDM } }, +{ "bdzt+", BBO(16,BODZT,0,0), BBOY_MASK, NOPOWER4, { BI, BDP } }, +{ "bdzt", BBO(16,BODZT,0,0), BBOY_MASK, PPCCOM, { BI, BD } }, +{ "bdztl-", BBO(16,BODZT,0,1), BBOY_MASK, NOPOWER4, { BI, BDM } }, +{ "bdztl+", BBO(16,BODZT,0,1), BBOY_MASK, NOPOWER4, { BI, BDP } }, +{ "bdztl", BBO(16,BODZT,0,1), BBOY_MASK, PPCCOM, { BI, BD } }, +{ "bdzta-", BBO(16,BODZT,1,0), BBOY_MASK, NOPOWER4, { BI, BDMA } }, +{ "bdzta+", BBO(16,BODZT,1,0), BBOY_MASK, NOPOWER4, { BI, BDPA } }, +{ "bdzta", BBO(16,BODZT,1,0), BBOY_MASK, PPCCOM, { BI, BDA } }, +{ "bdztla-", BBO(16,BODZT,1,1), BBOY_MASK, NOPOWER4, { BI, BDMA } }, +{ "bdztla+", BBO(16,BODZT,1,1), BBOY_MASK, NOPOWER4, { BI, BDPA } }, +{ "bdztla", BBO(16,BODZT,1,1), BBOY_MASK, PPCCOM, { BI, BDA } }, +{ "bdzf-", BBO(16,BODZF,0,0), BBOY_MASK, NOPOWER4, { BI, BDM } }, +{ "bdzf+", BBO(16,BODZF,0,0), BBOY_MASK, NOPOWER4, { BI, BDP } }, +{ "bdzf", BBO(16,BODZF,0,0), BBOY_MASK, PPCCOM, { BI, BD } }, +{ "bdzfl-", BBO(16,BODZF,0,1), BBOY_MASK, NOPOWER4, { BI, BDM } }, +{ "bdzfl+", BBO(16,BODZF,0,1), BBOY_MASK, NOPOWER4, { BI, BDP } }, +{ "bdzfl", BBO(16,BODZF,0,1), BBOY_MASK, PPCCOM, { BI, BD } }, +{ "bdzfa-", BBO(16,BODZF,1,0), BBOY_MASK, NOPOWER4, { BI, BDMA } }, +{ "bdzfa+", BBO(16,BODZF,1,0), BBOY_MASK, NOPOWER4, { BI, BDPA } }, +{ "bdzfa", BBO(16,BODZF,1,0), BBOY_MASK, PPCCOM, { BI, BDA } }, +{ "bdzfla-", BBO(16,BODZF,1,1), BBOY_MASK, NOPOWER4, { BI, BDMA } }, +{ "bdzfla+", BBO(16,BODZF,1,1), BBOY_MASK, NOPOWER4, { BI, BDPA } }, +{ "bdzfla", BBO(16,BODZF,1,1), BBOY_MASK, PPCCOM, { BI, BDA } }, +{ "bc-", B(16,0,0), B_MASK, PPCCOM, { BOE, BI, BDM } }, +{ "bc+", B(16,0,0), B_MASK, PPCCOM, { BOE, BI, BDP } }, +{ "bc", B(16,0,0), B_MASK, COM, { BO, BI, BD } }, +{ "bcl-", B(16,0,1), B_MASK, PPCCOM, { BOE, BI, BDM } }, +{ "bcl+", B(16,0,1), B_MASK, PPCCOM, { BOE, BI, BDP } }, +{ "bcl", B(16,0,1), B_MASK, COM, { BO, BI, BD } }, +{ "bca-", B(16,1,0), B_MASK, PPCCOM, { BOE, BI, BDMA } }, +{ "bca+", B(16,1,0), B_MASK, PPCCOM, { BOE, BI, BDPA } }, +{ "bca", B(16,1,0), B_MASK, COM, { BO, BI, BDA } }, +{ "bcla-", B(16,1,1), B_MASK, PPCCOM, { BOE, BI, BDMA } }, +{ "bcla+", B(16,1,1), B_MASK, PPCCOM, { BOE, BI, BDPA } }, +{ "bcla", B(16,1,1), B_MASK, COM, { BO, BI, BDA } }, + +{ "sc", SC(17,1,0), 0xffffffff, PPC, { 0 } }, +{ "svc", SC(17,0,0), SC_MASK, POWER, { LEV, FL1, FL2 } }, +{ "svcl", SC(17,0,1), SC_MASK, POWER, { LEV, FL1, FL2 } }, +{ "svca", SC(17,1,0), SC_MASK, PWRCOM, { SV } }, +{ "svcla", SC(17,1,1), SC_MASK, POWER, { SV } }, + +{ "b", B(18,0,0), B_MASK, COM, { LI } }, +{ "bl", B(18,0,1), B_MASK, COM, { LI } }, +{ "ba", B(18,1,0), B_MASK, COM, { LIA } }, +{ "bla", B(18,1,1), B_MASK, COM, { LIA } }, + +{ "mcrf", XL(19,0), XLBB_MASK|(3 << 21)|(3 << 16), COM, { BF, BFA } }, + +{ "blr", XLO(19,BOU,16,0), XLBOBIBB_MASK, PPCCOM, { 0 } }, +{ "br", XLO(19,BOU,16,0), XLBOBIBB_MASK, PWRCOM, { 0 } }, +{ "blrl", XLO(19,BOU,16,1), XLBOBIBB_MASK, PPCCOM, { 0 } }, +{ "brl", XLO(19,BOU,16,1), XLBOBIBB_MASK, PWRCOM, { 0 } }, +{ "bdnzlr", XLO(19,BODNZ,16,0), XLBOBIBB_MASK, PPCCOM, { 0 } }, +{ "bdnzlr-", XLO(19,BODNZ,16,0), XLBOBIBB_MASK, NOPOWER4, { 0 } }, +{ "bdnzlr-", XLO(19,BODNZM4,16,0), XLBOBIBB_MASK, POWER4, { 0 } }, +{ "bdnzlr+", XLO(19,BODNZP,16,0), XLBOBIBB_MASK, NOPOWER4, { 0 } }, +{ "bdnzlr+", XLO(19,BODNZP4,16,0), XLBOBIBB_MASK, POWER4, { 0 } }, +{ "bdnzlrl", XLO(19,BODNZ,16,1), XLBOBIBB_MASK, PPCCOM, { 0 } }, +{ "bdnzlrl-",XLO(19,BODNZ,16,1), XLBOBIBB_MASK, NOPOWER4, { 0 } }, +{ "bdnzlrl-",XLO(19,BODNZM4,16,1), XLBOBIBB_MASK, POWER4, { 0 } }, +{ "bdnzlrl+",XLO(19,BODNZP,16,1), XLBOBIBB_MASK, NOPOWER4, { 0 } }, +{ "bdnzlrl+",XLO(19,BODNZP4,16,1), XLBOBIBB_MASK, POWER4, { 0 } }, +{ "bdzlr", XLO(19,BODZ,16,0), XLBOBIBB_MASK, PPCCOM, { 0 } }, +{ "bdzlr-", XLO(19,BODZ,16,0), XLBOBIBB_MASK, NOPOWER4, { 0 } }, +{ "bdzlr-", XLO(19,BODZM4,16,0), XLBOBIBB_MASK, POWER4, { 0 } }, +{ "bdzlr+", XLO(19,BODZP,16,0), XLBOBIBB_MASK, NOPOWER4, { 0 } }, +{ "bdzlr+", XLO(19,BODZP4,16,0), XLBOBIBB_MASK, POWER4, { 0 } }, +{ "bdzlrl", XLO(19,BODZ,16,1), XLBOBIBB_MASK, PPCCOM, { 0 } }, +{ "bdzlrl-", XLO(19,BODZ,16,1), XLBOBIBB_MASK, NOPOWER4, { 0 } }, +{ "bdzlrl-", XLO(19,BODZM4,16,1), XLBOBIBB_MASK, POWER4, { 0 } }, +{ "bdzlrl+", XLO(19,BODZP,16,1), XLBOBIBB_MASK, NOPOWER4, { 0 } }, +{ "bdzlrl+", XLO(19,BODZP4,16,1), XLBOBIBB_MASK, POWER4, { 0 } }, +{ "bltlr", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bltlr-", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bltlr-", XLOCB(19,BOTM4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bltlr+", XLOCB(19,BOTP,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bltlr+", XLOCB(19,BOTP4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bltr", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bltlrl", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bltlrl-", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bltlrl-", XLOCB(19,BOTM4,CBLT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bltlrl+", XLOCB(19,BOTP,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bltlrl+", XLOCB(19,BOTP4,CBLT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bltrl", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bgtlr", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bgtlr-", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgtlr-", XLOCB(19,BOTM4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgtlr+", XLOCB(19,BOTP,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgtlr+", XLOCB(19,BOTP4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgtr", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bgtlrl", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bgtlrl-", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgtlrl-", XLOCB(19,BOTM4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgtlrl+", XLOCB(19,BOTP,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgtlrl+", XLOCB(19,BOTP4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgtrl", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "beqlr", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "beqlr-", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "beqlr-", XLOCB(19,BOTM4,CBEQ,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "beqlr+", XLOCB(19,BOTP,CBEQ,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "beqlr+", XLOCB(19,BOTP4,CBEQ,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "beqr", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "beqlrl", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "beqlrl-", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "beqlrl-", XLOCB(19,BOTM4,CBEQ,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "beqlrl+", XLOCB(19,BOTP,CBEQ,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "beqlrl+", XLOCB(19,BOTP4,CBEQ,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "beqrl", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bsolr", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bsolr-", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bsolr-", XLOCB(19,BOTM4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bsolr+", XLOCB(19,BOTP,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bsolr+", XLOCB(19,BOTP4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bsor", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bsolrl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bsolrl-", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bsolrl-", XLOCB(19,BOTM4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bsolrl+", XLOCB(19,BOTP,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bsolrl+", XLOCB(19,BOTP4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bsorl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bunlr", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bunlr-", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bunlr-", XLOCB(19,BOTM4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bunlr+", XLOCB(19,BOTP,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bunlr+", XLOCB(19,BOTP4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bunlrl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bunlrl-", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bunlrl-", XLOCB(19,BOTM4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bunlrl+", XLOCB(19,BOTP,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bunlrl+", XLOCB(19,BOTP4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgelr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bgelr-", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgelr-", XLOCB(19,BOFM4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgelr+", XLOCB(19,BOFP,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgelr+", XLOCB(19,BOFP4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bger", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bgelrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bgelrl-", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgelrl-", XLOCB(19,BOFM4,CBLT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgelrl+", XLOCB(19,BOFP,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgelrl+", XLOCB(19,BOFP4,CBLT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgerl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bnllr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnllr-", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnllr-", XLOCB(19,BOFM4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnllr+", XLOCB(19,BOFP,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnllr+", XLOCB(19,BOFP4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnlr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bnllrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnllrl-", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnllrl-", XLOCB(19,BOFM4,CBLT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnllrl+", XLOCB(19,BOFP,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnllrl+", XLOCB(19,BOFP4,CBLT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnlrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "blelr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "blelr-", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "blelr-", XLOCB(19,BOFM4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "blelr+", XLOCB(19,BOFP,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "blelr+", XLOCB(19,BOFP4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bler", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "blelrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "blelrl-", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "blelrl-", XLOCB(19,BOFM4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "blelrl+", XLOCB(19,BOFP,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "blelrl+", XLOCB(19,BOFP4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "blerl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bnglr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnglr-", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnglr-", XLOCB(19,BOFM4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnglr+", XLOCB(19,BOFP,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnglr+", XLOCB(19,BOFP4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bngr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bnglrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnglrl-", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnglrl-", XLOCB(19,BOFM4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnglrl+", XLOCB(19,BOFP,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnglrl+", XLOCB(19,BOFP4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bngrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bnelr", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnelr-", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnelr-", XLOCB(19,BOFM4,CBEQ,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnelr+", XLOCB(19,BOFP,CBEQ,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnelr+", XLOCB(19,BOFP4,CBEQ,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bner", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bnelrl", XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnelrl-", XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnelrl-", XLOCB(19,BOFM4,CBEQ,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnelrl+", XLOCB(19,BOFP,CBEQ,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnelrl+", XLOCB(19,BOFP4,CBEQ,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnerl", XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bnslr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnslr-", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnslr-", XLOCB(19,BOFM4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnslr+", XLOCB(19,BOFP,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnslr+", XLOCB(19,BOFP4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnsr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bnslrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnslrl-", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnslrl-", XLOCB(19,BOFM4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnslrl+", XLOCB(19,BOFP,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnslrl+", XLOCB(19,BOFP4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnsrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, +{ "bnulr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnulr-", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnulr-", XLOCB(19,BOFM4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnulr+", XLOCB(19,BOFP,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnulr+", XLOCB(19,BOFP4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnulrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnulrl-", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnulrl-", XLOCB(19,BOFM4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnulrl+", XLOCB(19,BOFP,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnulrl+", XLOCB(19,BOFP4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "btlr", XLO(19,BOT,16,0), XLBOBB_MASK, PPCCOM, { BI } }, +{ "btlr-", XLO(19,BOT,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "btlr-", XLO(19,BOTM4,16,0), XLBOBB_MASK, POWER4, { BI } }, +{ "btlr+", XLO(19,BOTP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "btlr+", XLO(19,BOTP4,16,0), XLBOBB_MASK, POWER4, { BI } }, +{ "bbtr", XLO(19,BOT,16,0), XLBOBB_MASK, PWRCOM, { BI } }, +{ "btlrl", XLO(19,BOT,16,1), XLBOBB_MASK, PPCCOM, { BI } }, +{ "btlrl-", XLO(19,BOT,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "btlrl-", XLO(19,BOTM4,16,1), XLBOBB_MASK, POWER4, { BI } }, +{ "btlrl+", XLO(19,BOTP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "btlrl+", XLO(19,BOTP4,16,1), XLBOBB_MASK, POWER4, { BI } }, +{ "bbtrl", XLO(19,BOT,16,1), XLBOBB_MASK, PWRCOM, { BI } }, +{ "bflr", XLO(19,BOF,16,0), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bflr-", XLO(19,BOF,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bflr-", XLO(19,BOFM4,16,0), XLBOBB_MASK, POWER4, { BI } }, +{ "bflr+", XLO(19,BOFP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bflr+", XLO(19,BOFP4,16,0), XLBOBB_MASK, POWER4, { BI } }, +{ "bbfr", XLO(19,BOF,16,0), XLBOBB_MASK, PWRCOM, { BI } }, +{ "bflrl", XLO(19,BOF,16,1), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bflrl-", XLO(19,BOF,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bflrl-", XLO(19,BOFM4,16,1), XLBOBB_MASK, POWER4, { BI } }, +{ "bflrl+", XLO(19,BOFP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bflrl+", XLO(19,BOFP4,16,1), XLBOBB_MASK, POWER4, { BI } }, +{ "bbfrl", XLO(19,BOF,16,1), XLBOBB_MASK, PWRCOM, { BI } }, +{ "bdnztlr", XLO(19,BODNZT,16,0), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bdnztlr-",XLO(19,BODNZT,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdnztlr+",XLO(19,BODNZTP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdnztlrl",XLO(19,BODNZT,16,1), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bdnztlrl-",XLO(19,BODNZT,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdnztlrl+",XLO(19,BODNZTP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdnzflr", XLO(19,BODNZF,16,0), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bdnzflr-",XLO(19,BODNZF,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdnzflr+",XLO(19,BODNZFP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdnzflrl",XLO(19,BODNZF,16,1), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bdnzflrl-",XLO(19,BODNZF,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdnzflrl+",XLO(19,BODNZFP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdztlr", XLO(19,BODZT,16,0), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bdztlr-", XLO(19,BODZT,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdztlr+", XLO(19,BODZTP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdztlrl", XLO(19,BODZT,16,1), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bdztlrl-",XLO(19,BODZT,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdztlrl+",XLO(19,BODZTP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdzflr", XLO(19,BODZF,16,0), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bdzflr-", XLO(19,BODZF,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdzflr+", XLO(19,BODZFP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdzflrl", XLO(19,BODZF,16,1), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bdzflrl-",XLO(19,BODZF,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bdzflrl+",XLO(19,BODZFP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bclr", XLLK(19,16,0), XLYBB_MASK, PPCCOM, { BO, BI } }, +{ "bclrl", XLLK(19,16,1), XLYBB_MASK, PPCCOM, { BO, BI } }, +{ "bclr+", XLYLK(19,16,1,0), XLYBB_MASK, PPCCOM, { BOE, BI } }, +{ "bclrl+", XLYLK(19,16,1,1), XLYBB_MASK, PPCCOM, { BOE, BI } }, +{ "bclr-", XLYLK(19,16,0,0), XLYBB_MASK, PPCCOM, { BOE, BI } }, +{ "bclrl-", XLYLK(19,16,0,1), XLYBB_MASK, PPCCOM, { BOE, BI } }, +{ "bcr", XLLK(19,16,0), XLBB_MASK, PWRCOM, { BO, BI } }, +{ "bcrl", XLLK(19,16,1), XLBB_MASK, PWRCOM, { BO, BI } }, +{ "bclre", XLLK(19,17,0), XLBB_MASK, BOOKE64, { BO, BI } }, +{ "bclrel", XLLK(19,17,1), XLBB_MASK, BOOKE64, { BO, BI } }, + +{ "rfid", XL(19,18), 0xffffffff, PPC64, { 0 } }, + +{ "crnot", XL(19,33), XL_MASK, PPCCOM, { BT, BA, BBA } }, +{ "crnor", XL(19,33), XL_MASK, COM, { BT, BA, BB } }, +{ "rfmci", X(19,38), 0xffffffff, PPCRFMCI, { 0 } }, + +{ "rfi", XL(19,50), 0xffffffff, COM, { 0 } }, +{ "rfci", XL(19,51), 0xffffffff, PPC403 | BOOKE, { 0 } }, + +{ "rfsvc", XL(19,82), 0xffffffff, POWER, { 0 } }, + +{ "crandc", XL(19,129), XL_MASK, COM, { BT, BA, BB } }, + +{ "isync", XL(19,150), 0xffffffff, PPCCOM, { 0 } }, +{ "ics", XL(19,150), 0xffffffff, PWRCOM, { 0 } }, + +{ "crclr", XL(19,193), XL_MASK, PPCCOM, { BT, BAT, BBA } }, +{ "crxor", XL(19,193), XL_MASK, COM, { BT, BA, BB } }, + +{ "crnand", XL(19,225), XL_MASK, COM, { BT, BA, BB } }, + +{ "crand", XL(19,257), XL_MASK, COM, { BT, BA, BB } }, + +{ "crset", XL(19,289), XL_MASK, PPCCOM, { BT, BAT, BBA } }, +{ "creqv", XL(19,289), XL_MASK, COM, { BT, BA, BB } }, + +{ "crorc", XL(19,417), XL_MASK, COM, { BT, BA, BB } }, + +{ "crmove", XL(19,449), XL_MASK, PPCCOM, { BT, BA, BBA } }, +{ "cror", XL(19,449), XL_MASK, COM, { BT, BA, BB } }, + +{ "bctr", XLO(19,BOU,528,0), XLBOBIBB_MASK, COM, { 0 } }, +{ "bctrl", XLO(19,BOU,528,1), XLBOBIBB_MASK, COM, { 0 } }, +{ "bltctr", XLOCB(19,BOT,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bltctr-", XLOCB(19,BOT,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bltctr-", XLOCB(19,BOTM4,CBLT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bltctr+", XLOCB(19,BOTP,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bltctr+", XLOCB(19,BOTP4,CBLT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bltctrl", XLOCB(19,BOT,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bltctrl-",XLOCB(19,BOT,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bltctrl-",XLOCB(19,BOTM4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bltctrl+",XLOCB(19,BOTP,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bltctrl+",XLOCB(19,BOTP4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgtctr", XLOCB(19,BOT,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bgtctr-", XLOCB(19,BOT,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgtctr-", XLOCB(19,BOTM4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgtctr+", XLOCB(19,BOTP,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgtctr+", XLOCB(19,BOTP4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgtctrl", XLOCB(19,BOT,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bgtctrl-",XLOCB(19,BOT,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgtctrl-",XLOCB(19,BOTM4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgtctrl+",XLOCB(19,BOTP,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgtctrl+",XLOCB(19,BOTP4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "beqctr", XLOCB(19,BOT,CBEQ,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "beqctr-", XLOCB(19,BOT,CBEQ,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "beqctr-", XLOCB(19,BOTM4,CBEQ,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "beqctr+", XLOCB(19,BOTP,CBEQ,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "beqctr+", XLOCB(19,BOTP4,CBEQ,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "beqctrl", XLOCB(19,BOT,CBEQ,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "beqctrl-",XLOCB(19,BOT,CBEQ,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "beqctrl-",XLOCB(19,BOTM4,CBEQ,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "beqctrl+",XLOCB(19,BOTP,CBEQ,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "beqctrl+",XLOCB(19,BOTP4,CBEQ,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bsoctr", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bsoctr-", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bsoctr-", XLOCB(19,BOTM4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bsoctr+", XLOCB(19,BOTP,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bsoctr+", XLOCB(19,BOTP4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bsoctrl", XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bsoctrl-",XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bsoctrl-",XLOCB(19,BOTM4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bsoctrl+",XLOCB(19,BOTP,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bsoctrl+",XLOCB(19,BOTP4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bunctr", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bunctr-", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bunctr-", XLOCB(19,BOTM4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bunctr+", XLOCB(19,BOTP,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bunctr+", XLOCB(19,BOTP4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bunctrl", XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bunctrl-",XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bunctrl-",XLOCB(19,BOTM4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bunctrl+",XLOCB(19,BOTP,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bunctrl+",XLOCB(19,BOTP4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgectr", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bgectr-", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgectr-", XLOCB(19,BOFM4,CBLT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgectr+", XLOCB(19,BOFP,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgectr+", XLOCB(19,BOFP4,CBLT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgectrl", XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bgectrl-",XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgectrl-",XLOCB(19,BOFM4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bgectrl+",XLOCB(19,BOFP,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bgectrl+",XLOCB(19,BOFP4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnlctr", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnlctr-", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnlctr-", XLOCB(19,BOFM4,CBLT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnlctr+", XLOCB(19,BOFP,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnlctr+", XLOCB(19,BOFP4,CBLT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnlctrl", XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnlctrl-",XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnlctrl-",XLOCB(19,BOFM4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnlctrl+",XLOCB(19,BOFP,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnlctrl+",XLOCB(19,BOFP4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "blectr", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "blectr-", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "blectr-", XLOCB(19,BOFM4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "blectr+", XLOCB(19,BOFP,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "blectr+", XLOCB(19,BOFP4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "blectrl", XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "blectrl-",XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "blectrl-",XLOCB(19,BOFM4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "blectrl+",XLOCB(19,BOFP,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "blectrl+",XLOCB(19,BOFP4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bngctr", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bngctr-", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bngctr-", XLOCB(19,BOFM4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bngctr+", XLOCB(19,BOFP,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bngctr+", XLOCB(19,BOFP4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bngctrl", XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bngctrl-",XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bngctrl-",XLOCB(19,BOFM4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bngctrl+",XLOCB(19,BOFP,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bngctrl+",XLOCB(19,BOFP4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnectr", XLOCB(19,BOF,CBEQ,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnectr-", XLOCB(19,BOF,CBEQ,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnectr-", XLOCB(19,BOFM4,CBEQ,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnectr+", XLOCB(19,BOFP,CBEQ,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnectr+", XLOCB(19,BOFP4,CBEQ,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnectrl", XLOCB(19,BOF,CBEQ,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnectrl-",XLOCB(19,BOF,CBEQ,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnectrl-",XLOCB(19,BOFM4,CBEQ,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnectrl+",XLOCB(19,BOFP,CBEQ,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnectrl+",XLOCB(19,BOFP4,CBEQ,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnsctr", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnsctr-", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnsctr-", XLOCB(19,BOFM4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnsctr+", XLOCB(19,BOFP,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnsctr+", XLOCB(19,BOFP4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnsctrl", XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnsctrl-",XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnsctrl-",XLOCB(19,BOFM4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnsctrl+",XLOCB(19,BOFP,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnsctrl+",XLOCB(19,BOFP4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnuctr", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnuctr-", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnuctr-", XLOCB(19,BOFM4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnuctr+", XLOCB(19,BOFP,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnuctr+", XLOCB(19,BOFP4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnuctrl", XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, +{ "bnuctrl-",XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnuctrl-",XLOCB(19,BOFM4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "bnuctrl+",XLOCB(19,BOFP,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, +{ "bnuctrl+",XLOCB(19,BOFP4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, +{ "btctr", XLO(19,BOT,528,0), XLBOBB_MASK, PPCCOM, { BI } }, +{ "btctr-", XLO(19,BOT,528,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "btctr-", XLO(19,BOTM4,528,0), XLBOBB_MASK, POWER4, { BI } }, +{ "btctr+", XLO(19,BOTP,528,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "btctr+", XLO(19,BOTP4,528,0), XLBOBB_MASK, POWER4, { BI } }, +{ "btctrl", XLO(19,BOT,528,1), XLBOBB_MASK, PPCCOM, { BI } }, +{ "btctrl-", XLO(19,BOT,528,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "btctrl-", XLO(19,BOTM4,528,1), XLBOBB_MASK, POWER4, { BI } }, +{ "btctrl+", XLO(19,BOTP,528,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "btctrl+", XLO(19,BOTP4,528,1), XLBOBB_MASK, POWER4, { BI } }, +{ "bfctr", XLO(19,BOF,528,0), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bfctr-", XLO(19,BOF,528,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bfctr-", XLO(19,BOFM4,528,0), XLBOBB_MASK, POWER4, { BI } }, +{ "bfctr+", XLO(19,BOFP,528,0), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bfctr+", XLO(19,BOFP4,528,0), XLBOBB_MASK, POWER4, { BI } }, +{ "bfctrl", XLO(19,BOF,528,1), XLBOBB_MASK, PPCCOM, { BI } }, +{ "bfctrl-", XLO(19,BOF,528,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bfctrl-", XLO(19,BOFM4,528,1), XLBOBB_MASK, POWER4, { BI } }, +{ "bfctrl+", XLO(19,BOFP,528,1), XLBOBB_MASK, NOPOWER4, { BI } }, +{ "bfctrl+", XLO(19,BOFP4,528,1), XLBOBB_MASK, POWER4, { BI } }, +{ "bcctr", XLLK(19,528,0), XLYBB_MASK, PPCCOM, { BO, BI } }, +{ "bcctr-", XLYLK(19,528,0,0), XLYBB_MASK, PPCCOM, { BOE, BI } }, +{ "bcctr+", XLYLK(19,528,1,0), XLYBB_MASK, PPCCOM, { BOE, BI } }, +{ "bcctrl", XLLK(19,528,1), XLYBB_MASK, PPCCOM, { BO, BI } }, +{ "bcctrl-", XLYLK(19,528,0,1), XLYBB_MASK, PPCCOM, { BOE, BI } }, +{ "bcctrl+", XLYLK(19,528,1,1), XLYBB_MASK, PPCCOM, { BOE, BI } }, +{ "bcc", XLLK(19,528,0), XLBB_MASK, PWRCOM, { BO, BI } }, +{ "bccl", XLLK(19,528,1), XLBB_MASK, PWRCOM, { BO, BI } }, +{ "bcctre", XLLK(19,529,0), XLYBB_MASK, BOOKE64, { BO, BI } }, +{ "bcctrel", XLLK(19,529,1), XLYBB_MASK, BOOKE64, { BO, BI } }, + +{ "rlwimi", M(20,0), M_MASK, PPCCOM, { RA,RS,SH,MBE,ME } }, +{ "rlimi", M(20,0), M_MASK, PWRCOM, { RA,RS,SH,MBE,ME } }, + +{ "rlwimi.", M(20,1), M_MASK, PPCCOM, { RA,RS,SH,MBE,ME } }, +{ "rlimi.", M(20,1), M_MASK, PWRCOM, { RA,RS,SH,MBE,ME } }, + +{ "rotlwi", MME(21,31,0), MMBME_MASK, PPCCOM, { RA, RS, SH } }, +{ "clrlwi", MME(21,31,0), MSHME_MASK, PPCCOM, { RA, RS, MB } }, +{ "rlwinm", M(21,0), M_MASK, PPCCOM, { RA,RS,SH,MBE,ME } }, +{ "rlinm", M(21,0), M_MASK, PWRCOM, { RA,RS,SH,MBE,ME } }, +{ "rotlwi.", MME(21,31,1), MMBME_MASK, PPCCOM, { RA,RS,SH } }, +{ "clrlwi.", MME(21,31,1), MSHME_MASK, PPCCOM, { RA, RS, MB } }, +{ "rlwinm.", M(21,1), M_MASK, PPCCOM, { RA,RS,SH,MBE,ME } }, +{ "rlinm.", M(21,1), M_MASK, PWRCOM, { RA,RS,SH,MBE,ME } }, + +{ "rlmi", M(22,0), M_MASK, M601, { RA,RS,RB,MBE,ME } }, +{ "rlmi.", M(22,1), M_MASK, M601, { RA,RS,RB,MBE,ME } }, + +{ "be", B(22,0,0), B_MASK, BOOKE64, { LI } }, +{ "bel", B(22,0,1), B_MASK, BOOKE64, { LI } }, +{ "bea", B(22,1,0), B_MASK, BOOKE64, { LIA } }, +{ "bela", B(22,1,1), B_MASK, BOOKE64, { LIA } }, + +{ "rotlw", MME(23,31,0), MMBME_MASK, PPCCOM, { RA, RS, RB } }, +{ "rlwnm", M(23,0), M_MASK, PPCCOM, { RA,RS,RB,MBE,ME } }, +{ "rlnm", M(23,0), M_MASK, PWRCOM, { RA,RS,RB,MBE,ME } }, +{ "rotlw.", MME(23,31,1), MMBME_MASK, PPCCOM, { RA, RS, RB } }, +{ "rlwnm.", M(23,1), M_MASK, PPCCOM, { RA,RS,RB,MBE,ME } }, +{ "rlnm.", M(23,1), M_MASK, PWRCOM, { RA,RS,RB,MBE,ME } }, + +{ "nop", OP(24), 0xffffffff, PPCCOM, { 0 } }, +{ "ori", OP(24), OP_MASK, PPCCOM, { RA, RS, UI } }, +{ "oril", OP(24), OP_MASK, PWRCOM, { RA, RS, UI } }, + +{ "oris", OP(25), OP_MASK, PPCCOM, { RA, RS, UI } }, +{ "oriu", OP(25), OP_MASK, PWRCOM, { RA, RS, UI } }, + +{ "xori", OP(26), OP_MASK, PPCCOM, { RA, RS, UI } }, +{ "xoril", OP(26), OP_MASK, PWRCOM, { RA, RS, UI } }, + +{ "xoris", OP(27), OP_MASK, PPCCOM, { RA, RS, UI } }, +{ "xoriu", OP(27), OP_MASK, PWRCOM, { RA, RS, UI } }, + +{ "andi.", OP(28), OP_MASK, PPCCOM, { RA, RS, UI } }, +{ "andil.", OP(28), OP_MASK, PWRCOM, { RA, RS, UI } }, + +{ "andis.", OP(29), OP_MASK, PPCCOM, { RA, RS, UI } }, +{ "andiu.", OP(29), OP_MASK, PWRCOM, { RA, RS, UI } }, + +{ "rotldi", MD(30,0,0), MDMB_MASK, PPC64, { RA, RS, SH6 } }, +{ "clrldi", MD(30,0,0), MDSH_MASK, PPC64, { RA, RS, MB6 } }, +{ "rldicl", MD(30,0,0), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, +{ "rotldi.", MD(30,0,1), MDMB_MASK, PPC64, { RA, RS, SH6 } }, +{ "clrldi.", MD(30,0,1), MDSH_MASK, PPC64, { RA, RS, MB6 } }, +{ "rldicl.", MD(30,0,1), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, + +{ "rldicr", MD(30,1,0), MD_MASK, PPC64, { RA, RS, SH6, ME6 } }, +{ "rldicr.", MD(30,1,1), MD_MASK, PPC64, { RA, RS, SH6, ME6 } }, + +{ "rldic", MD(30,2,0), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, +{ "rldic.", MD(30,2,1), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, + +{ "rldimi", MD(30,3,0), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, +{ "rldimi.", MD(30,3,1), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, + +{ "rotld", MDS(30,8,0), MDSMB_MASK, PPC64, { RA, RS, RB } }, +{ "rldcl", MDS(30,8,0), MDS_MASK, PPC64, { RA, RS, RB, MB6 } }, +{ "rotld.", MDS(30,8,1), MDSMB_MASK, PPC64, { RA, RS, RB } }, +{ "rldcl.", MDS(30,8,1), MDS_MASK, PPC64, { RA, RS, RB, MB6 } }, + +{ "rldcr", MDS(30,9,0), MDS_MASK, PPC64, { RA, RS, RB, ME6 } }, +{ "rldcr.", MDS(30,9,1), MDS_MASK, PPC64, { RA, RS, RB, ME6 } }, + +{ "cmpw", XCMPL(31,0,0), XCMPL_MASK, PPCCOM, { OBF, RA, RB } }, +{ "cmpd", XCMPL(31,0,1), XCMPL_MASK, PPC64, { OBF, RA, RB } }, +{ "cmp", X(31,0), XCMP_MASK, PPC, { BF, L, RA, RB } }, +{ "cmp", X(31,0), XCMPL_MASK, PWRCOM, { BF, RA, RB } }, + +{ "twlgt", XTO(31,4,TOLGT), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tlgt", XTO(31,4,TOLGT), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twllt", XTO(31,4,TOLLT), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tllt", XTO(31,4,TOLLT), XTO_MASK, PWRCOM, { RA, RB } }, +{ "tweq", XTO(31,4,TOEQ), XTO_MASK, PPCCOM, { RA, RB } }, +{ "teq", XTO(31,4,TOEQ), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twlge", XTO(31,4,TOLGE), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tlge", XTO(31,4,TOLGE), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twlnl", XTO(31,4,TOLNL), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tlnl", XTO(31,4,TOLNL), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twlle", XTO(31,4,TOLLE), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tlle", XTO(31,4,TOLLE), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twlng", XTO(31,4,TOLNG), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tlng", XTO(31,4,TOLNG), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twgt", XTO(31,4,TOGT), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tgt", XTO(31,4,TOGT), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twge", XTO(31,4,TOGE), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tge", XTO(31,4,TOGE), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twnl", XTO(31,4,TONL), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tnl", XTO(31,4,TONL), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twlt", XTO(31,4,TOLT), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tlt", XTO(31,4,TOLT), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twle", XTO(31,4,TOLE), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tle", XTO(31,4,TOLE), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twng", XTO(31,4,TONG), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tng", XTO(31,4,TONG), XTO_MASK, PWRCOM, { RA, RB } }, +{ "twne", XTO(31,4,TONE), XTO_MASK, PPCCOM, { RA, RB } }, +{ "tne", XTO(31,4,TONE), XTO_MASK, PWRCOM, { RA, RB } }, +{ "trap", XTO(31,4,TOU), 0xffffffff, PPCCOM, { 0 } }, +{ "tw", X(31,4), X_MASK, PPCCOM, { TO, RA, RB } }, +{ "t", X(31,4), X_MASK, PWRCOM, { TO, RA, RB } }, + +{ "subfc", XO(31,8,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "sf", XO(31,8,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "subc", XO(31,8,0,0), XO_MASK, PPC, { RT, RB, RA } }, +{ "subfc.", XO(31,8,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "sf.", XO(31,8,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "subc.", XO(31,8,0,1), XO_MASK, PPCCOM, { RT, RB, RA } }, +{ "subfco", XO(31,8,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "sfo", XO(31,8,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "subco", XO(31,8,1,0), XO_MASK, PPC, { RT, RB, RA } }, +{ "subfco.", XO(31,8,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "sfo.", XO(31,8,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "subco.", XO(31,8,1,1), XO_MASK, PPC, { RT, RB, RA } }, + +{ "mulhdu", XO(31,9,0,0), XO_MASK, PPC64, { RT, RA, RB } }, +{ "mulhdu.", XO(31,9,0,1), XO_MASK, PPC64, { RT, RA, RB } }, + +{ "addc", XO(31,10,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "a", XO(31,10,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "addc.", XO(31,10,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "a.", XO(31,10,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "addco", XO(31,10,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "ao", XO(31,10,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "addco.", XO(31,10,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "ao.", XO(31,10,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, + +{ "mulhwu", XO(31,11,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "mulhwu.", XO(31,11,0,1), XO_MASK, PPC, { RT, RA, RB } }, + +{ "isellt", X(31,15), X_MASK, PPCISEL, { RT, RA, RB } }, +{ "iselgt", X(31,47), X_MASK, PPCISEL, { RT, RA, RB } }, +{ "iseleq", X(31,79), X_MASK, PPCISEL, { RT, RA, RB } }, +{ "isel", XISEL(31,15), XISEL_MASK, PPCISEL, { RT, RA, RB, CRB } }, + +{ "mfcr", X(31,19), XRARB_MASK, NOPOWER4, { RT } }, +{ "mfcr", X(31,19), XFXFXM_MASK, POWER4, { RT, FXM4 } }, + +{ "lwarx", X(31,20), X_MASK, PPC, { RT, RA, RB } }, + +{ "ldx", X(31,21), X_MASK, PPC64, { RT, RA, RB } }, + +{ "icbt", X(31,22), X_MASK, BOOKE, { CT, RA, RB } }, +{ "icbt", X(31,262), XRT_MASK, PPC403, { RA, RB } }, + +{ "lwzx", X(31,23), X_MASK, PPCCOM, { RT, RA, RB } }, +{ "lx", X(31,23), X_MASK, PWRCOM, { RT, RA, RB } }, + +{ "slw", XRC(31,24,0), X_MASK, PPCCOM, { RA, RS, RB } }, +{ "sl", XRC(31,24,0), X_MASK, PWRCOM, { RA, RS, RB } }, +{ "slw.", XRC(31,24,1), X_MASK, PPCCOM, { RA, RS, RB } }, +{ "sl.", XRC(31,24,1), X_MASK, PWRCOM, { RA, RS, RB } }, + +{ "cntlzw", XRC(31,26,0), XRB_MASK, PPCCOM, { RA, RS } }, +{ "cntlz", XRC(31,26,0), XRB_MASK, PWRCOM, { RA, RS } }, +{ "cntlzw.", XRC(31,26,1), XRB_MASK, PPCCOM, { RA, RS } }, +{ "cntlz.", XRC(31,26,1), XRB_MASK, PWRCOM, { RA, RS } }, + +{ "sld", XRC(31,27,0), X_MASK, PPC64, { RA, RS, RB } }, +{ "sld.", XRC(31,27,1), X_MASK, PPC64, { RA, RS, RB } }, + +{ "and", XRC(31,28,0), X_MASK, COM, { RA, RS, RB } }, +{ "and.", XRC(31,28,1), X_MASK, COM, { RA, RS, RB } }, + +{ "maskg", XRC(31,29,0), X_MASK, M601, { RA, RS, RB } }, +{ "maskg.", XRC(31,29,1), X_MASK, M601, { RA, RS, RB } }, + +{ "icbte", X(31,30), X_MASK, BOOKE64, { CT, RA, RB } }, + +{ "lwzxe", X(31,31), X_MASK, BOOKE64, { RT, RA, RB } }, + +{ "cmplw", XCMPL(31,32,0), XCMPL_MASK, PPCCOM, { OBF, RA, RB } }, +{ "cmpld", XCMPL(31,32,1), XCMPL_MASK, PPC64, { OBF, RA, RB } }, +{ "cmpl", X(31,32), XCMP_MASK, PPC, { BF, L, RA, RB } }, +{ "cmpl", X(31,32), XCMPL_MASK, PWRCOM, { BF, RA, RB } }, + +{ "subf", XO(31,40,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "sub", XO(31,40,0,0), XO_MASK, PPC, { RT, RB, RA } }, +{ "subf.", XO(31,40,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "sub.", XO(31,40,0,1), XO_MASK, PPC, { RT, RB, RA } }, +{ "subfo", XO(31,40,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "subo", XO(31,40,1,0), XO_MASK, PPC, { RT, RB, RA } }, +{ "subfo.", XO(31,40,1,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "subo.", XO(31,40,1,1), XO_MASK, PPC, { RT, RB, RA } }, + +{ "ldux", X(31,53), X_MASK, PPC64, { RT, RAL, RB } }, + +{ "dcbst", X(31,54), XRT_MASK, PPC, { RA, RB } }, + +{ "lwzux", X(31,55), X_MASK, PPCCOM, { RT, RAL, RB } }, +{ "lux", X(31,55), X_MASK, PWRCOM, { RT, RA, RB } }, + +{ "dcbste", X(31,62), XRT_MASK, BOOKE64, { RA, RB } }, + +{ "lwzuxe", X(31,63), X_MASK, BOOKE64, { RT, RAL, RB } }, + +{ "cntlzd", XRC(31,58,0), XRB_MASK, PPC64, { RA, RS } }, +{ "cntlzd.", XRC(31,58,1), XRB_MASK, PPC64, { RA, RS } }, + +{ "andc", XRC(31,60,0), X_MASK, COM, { RA, RS, RB } }, +{ "andc.", XRC(31,60,1), X_MASK, COM, { RA, RS, RB } }, + +{ "tdlgt", XTO(31,68,TOLGT), XTO_MASK, PPC64, { RA, RB } }, +{ "tdllt", XTO(31,68,TOLLT), XTO_MASK, PPC64, { RA, RB } }, +{ "tdeq", XTO(31,68,TOEQ), XTO_MASK, PPC64, { RA, RB } }, +{ "tdlge", XTO(31,68,TOLGE), XTO_MASK, PPC64, { RA, RB } }, +{ "tdlnl", XTO(31,68,TOLNL), XTO_MASK, PPC64, { RA, RB } }, +{ "tdlle", XTO(31,68,TOLLE), XTO_MASK, PPC64, { RA, RB } }, +{ "tdlng", XTO(31,68,TOLNG), XTO_MASK, PPC64, { RA, RB } }, +{ "tdgt", XTO(31,68,TOGT), XTO_MASK, PPC64, { RA, RB } }, +{ "tdge", XTO(31,68,TOGE), XTO_MASK, PPC64, { RA, RB } }, +{ "tdnl", XTO(31,68,TONL), XTO_MASK, PPC64, { RA, RB } }, +{ "tdlt", XTO(31,68,TOLT), XTO_MASK, PPC64, { RA, RB } }, +{ "tdle", XTO(31,68,TOLE), XTO_MASK, PPC64, { RA, RB } }, +{ "tdng", XTO(31,68,TONG), XTO_MASK, PPC64, { RA, RB } }, +{ "tdne", XTO(31,68,TONE), XTO_MASK, PPC64, { RA, RB } }, +{ "td", X(31,68), X_MASK, PPC64, { TO, RA, RB } }, + +{ "mulhd", XO(31,73,0,0), XO_MASK, PPC64, { RT, RA, RB } }, +{ "mulhd.", XO(31,73,0,1), XO_MASK, PPC64, { RT, RA, RB } }, + +{ "mulhw", XO(31,75,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "mulhw.", XO(31,75,0,1), XO_MASK, PPC, { RT, RA, RB } }, + +{ "dlmzb", XRC(31,78,0), X_MASK, PPC403|PPC440, { RA, RS, RB } }, +{ "dlmzb.", XRC(31,78,1), X_MASK, PPC403|PPC440, { RA, RS, RB } }, + +{ "mtsrd", X(31,82), XRB_MASK|(1<<20), PPC64, { SR, RS } }, + +{ "mfmsr", X(31,83), XRARB_MASK, COM, { RT } }, + +{ "ldarx", X(31,84), X_MASK, PPC64, { RT, RA, RB } }, + +{ "dcbf", X(31,86), XRT_MASK, PPC, { RA, RB } }, + +{ "lbzx", X(31,87), X_MASK, COM, { RT, RA, RB } }, + +{ "dcbfe", X(31,94), XRT_MASK, BOOKE64, { RA, RB } }, + +{ "lbzxe", X(31,95), X_MASK, BOOKE64, { RT, RA, RB } }, + +{ "neg", XO(31,104,0,0), XORB_MASK, COM, { RT, RA } }, +{ "neg.", XO(31,104,0,1), XORB_MASK, COM, { RT, RA } }, +{ "nego", XO(31,104,1,0), XORB_MASK, COM, { RT, RA } }, +{ "nego.", XO(31,104,1,1), XORB_MASK, COM, { RT, RA } }, + +{ "mul", XO(31,107,0,0), XO_MASK, M601, { RT, RA, RB } }, +{ "mul.", XO(31,107,0,1), XO_MASK, M601, { RT, RA, RB } }, +{ "mulo", XO(31,107,1,0), XO_MASK, M601, { RT, RA, RB } }, +{ "mulo.", XO(31,107,1,1), XO_MASK, M601, { RT, RA, RB } }, + +{ "mtsrdin", X(31,114), XRA_MASK, PPC64, { RS, RB } }, + +{ "clf", X(31,118), XTO_MASK, POWER, { RA, RB } }, + +{ "lbzux", X(31,119), X_MASK, COM, { RT, RAL, RB } }, + +{ "not", XRC(31,124,0), X_MASK, COM, { RA, RS, RBS } }, +{ "nor", XRC(31,124,0), X_MASK, COM, { RA, RS, RB } }, +{ "not.", XRC(31,124,1), X_MASK, COM, { RA, RS, RBS } }, +{ "nor.", XRC(31,124,1), X_MASK, COM, { RA, RS, RB } }, + +{ "lwarxe", X(31,126), X_MASK, BOOKE64, { RT, RA, RB } }, + +{ "lbzuxe", X(31,127), X_MASK, BOOKE64, { RT, RAL, RB } }, + +{ "wrtee", X(31,131), XRARB_MASK, PPC403 | BOOKE, { RS } }, + +{ "dcbtstls",X(31,134), X_MASK, PPCCHLK, { CT, RA, RB }}, + +{ "subfe", XO(31,136,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "sfe", XO(31,136,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "subfe.", XO(31,136,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "sfe.", XO(31,136,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "subfeo", XO(31,136,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "sfeo", XO(31,136,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "subfeo.", XO(31,136,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "sfeo.", XO(31,136,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, + +{ "adde", XO(31,138,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "ae", XO(31,138,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "adde.", XO(31,138,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "ae.", XO(31,138,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "addeo", XO(31,138,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "aeo", XO(31,138,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "addeo.", XO(31,138,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "aeo.", XO(31,138,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, + +{ "dcbtstlse",X(31,142),X_MASK, PPCCHLK64, { CT, RA, RB }}, + +{ "mtcr", XFXM(31,144,0xff), XRARB_MASK, COM, { RS }}, +{ "mtcrf", X(31,144), XFXFXM_MASK, COM, { FXM, RS } }, + +{ "mtmsr", X(31,146), XRARB_MASK, COM, { RS } }, + +{ "stdx", X(31,149), X_MASK, PPC64, { RS, RA, RB } }, + +{ "stwcx.", XRC(31,150,1), X_MASK, PPC, { RS, RA, RB } }, + +{ "stwx", X(31,151), X_MASK, PPCCOM, { RS, RA, RB } }, +{ "stx", X(31,151), X_MASK, PWRCOM, { RS, RA, RB } }, + +{ "stwcxe.", XRC(31,158,1), X_MASK, BOOKE64, { RS, RA, RB } }, + +{ "stwxe", X(31,159), X_MASK, BOOKE64, { RS, RA, RB } }, + +{ "slq", XRC(31,152,0), X_MASK, M601, { RA, RS, RB } }, +{ "slq.", XRC(31,152,1), X_MASK, M601, { RA, RS, RB } }, + +{ "sle", XRC(31,153,0), X_MASK, M601, { RA, RS, RB } }, +{ "sle.", XRC(31,153,1), X_MASK, M601, { RA, RS, RB } }, + +{ "wrteei", X(31,163), XE_MASK, PPC403 | BOOKE, { E } }, + +{ "dcbtls", X(31,166), X_MASK, PPCCHLK, { CT, RA, RB }}, +{ "dcbtlse", X(31,174), X_MASK, PPCCHLK64, { CT, RA, RB }}, + +{ "mtmsrd", X(31,178), XRLARB_MASK, PPC64, { RS, MTMSRD_L } }, + +{ "stdux", X(31,181), X_MASK, PPC64, { RS, RAS, RB } }, + +{ "stwux", X(31,183), X_MASK, PPCCOM, { RS, RAS, RB } }, +{ "stux", X(31,183), X_MASK, PWRCOM, { RS, RA, RB } }, + +{ "sliq", XRC(31,184,0), X_MASK, M601, { RA, RS, SH } }, +{ "sliq.", XRC(31,184,1), X_MASK, M601, { RA, RS, SH } }, + +{ "stwuxe", X(31,191), X_MASK, BOOKE64, { RS, RAS, RB } }, + +{ "subfze", XO(31,200,0,0), XORB_MASK, PPCCOM, { RT, RA } }, +{ "sfze", XO(31,200,0,0), XORB_MASK, PWRCOM, { RT, RA } }, +{ "subfze.", XO(31,200,0,1), XORB_MASK, PPCCOM, { RT, RA } }, +{ "sfze.", XO(31,200,0,1), XORB_MASK, PWRCOM, { RT, RA } }, +{ "subfzeo", XO(31,200,1,0), XORB_MASK, PPCCOM, { RT, RA } }, +{ "sfzeo", XO(31,200,1,0), XORB_MASK, PWRCOM, { RT, RA } }, +{ "subfzeo.",XO(31,200,1,1), XORB_MASK, PPCCOM, { RT, RA } }, +{ "sfzeo.", XO(31,200,1,1), XORB_MASK, PWRCOM, { RT, RA } }, + +{ "addze", XO(31,202,0,0), XORB_MASK, PPCCOM, { RT, RA } }, +{ "aze", XO(31,202,0,0), XORB_MASK, PWRCOM, { RT, RA } }, +{ "addze.", XO(31,202,0,1), XORB_MASK, PPCCOM, { RT, RA } }, +{ "aze.", XO(31,202,0,1), XORB_MASK, PWRCOM, { RT, RA } }, +{ "addzeo", XO(31,202,1,0), XORB_MASK, PPCCOM, { RT, RA } }, +{ "azeo", XO(31,202,1,0), XORB_MASK, PWRCOM, { RT, RA } }, +{ "addzeo.", XO(31,202,1,1), XORB_MASK, PPCCOM, { RT, RA } }, +{ "azeo.", XO(31,202,1,1), XORB_MASK, PWRCOM, { RT, RA } }, + +{ "mtsr", X(31,210), XRB_MASK|(1<<20), COM32, { SR, RS } }, + +{ "stdcx.", XRC(31,214,1), X_MASK, PPC64, { RS, RA, RB } }, + +{ "stbx", X(31,215), X_MASK, COM, { RS, RA, RB } }, + +{ "sllq", XRC(31,216,0), X_MASK, M601, { RA, RS, RB } }, +{ "sllq.", XRC(31,216,1), X_MASK, M601, { RA, RS, RB } }, + +{ "sleq", XRC(31,217,0), X_MASK, M601, { RA, RS, RB } }, +{ "sleq.", XRC(31,217,1), X_MASK, M601, { RA, RS, RB } }, + +{ "stbxe", X(31,223), X_MASK, BOOKE64, { RS, RA, RB } }, + +{ "icblc", X(31,230), X_MASK, PPCCHLK, { CT, RA, RB }}, + +{ "subfme", XO(31,232,0,0), XORB_MASK, PPCCOM, { RT, RA } }, +{ "sfme", XO(31,232,0,0), XORB_MASK, PWRCOM, { RT, RA } }, +{ "subfme.", XO(31,232,0,1), XORB_MASK, PPCCOM, { RT, RA } }, +{ "sfme.", XO(31,232,0,1), XORB_MASK, PWRCOM, { RT, RA } }, +{ "subfmeo", XO(31,232,1,0), XORB_MASK, PPCCOM, { RT, RA } }, +{ "sfmeo", XO(31,232,1,0), XORB_MASK, PWRCOM, { RT, RA } }, +{ "subfmeo.",XO(31,232,1,1), XORB_MASK, PPCCOM, { RT, RA } }, +{ "sfmeo.", XO(31,232,1,1), XORB_MASK, PWRCOM, { RT, RA } }, + +{ "mulld", XO(31,233,0,0), XO_MASK, PPC64, { RT, RA, RB } }, +{ "mulld.", XO(31,233,0,1), XO_MASK, PPC64, { RT, RA, RB } }, +{ "mulldo", XO(31,233,1,0), XO_MASK, PPC64, { RT, RA, RB } }, +{ "mulldo.", XO(31,233,1,1), XO_MASK, PPC64, { RT, RA, RB } }, + +{ "addme", XO(31,234,0,0), XORB_MASK, PPCCOM, { RT, RA } }, +{ "ame", XO(31,234,0,0), XORB_MASK, PWRCOM, { RT, RA } }, +{ "addme.", XO(31,234,0,1), XORB_MASK, PPCCOM, { RT, RA } }, +{ "ame.", XO(31,234,0,1), XORB_MASK, PWRCOM, { RT, RA } }, +{ "addmeo", XO(31,234,1,0), XORB_MASK, PPCCOM, { RT, RA } }, +{ "ameo", XO(31,234,1,0), XORB_MASK, PWRCOM, { RT, RA } }, +{ "addmeo.", XO(31,234,1,1), XORB_MASK, PPCCOM, { RT, RA } }, +{ "ameo.", XO(31,234,1,1), XORB_MASK, PWRCOM, { RT, RA } }, + +{ "mullw", XO(31,235,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "muls", XO(31,235,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "mullw.", XO(31,235,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "muls.", XO(31,235,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "mullwo", XO(31,235,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "mulso", XO(31,235,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "mullwo.", XO(31,235,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "mulso.", XO(31,235,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, + +{ "icblce", X(31,238), X_MASK, PPCCHLK64, { CT, RA, RB }}, +{ "mtsrin", X(31,242), XRA_MASK, PPC32, { RS, RB } }, +{ "mtsri", X(31,242), XRA_MASK, POWER32, { RS, RB } }, + +{ "dcbtst", X(31,246), XRT_MASK, PPC, { CT, RA, RB } }, + +{ "stbux", X(31,247), X_MASK, COM, { RS, RAS, RB } }, + +{ "slliq", XRC(31,248,0), X_MASK, M601, { RA, RS, SH } }, +{ "slliq.", XRC(31,248,1), X_MASK, M601, { RA, RS, SH } }, + +{ "dcbtste", X(31,253), X_MASK, BOOKE64, { CT, RA, RB } }, + +{ "stbuxe", X(31,255), X_MASK, BOOKE64, { RS, RAS, RB } }, + +{ "mfdcrx", X(31,259), X_MASK, BOOKE, { RS, RA } }, + +{ "doz", XO(31,264,0,0), XO_MASK, M601, { RT, RA, RB } }, +{ "doz.", XO(31,264,0,1), XO_MASK, M601, { RT, RA, RB } }, +{ "dozo", XO(31,264,1,0), XO_MASK, M601, { RT, RA, RB } }, +{ "dozo.", XO(31,264,1,1), XO_MASK, M601, { RT, RA, RB } }, + +{ "add", XO(31,266,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "cax", XO(31,266,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "add.", XO(31,266,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "cax.", XO(31,266,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "addo", XO(31,266,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "caxo", XO(31,266,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, +{ "addo.", XO(31,266,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "caxo.", XO(31,266,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, + +{ "tlbiel", X(31,274), XRTRA_MASK, POWER4, { RB } }, + +{ "mfapidi", X(31,275), X_MASK, BOOKE, { RT, RA } }, + +{ "lscbx", XRC(31,277,0), X_MASK, M601, { RT, RA, RB } }, +{ "lscbx.", XRC(31,277,1), X_MASK, M601, { RT, RA, RB } }, + +{ "dcbt", X(31,278), XRT_MASK, PPC, { CT, RA, RB } }, + +{ "lhzx", X(31,279), X_MASK, COM, { RT, RA, RB } }, + +{ "eqv", XRC(31,284,0), X_MASK, COM, { RA, RS, RB } }, +{ "eqv.", XRC(31,284,1), X_MASK, COM, { RA, RS, RB } }, + +{ "dcbte", X(31,286), X_MASK, BOOKE64, { CT, RA, RB } }, + +{ "lhzxe", X(31,287), X_MASK, BOOKE64, { RT, RA, RB } }, + +{ "tlbie", X(31,306), XRTLRA_MASK, PPC, { RB, L } }, +{ "tlbi", X(31,306), XRT_MASK, POWER, { RA, RB } }, + +{ "eciwx", X(31,310), X_MASK, PPC, { RT, RA, RB } }, + +{ "lhzux", X(31,311), X_MASK, COM, { RT, RAL, RB } }, + +{ "xor", XRC(31,316,0), X_MASK, COM, { RA, RS, RB } }, +{ "xor.", XRC(31,316,1), X_MASK, COM, { RA, RS, RB } }, + +{ "lhzuxe", X(31,319), X_MASK, BOOKE64, { RT, RAL, RB } }, + +{ "mfexisr", XSPR(31,323,64), XSPR_MASK, PPC403, { RT } }, +{ "mfexier", XSPR(31,323,66), XSPR_MASK, PPC403, { RT } }, +{ "mfbr0", XSPR(31,323,128), XSPR_MASK, PPC403, { RT } }, +{ "mfbr1", XSPR(31,323,129), XSPR_MASK, PPC403, { RT } }, +{ "mfbr2", XSPR(31,323,130), XSPR_MASK, PPC403, { RT } }, +{ "mfbr3", XSPR(31,323,131), XSPR_MASK, PPC403, { RT } }, +{ "mfbr4", XSPR(31,323,132), XSPR_MASK, PPC403, { RT } }, +{ "mfbr5", XSPR(31,323,133), XSPR_MASK, PPC403, { RT } }, +{ "mfbr6", XSPR(31,323,134), XSPR_MASK, PPC403, { RT } }, +{ "mfbr7", XSPR(31,323,135), XSPR_MASK, PPC403, { RT } }, +{ "mfbear", XSPR(31,323,144), XSPR_MASK, PPC403, { RT } }, +{ "mfbesr", XSPR(31,323,145), XSPR_MASK, PPC403, { RT } }, +{ "mfiocr", XSPR(31,323,160), XSPR_MASK, PPC403, { RT } }, +{ "mfdmacr0", XSPR(31,323,192), XSPR_MASK, PPC403, { RT } }, +{ "mfdmact0", XSPR(31,323,193), XSPR_MASK, PPC403, { RT } }, +{ "mfdmada0", XSPR(31,323,194), XSPR_MASK, PPC403, { RT } }, +{ "mfdmasa0", XSPR(31,323,195), XSPR_MASK, PPC403, { RT } }, +{ "mfdmacc0", XSPR(31,323,196), XSPR_MASK, PPC403, { RT } }, +{ "mfdmacr1", XSPR(31,323,200), XSPR_MASK, PPC403, { RT } }, +{ "mfdmact1", XSPR(31,323,201), XSPR_MASK, PPC403, { RT } }, +{ "mfdmada1", XSPR(31,323,202), XSPR_MASK, PPC403, { RT } }, +{ "mfdmasa1", XSPR(31,323,203), XSPR_MASK, PPC403, { RT } }, +{ "mfdmacc1", XSPR(31,323,204), XSPR_MASK, PPC403, { RT } }, +{ "mfdmacr2", XSPR(31,323,208), XSPR_MASK, PPC403, { RT } }, +{ "mfdmact2", XSPR(31,323,209), XSPR_MASK, PPC403, { RT } }, +{ "mfdmada2", XSPR(31,323,210), XSPR_MASK, PPC403, { RT } }, +{ "mfdmasa2", XSPR(31,323,211), XSPR_MASK, PPC403, { RT } }, +{ "mfdmacc2", XSPR(31,323,212), XSPR_MASK, PPC403, { RT } }, +{ "mfdmacr3", XSPR(31,323,216), XSPR_MASK, PPC403, { RT } }, +{ "mfdmact3", XSPR(31,323,217), XSPR_MASK, PPC403, { RT } }, +{ "mfdmada3", XSPR(31,323,218), XSPR_MASK, PPC403, { RT } }, +{ "mfdmasa3", XSPR(31,323,219), XSPR_MASK, PPC403, { RT } }, +{ "mfdmacc3", XSPR(31,323,220), XSPR_MASK, PPC403, { RT } }, +{ "mfdmasr", XSPR(31,323,224), XSPR_MASK, PPC403, { RT } }, +{ "mfdcr", X(31,323), X_MASK, PPC403 | BOOKE, { RT, SPR } }, + +{ "div", XO(31,331,0,0), XO_MASK, M601, { RT, RA, RB } }, +{ "div.", XO(31,331,0,1), XO_MASK, M601, { RT, RA, RB } }, +{ "divo", XO(31,331,1,0), XO_MASK, M601, { RT, RA, RB } }, +{ "divo.", XO(31,331,1,1), XO_MASK, M601, { RT, RA, RB } }, + +{ "mfpmr", X(31,334), X_MASK, PPCPMR, { RT, PMR }}, + +{ "mfmq", XSPR(31,339,0), XSPR_MASK, M601, { RT } }, +{ "mfxer", XSPR(31,339,1), XSPR_MASK, COM, { RT } }, +{ "mfrtcu", XSPR(31,339,4), XSPR_MASK, COM, { RT } }, +{ "mfrtcl", XSPR(31,339,5), XSPR_MASK, COM, { RT } }, +{ "mfdec", XSPR(31,339,6), XSPR_MASK, MFDEC1, { RT } }, +{ "mfdec", XSPR(31,339,22), XSPR_MASK, MFDEC2, { RT } }, +{ "mflr", XSPR(31,339,8), XSPR_MASK, COM, { RT } }, +{ "mfctr", XSPR(31,339,9), XSPR_MASK, COM, { RT } }, +{ "mftid", XSPR(31,339,17), XSPR_MASK, POWER, { RT } }, +{ "mfdsisr", XSPR(31,339,18), XSPR_MASK, COM, { RT } }, +{ "mfdar", XSPR(31,339,19), XSPR_MASK, COM, { RT } }, +{ "mfsdr0", XSPR(31,339,24), XSPR_MASK, POWER, { RT } }, +{ "mfsdr1", XSPR(31,339,25), XSPR_MASK, COM, { RT } }, +{ "mfsrr0", XSPR(31,339,26), XSPR_MASK, COM, { RT } }, +{ "mfsrr1", XSPR(31,339,27), XSPR_MASK, COM, { RT } }, +{ "mfpid", XSPR(31,339,48), XSPR_MASK, BOOKE, { RT } }, +{ "mfpid", XSPR(31,339,945), XSPR_MASK, PPC403, { RT } }, +{ "mfcsrr0", XSPR(31,339,58), XSPR_MASK, BOOKE, { RT } }, +{ "mfcsrr1", XSPR(31,339,59), XSPR_MASK, BOOKE, { RT } }, +{ "mfdear", XSPR(31,339,61), XSPR_MASK, BOOKE, { RT } }, +{ "mfdear", XSPR(31,339,981), XSPR_MASK, PPC403, { RT } }, +{ "mfesr", XSPR(31,339,62), XSPR_MASK, BOOKE, { RT } }, +{ "mfesr", XSPR(31,339,980), XSPR_MASK, PPC403, { RT } }, +{ "mfivpr", XSPR(31,339,63), XSPR_MASK, BOOKE, { RT } }, +{ "mfcmpa", XSPR(31,339,144), XSPR_MASK, PPC860, { RT } }, +{ "mfcmpb", XSPR(31,339,145), XSPR_MASK, PPC860, { RT } }, +{ "mfcmpc", XSPR(31,339,146), XSPR_MASK, PPC860, { RT } }, +{ "mfcmpd", XSPR(31,339,147), XSPR_MASK, PPC860, { RT } }, +{ "mficr", XSPR(31,339,148), XSPR_MASK, PPC860, { RT } }, +{ "mfder", XSPR(31,339,149), XSPR_MASK, PPC860, { RT } }, +{ "mfcounta", XSPR(31,339,150), XSPR_MASK, PPC860, { RT } }, +{ "mfcountb", XSPR(31,339,151), XSPR_MASK, PPC860, { RT } }, +{ "mfcmpe", XSPR(31,339,152), XSPR_MASK, PPC860, { RT } }, +{ "mfcmpf", XSPR(31,339,153), XSPR_MASK, PPC860, { RT } }, +{ "mfcmpg", XSPR(31,339,154), XSPR_MASK, PPC860, { RT } }, +{ "mfcmph", XSPR(31,339,155), XSPR_MASK, PPC860, { RT } }, +{ "mflctrl1", XSPR(31,339,156), XSPR_MASK, PPC860, { RT } }, +{ "mflctrl2", XSPR(31,339,157), XSPR_MASK, PPC860, { RT } }, +{ "mfictrl", XSPR(31,339,158), XSPR_MASK, PPC860, { RT } }, +{ "mfbar", XSPR(31,339,159), XSPR_MASK, PPC860, { RT } }, +{ "mfvrsave", XSPR(31,339,256), XSPR_MASK, PPCVEC, { RT } }, +{ "mfusprg0", XSPR(31,339,256), XSPR_MASK, BOOKE, { RT } }, +{ "mfsprg4", XSPR(31,339,260), XSPR_MASK, PPC405, { RT } }, +{ "mfsprg5", XSPR(31,339,261), XSPR_MASK, PPC405, { RT } }, +{ "mfsprg6", XSPR(31,339,262), XSPR_MASK, PPC405, { RT } }, +{ "mfsprg7", XSPR(31,339,263), XSPR_MASK, PPC405, { RT } }, +{ "mftb", X(31,371), X_MASK, CLASSIC, { RT, TBR } }, +{ "mftb", XSPR(31,339,268), XSPR_MASK, BOOKE, { RT } }, +{ "mftbl", XSPR(31,371,268), XSPR_MASK, CLASSIC, { RT } }, +{ "mftbl", XSPR(31,339,268), XSPR_MASK, BOOKE, { RT } }, +{ "mftbu", XSPR(31,371,269), XSPR_MASK, CLASSIC, { RT } }, +{ "mftbu", XSPR(31,339,269), XSPR_MASK, BOOKE, { RT } }, +{ "mfsprg", XSPR(31,339,272), XSPRG_MASK, PPC, { RT, SPRG } }, +{ "mfsprg0", XSPR(31,339,272), XSPR_MASK, PPC, { RT } }, +{ "mfsprg1", XSPR(31,339,273), XSPR_MASK, PPC, { RT } }, +{ "mfsprg2", XSPR(31,339,274), XSPR_MASK, PPC, { RT } }, +{ "mfsprg3", XSPR(31,339,275), XSPR_MASK, PPC, { RT } }, +{ "mfasr", XSPR(31,339,280), XSPR_MASK, PPC64, { RT } }, +{ "mfear", XSPR(31,339,282), XSPR_MASK, PPC, { RT } }, +{ "mfpir", XSPR(31,339,286), XSPR_MASK, BOOKE, { RT } }, +{ "mfpvr", XSPR(31,339,287), XSPR_MASK, PPC, { RT } }, +{ "mfdbsr", XSPR(31,339,304), XSPR_MASK, BOOKE, { RT } }, +{ "mfdbsr", XSPR(31,339,1008), XSPR_MASK, PPC403, { RT } }, +{ "mfdbcr0", XSPR(31,339,308), XSPR_MASK, BOOKE, { RT } }, +{ "mfdbcr0", XSPR(31,339,1010), XSPR_MASK, PPC405, { RT } }, +{ "mfdbcr1", XSPR(31,339,309), XSPR_MASK, BOOKE, { RT } }, +{ "mfdbcr1", XSPR(31,339,957), XSPR_MASK, PPC405, { RT } }, +{ "mfdbcr2", XSPR(31,339,310), XSPR_MASK, BOOKE, { RT } }, +{ "mfiac1", XSPR(31,339,312), XSPR_MASK, BOOKE, { RT } }, +{ "mfiac1", XSPR(31,339,1012), XSPR_MASK, PPC403, { RT } }, +{ "mfiac2", XSPR(31,339,313), XSPR_MASK, BOOKE, { RT } }, +{ "mfiac2", XSPR(31,339,1013), XSPR_MASK, PPC403, { RT } }, +{ "mfiac3", XSPR(31,339,314), XSPR_MASK, BOOKE, { RT } }, +{ "mfiac3", XSPR(31,339,948), XSPR_MASK, PPC405, { RT } }, +{ "mfiac4", XSPR(31,339,315), XSPR_MASK, BOOKE, { RT } }, +{ "mfiac4", XSPR(31,339,949), XSPR_MASK, PPC405, { RT } }, +{ "mfdac1", XSPR(31,339,316), XSPR_MASK, BOOKE, { RT } }, +{ "mfdac1", XSPR(31,339,1014), XSPR_MASK, PPC403, { RT } }, +{ "mfdac2", XSPR(31,339,317), XSPR_MASK, BOOKE, { RT } }, +{ "mfdac2", XSPR(31,339,1015), XSPR_MASK, PPC403, { RT } }, +{ "mfdvc1", XSPR(31,339,318), XSPR_MASK, BOOKE, { RT } }, +{ "mfdvc1", XSPR(31,339,950), XSPR_MASK, PPC405, { RT } }, +{ "mfdvc2", XSPR(31,339,319), XSPR_MASK, BOOKE, { RT } }, +{ "mfdvc2", XSPR(31,339,951), XSPR_MASK, PPC405, { RT } }, +{ "mftsr", XSPR(31,339,336), XSPR_MASK, BOOKE, { RT } }, +{ "mftsr", XSPR(31,339,984), XSPR_MASK, PPC403, { RT } }, +{ "mftcr", XSPR(31,339,340), XSPR_MASK, BOOKE, { RT } }, +{ "mftcr", XSPR(31,339,986), XSPR_MASK, PPC403, { RT } }, +{ "mfivor0", XSPR(31,339,400), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor1", XSPR(31,339,401), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor2", XSPR(31,339,402), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor3", XSPR(31,339,403), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor4", XSPR(31,339,404), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor5", XSPR(31,339,405), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor6", XSPR(31,339,406), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor7", XSPR(31,339,407), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor8", XSPR(31,339,408), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor9", XSPR(31,339,409), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor10", XSPR(31,339,410), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor11", XSPR(31,339,411), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor12", XSPR(31,339,412), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor13", XSPR(31,339,413), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor14", XSPR(31,339,414), XSPR_MASK, BOOKE, { RT } }, +{ "mfivor15", XSPR(31,339,415), XSPR_MASK, BOOKE, { RT } }, +{ "mfspefscr", XSPR(31,339,512), XSPR_MASK, PPCSPE, { RT } }, +{ "mfbbear", XSPR(31,339,513), XSPR_MASK, PPCBRLK, { RT } }, +{ "mfbbtar", XSPR(31,339,514), XSPR_MASK, PPCBRLK, { RT } }, +{ "mfibatu", XSPR(31,339,528), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, +{ "mfibatl", XSPR(31,339,529), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, +{ "mfdbatu", XSPR(31,339,536), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, +{ "mfdbatl", XSPR(31,339,537), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, +{ "mfic_cst", XSPR(31,339,560), XSPR_MASK, PPC860, { RT } }, +{ "mfic_adr", XSPR(31,339,561), XSPR_MASK, PPC860, { RT } }, +{ "mfic_dat", XSPR(31,339,562), XSPR_MASK, PPC860, { RT } }, +{ "mfdc_cst", XSPR(31,339,568), XSPR_MASK, PPC860, { RT } }, +{ "mfdc_adr", XSPR(31,339,569), XSPR_MASK, PPC860, { RT } }, +{ "mfdc_dat", XSPR(31,339,570), XSPR_MASK, PPC860, { RT } }, +{ "mfmcsrr0", XSPR(31,339,570), XSPR_MASK, PPCRFMCI, { RT } }, +{ "mfmcsrr1", XSPR(31,339,571), XSPR_MASK, PPCRFMCI, { RT } }, +{ "mfmcsr", XSPR(31,339,572), XSPR_MASK, PPCRFMCI, { RT } }, +{ "mfdpdr", XSPR(31,339,630), XSPR_MASK, PPC860, { RT } }, +{ "mfdpir", XSPR(31,339,631), XSPR_MASK, PPC860, { RT } }, +{ "mfimmr", XSPR(31,339,638), XSPR_MASK, PPC860, { RT } }, +{ "mfmi_ctr", XSPR(31,339,784), XSPR_MASK, PPC860, { RT } }, +{ "mfmi_ap", XSPR(31,339,786), XSPR_MASK, PPC860, { RT } }, +{ "mfmi_epn", XSPR(31,339,787), XSPR_MASK, PPC860, { RT } }, +{ "mfmi_twc", XSPR(31,339,789), XSPR_MASK, PPC860, { RT } }, +{ "mfmi_rpn", XSPR(31,339,790), XSPR_MASK, PPC860, { RT } }, +{ "mfmd_ctr", XSPR(31,339,792), XSPR_MASK, PPC860, { RT } }, +{ "mfm_casid", XSPR(31,339,793), XSPR_MASK, PPC860, { RT } }, +{ "mfmd_ap", XSPR(31,339,794), XSPR_MASK, PPC860, { RT } }, +{ "mfmd_epn", XSPR(31,339,795), XSPR_MASK, PPC860, { RT } }, +{ "mfmd_twb", XSPR(31,339,796), XSPR_MASK, PPC860, { RT } }, +{ "mfmd_twc", XSPR(31,339,797), XSPR_MASK, PPC860, { RT } }, +{ "mfmd_rpn", XSPR(31,339,798), XSPR_MASK, PPC860, { RT } }, +{ "mfm_tw", XSPR(31,339,799), XSPR_MASK, PPC860, { RT } }, +{ "mfmi_dbcam", XSPR(31,339,816), XSPR_MASK, PPC860, { RT } }, +{ "mfmi_dbram0",XSPR(31,339,817), XSPR_MASK, PPC860, { RT } }, +{ "mfmi_dbram1",XSPR(31,339,818), XSPR_MASK, PPC860, { RT } }, +{ "mfmd_dbcam", XSPR(31,339,824), XSPR_MASK, PPC860, { RT } }, +{ "mfmd_dbram0",XSPR(31,339,825), XSPR_MASK, PPC860, { RT } }, +{ "mfmd_dbram1",XSPR(31,339,826), XSPR_MASK, PPC860, { RT } }, +{ "mfummcr0", XSPR(31,339,936), XSPR_MASK, PPC750, { RT } }, +{ "mfupmc1", XSPR(31,339,937), XSPR_MASK, PPC750, { RT } }, +{ "mfupmc2", XSPR(31,339,938), XSPR_MASK, PPC750, { RT } }, +{ "mfusia", XSPR(31,339,939), XSPR_MASK, PPC750, { RT } }, +{ "mfummcr1", XSPR(31,339,940), XSPR_MASK, PPC750, { RT } }, +{ "mfupmc3", XSPR(31,339,941), XSPR_MASK, PPC750, { RT } }, +{ "mfupmc4", XSPR(31,339,942), XSPR_MASK, PPC750, { RT } }, +{ "mfzpr", XSPR(31,339,944), XSPR_MASK, PPC403, { RT } }, +{ "mfccr0", XSPR(31,339,947), XSPR_MASK, PPC405, { RT } }, +{ "mfmmcr0", XSPR(31,339,952), XSPR_MASK, PPC750, { RT } }, +{ "mfpmc1", XSPR(31,339,953), XSPR_MASK, PPC750, { RT } }, +{ "mfsgr", XSPR(31,339,953), XSPR_MASK, PPC403, { RT } }, +{ "mfpmc2", XSPR(31,339,954), XSPR_MASK, PPC750, { RT } }, +{ "mfdcwr", XSPR(31,339,954), XSPR_MASK, PPC403, { RT } }, +{ "mfsia", XSPR(31,339,955), XSPR_MASK, PPC750, { RT } }, +{ "mfsler", XSPR(31,339,955), XSPR_MASK, PPC405, { RT } }, +{ "mfmmcr1", XSPR(31,339,956), XSPR_MASK, PPC750, { RT } }, +{ "mfsu0r", XSPR(31,339,956), XSPR_MASK, PPC405, { RT } }, +{ "mfpmc3", XSPR(31,339,957), XSPR_MASK, PPC750, { RT } }, +{ "mfpmc4", XSPR(31,339,958), XSPR_MASK, PPC750, { RT } }, +{ "mficdbdr", XSPR(31,339,979), XSPR_MASK, PPC403, { RT } }, +{ "mfevpr", XSPR(31,339,982), XSPR_MASK, PPC403, { RT } }, +{ "mfcdbcr", XSPR(31,339,983), XSPR_MASK, PPC403, { RT } }, +{ "mfpit", XSPR(31,339,987), XSPR_MASK, PPC403, { RT } }, +{ "mftbhi", XSPR(31,339,988), XSPR_MASK, PPC403, { RT } }, +{ "mftblo", XSPR(31,339,989), XSPR_MASK, PPC403, { RT } }, +{ "mfsrr2", XSPR(31,339,990), XSPR_MASK, PPC403, { RT } }, +{ "mfsrr3", XSPR(31,339,991), XSPR_MASK, PPC403, { RT } }, +{ "mfl2cr", XSPR(31,339,1017), XSPR_MASK, PPC750, { RT } }, +{ "mfdccr", XSPR(31,339,1018), XSPR_MASK, PPC403, { RT } }, +{ "mficcr", XSPR(31,339,1019), XSPR_MASK, PPC403, { RT } }, +{ "mfictc", XSPR(31,339,1019), XSPR_MASK, PPC750, { RT } }, +{ "mfpbl1", XSPR(31,339,1020), XSPR_MASK, PPC403, { RT } }, +{ "mfthrm1", XSPR(31,339,1020), XSPR_MASK, PPC750, { RT } }, +{ "mfpbu1", XSPR(31,339,1021), XSPR_MASK, PPC403, { RT } }, +{ "mfthrm2", XSPR(31,339,1021), XSPR_MASK, PPC750, { RT } }, +{ "mfpbl2", XSPR(31,339,1022), XSPR_MASK, PPC403, { RT } }, +{ "mfthrm3", XSPR(31,339,1022), XSPR_MASK, PPC750, { RT } }, +{ "mfpbu2", XSPR(31,339,1023), XSPR_MASK, PPC403, { RT } }, +{ "mfspr", X(31,339), X_MASK, COM, { RT, SPR } }, + +{ "lwax", X(31,341), X_MASK, PPC64, { RT, RA, RB } }, + +{ "dst", XDSS(31,342,0), XDSS_MASK, PPCVEC, { RA, RB, STRM } }, +{ "dstt", XDSS(31,342,1), XDSS_MASK, PPCVEC, { RA, RB, STRM } }, + +{ "lhax", X(31,343), X_MASK, COM, { RT, RA, RB } }, + +{ "lhaxe", X(31,351), X_MASK, BOOKE64, { RT, RA, RB } }, + +{ "dstst", XDSS(31,374,0), XDSS_MASK, PPCVEC, { RA, RB, STRM } }, +{ "dststt", XDSS(31,374,1), XDSS_MASK, PPCVEC, { RA, RB, STRM } }, + +{ "dccci", X(31,454), XRT_MASK, PPC403|PPC440, { RA, RB } }, + +{ "abs", XO(31,360,0,0), XORB_MASK, M601, { RT, RA } }, +{ "abs.", XO(31,360,0,1), XORB_MASK, M601, { RT, RA } }, +{ "abso", XO(31,360,1,0), XORB_MASK, M601, { RT, RA } }, +{ "abso.", XO(31,360,1,1), XORB_MASK, M601, { RT, RA } }, + +{ "divs", XO(31,363,0,0), XO_MASK, M601, { RT, RA, RB } }, +{ "divs.", XO(31,363,0,1), XO_MASK, M601, { RT, RA, RB } }, +{ "divso", XO(31,363,1,0), XO_MASK, M601, { RT, RA, RB } }, +{ "divso.", XO(31,363,1,1), XO_MASK, M601, { RT, RA, RB } }, + +{ "tlbia", X(31,370), 0xffffffff, PPC, { 0 } }, + +{ "lwaux", X(31,373), X_MASK, PPC64, { RT, RAL, RB } }, + +{ "lhaux", X(31,375), X_MASK, COM, { RT, RAL, RB } }, + +{ "lhauxe", X(31,383), X_MASK, BOOKE64, { RT, RAL, RB } }, + +{ "mtdcrx", X(31,387), X_MASK, BOOKE, { RA, RS } }, + +{ "dcblc", X(31,390), X_MASK, PPCCHLK, { CT, RA, RB }}, + +{ "subfe64", XO(31,392,0,0), XO_MASK, BOOKE64, { RT, RA, RB } }, +{ "subfe64o",XO(31,392,1,0), XO_MASK, BOOKE64, { RT, RA, RB } }, + +{ "adde64", XO(31,394,0,0), XO_MASK, BOOKE64, { RT, RA, RB } }, +{ "adde64o", XO(31,394,1,0), XO_MASK, BOOKE64, { RT, RA, RB } }, + +{ "dcblce", X(31,398), X_MASK, PPCCHLK64, { CT, RA, RB }}, + +{ "slbmte", X(31,402), XRA_MASK, PPC64, { RS, RB } }, + +{ "sthx", X(31,407), X_MASK, COM, { RS, RA, RB } }, + +{ "lfqx", X(31,791), X_MASK, POWER2, { FRT, RA, RB } }, + +{ "lfqux", X(31,823), X_MASK, POWER2, { FRT, RA, RB } }, + +{ "stfqx", X(31,919), X_MASK, POWER2, { FRS, RA, RB } }, + +{ "stfqux", X(31,951), X_MASK, POWER2, { FRS, RA, RB } }, + +{ "orc", XRC(31,412,0), X_MASK, COM, { RA, RS, RB } }, +{ "orc.", XRC(31,412,1), X_MASK, COM, { RA, RS, RB } }, + +{ "sradi", XS(31,413,0), XS_MASK, PPC64, { RA, RS, SH6 } }, +{ "sradi.", XS(31,413,1), XS_MASK, PPC64, { RA, RS, SH6 } }, + +{ "sthxe", X(31,415), X_MASK, BOOKE64, { RS, RA, RB } }, + +{ "slbie", X(31,434), XRTRA_MASK, PPC64, { RB } }, + +{ "ecowx", X(31,438), X_MASK, PPC, { RT, RA, RB } }, + +{ "sthux", X(31,439), X_MASK, COM, { RS, RAS, RB } }, + +{ "sthuxe", X(31,447), X_MASK, BOOKE64, { RS, RAS, RB } }, + +{ "mr", XRC(31,444,0), X_MASK, COM, { RA, RS, RBS } }, +{ "or", XRC(31,444,0), X_MASK, COM, { RA, RS, RB } }, +{ "mr.", XRC(31,444,1), X_MASK, COM, { RA, RS, RBS } }, +{ "or.", XRC(31,444,1), X_MASK, COM, { RA, RS, RB } }, + +{ "mtexisr", XSPR(31,451,64), XSPR_MASK, PPC403, { RS } }, +{ "mtexier", XSPR(31,451,66), XSPR_MASK, PPC403, { RS } }, +{ "mtbr0", XSPR(31,451,128), XSPR_MASK, PPC403, { RS } }, +{ "mtbr1", XSPR(31,451,129), XSPR_MASK, PPC403, { RS } }, +{ "mtbr2", XSPR(31,451,130), XSPR_MASK, PPC403, { RS } }, +{ "mtbr3", XSPR(31,451,131), XSPR_MASK, PPC403, { RS } }, +{ "mtbr4", XSPR(31,451,132), XSPR_MASK, PPC403, { RS } }, +{ "mtbr5", XSPR(31,451,133), XSPR_MASK, PPC403, { RS } }, +{ "mtbr6", XSPR(31,451,134), XSPR_MASK, PPC403, { RS } }, +{ "mtbr7", XSPR(31,451,135), XSPR_MASK, PPC403, { RS } }, +{ "mtbear", XSPR(31,451,144), XSPR_MASK, PPC403, { RS } }, +{ "mtbesr", XSPR(31,451,145), XSPR_MASK, PPC403, { RS } }, +{ "mtiocr", XSPR(31,451,160), XSPR_MASK, PPC403, { RS } }, +{ "mtdmacr0", XSPR(31,451,192), XSPR_MASK, PPC403, { RS } }, +{ "mtdmact0", XSPR(31,451,193), XSPR_MASK, PPC403, { RS } }, +{ "mtdmada0", XSPR(31,451,194), XSPR_MASK, PPC403, { RS } }, +{ "mtdmasa0", XSPR(31,451,195), XSPR_MASK, PPC403, { RS } }, +{ "mtdmacc0", XSPR(31,451,196), XSPR_MASK, PPC403, { RS } }, +{ "mtdmacr1", XSPR(31,451,200), XSPR_MASK, PPC403, { RS } }, +{ "mtdmact1", XSPR(31,451,201), XSPR_MASK, PPC403, { RS } }, +{ "mtdmada1", XSPR(31,451,202), XSPR_MASK, PPC403, { RS } }, +{ "mtdmasa1", XSPR(31,451,203), XSPR_MASK, PPC403, { RS } }, +{ "mtdmacc1", XSPR(31,451,204), XSPR_MASK, PPC403, { RS } }, +{ "mtdmacr2", XSPR(31,451,208), XSPR_MASK, PPC403, { RS } }, +{ "mtdmact2", XSPR(31,451,209), XSPR_MASK, PPC403, { RS } }, +{ "mtdmada2", XSPR(31,451,210), XSPR_MASK, PPC403, { RS } }, +{ "mtdmasa2", XSPR(31,451,211), XSPR_MASK, PPC403, { RS } }, +{ "mtdmacc2", XSPR(31,451,212), XSPR_MASK, PPC403, { RS } }, +{ "mtdmacr3", XSPR(31,451,216), XSPR_MASK, PPC403, { RS } }, +{ "mtdmact3", XSPR(31,451,217), XSPR_MASK, PPC403, { RS } }, +{ "mtdmada3", XSPR(31,451,218), XSPR_MASK, PPC403, { RS } }, +{ "mtdmasa3", XSPR(31,451,219), XSPR_MASK, PPC403, { RS } }, +{ "mtdmacc3", XSPR(31,451,220), XSPR_MASK, PPC403, { RS } }, +{ "mtdmasr", XSPR(31,451,224), XSPR_MASK, PPC403, { RS } }, +{ "mtdcr", X(31,451), X_MASK, PPC403 | BOOKE, { SPR, RS } }, + +{ "subfze64",XO(31,456,0,0), XORB_MASK, BOOKE64, { RT, RA } }, +{ "subfze64o",XO(31,456,1,0), XORB_MASK, BOOKE64, { RT, RA } }, + +{ "divdu", XO(31,457,0,0), XO_MASK, PPC64, { RT, RA, RB } }, +{ "divdu.", XO(31,457,0,1), XO_MASK, PPC64, { RT, RA, RB } }, +{ "divduo", XO(31,457,1,0), XO_MASK, PPC64, { RT, RA, RB } }, +{ "divduo.", XO(31,457,1,1), XO_MASK, PPC64, { RT, RA, RB } }, + +{ "addze64", XO(31,458,0,0), XORB_MASK, BOOKE64, { RT, RA } }, +{ "addze64o",XO(31,458,1,0), XORB_MASK, BOOKE64, { RT, RA } }, + +{ "divwu", XO(31,459,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "divwu.", XO(31,459,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "divwuo", XO(31,459,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "divwuo.", XO(31,459,1,1), XO_MASK, PPC, { RT, RA, RB } }, + +{ "mtmq", XSPR(31,467,0), XSPR_MASK, M601, { RS } }, +{ "mtxer", XSPR(31,467,1), XSPR_MASK, COM, { RS } }, +{ "mtlr", XSPR(31,467,8), XSPR_MASK, COM, { RS } }, +{ "mtctr", XSPR(31,467,9), XSPR_MASK, COM, { RS } }, +{ "mttid", XSPR(31,467,17), XSPR_MASK, POWER, { RS } }, +{ "mtdsisr", XSPR(31,467,18), XSPR_MASK, COM, { RS } }, +{ "mtdar", XSPR(31,467,19), XSPR_MASK, COM, { RS } }, +{ "mtrtcu", XSPR(31,467,20), XSPR_MASK, COM, { RS } }, +{ "mtrtcl", XSPR(31,467,21), XSPR_MASK, COM, { RS } }, +{ "mtdec", XSPR(31,467,22), XSPR_MASK, COM, { RS } }, +{ "mtsdr0", XSPR(31,467,24), XSPR_MASK, POWER, { RS } }, +{ "mtsdr1", XSPR(31,467,25), XSPR_MASK, COM, { RS } }, +{ "mtsrr0", XSPR(31,467,26), XSPR_MASK, COM, { RS } }, +{ "mtsrr1", XSPR(31,467,27), XSPR_MASK, COM, { RS } }, +{ "mtpid", XSPR(31,467,48), XSPR_MASK, BOOKE, { RS } }, +{ "mtpid", XSPR(31,467,945), XSPR_MASK, PPC403, { RS } }, +{ "mtdecar", XSPR(31,467,54), XSPR_MASK, BOOKE, { RS } }, +{ "mtcsrr0", XSPR(31,467,58), XSPR_MASK, BOOKE, { RS } }, +{ "mtcsrr1", XSPR(31,467,59), XSPR_MASK, BOOKE, { RS } }, +{ "mtdear", XSPR(31,467,61), XSPR_MASK, BOOKE, { RS } }, +{ "mtdear", XSPR(31,467,981), XSPR_MASK, PPC403, { RS } }, +{ "mtesr", XSPR(31,467,62), XSPR_MASK, BOOKE, { RS } }, +{ "mtesr", XSPR(31,467,980), XSPR_MASK, PPC403, { RS } }, +{ "mtivpr", XSPR(31,467,63), XSPR_MASK, BOOKE, { RS } }, +{ "mtcmpa", XSPR(31,467,144), XSPR_MASK, PPC860, { RS } }, +{ "mtcmpb", XSPR(31,467,145), XSPR_MASK, PPC860, { RS } }, +{ "mtcmpc", XSPR(31,467,146), XSPR_MASK, PPC860, { RS } }, +{ "mtcmpd", XSPR(31,467,147), XSPR_MASK, PPC860, { RS } }, +{ "mticr", XSPR(31,467,148), XSPR_MASK, PPC860, { RS } }, +{ "mtder", XSPR(31,467,149), XSPR_MASK, PPC860, { RS } }, +{ "mtcounta", XSPR(31,467,150), XSPR_MASK, PPC860, { RS } }, +{ "mtcountb", XSPR(31,467,151), XSPR_MASK, PPC860, { RS } }, +{ "mtcmpe", XSPR(31,467,152), XSPR_MASK, PPC860, { RS } }, +{ "mtcmpf", XSPR(31,467,153), XSPR_MASK, PPC860, { RS } }, +{ "mtcmpg", XSPR(31,467,154), XSPR_MASK, PPC860, { RS } }, +{ "mtcmph", XSPR(31,467,155), XSPR_MASK, PPC860, { RS } }, +{ "mtlctrl1", XSPR(31,467,156), XSPR_MASK, PPC860, { RS } }, +{ "mtlctrl2", XSPR(31,467,157), XSPR_MASK, PPC860, { RS } }, +{ "mtictrl", XSPR(31,467,158), XSPR_MASK, PPC860, { RS } }, +{ "mtbar", XSPR(31,467,159), XSPR_MASK, PPC860, { RS } }, +{ "mtvrsave", XSPR(31,467,256), XSPR_MASK, PPCVEC, { RS } }, +{ "mtusprg0", XSPR(31,467,256), XSPR_MASK, BOOKE, { RS } }, +{ "mtsprg", XSPR(31,467,272), XSPRG_MASK,PPC, { SPRG, RS } }, +{ "mtsprg0", XSPR(31,467,272), XSPR_MASK, PPC, { RS } }, +{ "mtsprg1", XSPR(31,467,273), XSPR_MASK, PPC, { RS } }, +{ "mtsprg2", XSPR(31,467,274), XSPR_MASK, PPC, { RS } }, +{ "mtsprg3", XSPR(31,467,275), XSPR_MASK, PPC, { RS } }, +{ "mtsprg4", XSPR(31,467,276), XSPR_MASK, PPC405 | BOOKE, { RS } }, +{ "mtsprg5", XSPR(31,467,277), XSPR_MASK, PPC405 | BOOKE, { RS } }, +{ "mtsprg6", XSPR(31,467,278), XSPR_MASK, PPC405 | BOOKE, { RS } }, +{ "mtsprg7", XSPR(31,467,279), XSPR_MASK, PPC405 | BOOKE, { RS } }, +{ "mtasr", XSPR(31,467,280), XSPR_MASK, PPC64, { RS } }, +{ "mtear", XSPR(31,467,282), XSPR_MASK, PPC, { RS } }, +{ "mttbl", XSPR(31,467,284), XSPR_MASK, PPC, { RS } }, +{ "mttbu", XSPR(31,467,285), XSPR_MASK, PPC, { RS } }, +{ "mtdbsr", XSPR(31,467,304), XSPR_MASK, BOOKE, { RS } }, +{ "mtdbsr", XSPR(31,467,1008), XSPR_MASK, PPC403, { RS } }, +{ "mtdbcr0", XSPR(31,467,308), XSPR_MASK, BOOKE, { RS } }, +{ "mtdbcr0", XSPR(31,467,1010), XSPR_MASK, PPC405, { RS } }, +{ "mtdbcr1", XSPR(31,467,309), XSPR_MASK, BOOKE, { RS } }, +{ "mtdbcr1", XSPR(31,467,957), XSPR_MASK, PPC405, { RS } }, +{ "mtdbcr2", XSPR(31,467,310), XSPR_MASK, BOOKE, { RS } }, +{ "mtiac1", XSPR(31,467,312), XSPR_MASK, BOOKE, { RS } }, +{ "mtiac1", XSPR(31,467,1012), XSPR_MASK, PPC403, { RS } }, +{ "mtiac2", XSPR(31,467,313), XSPR_MASK, BOOKE, { RS } }, +{ "mtiac2", XSPR(31,467,1013), XSPR_MASK, PPC403, { RS } }, +{ "mtiac3", XSPR(31,467,314), XSPR_MASK, BOOKE, { RS } }, +{ "mtiac3", XSPR(31,467,948), XSPR_MASK, PPC405, { RS } }, +{ "mtiac4", XSPR(31,467,315), XSPR_MASK, BOOKE, { RS } }, +{ "mtiac4", XSPR(31,467,949), XSPR_MASK, PPC405, { RS } }, +{ "mtdac1", XSPR(31,467,316), XSPR_MASK, BOOKE, { RS } }, +{ "mtdac1", XSPR(31,467,1014), XSPR_MASK, PPC403, { RS } }, +{ "mtdac2", XSPR(31,467,317), XSPR_MASK, BOOKE, { RS } }, +{ "mtdac2", XSPR(31,467,1015), XSPR_MASK, PPC403, { RS } }, +{ "mtdvc1", XSPR(31,467,318), XSPR_MASK, BOOKE, { RS } }, +{ "mtdvc1", XSPR(31,467,950), XSPR_MASK, PPC405, { RS } }, +{ "mtdvc2", XSPR(31,467,319), XSPR_MASK, BOOKE, { RS } }, +{ "mtdvc2", XSPR(31,467,951), XSPR_MASK, PPC405, { RS } }, +{ "mttsr", XSPR(31,467,336), XSPR_MASK, BOOKE, { RS } }, +{ "mttsr", XSPR(31,467,984), XSPR_MASK, PPC403, { RS } }, +{ "mttcr", XSPR(31,467,340), XSPR_MASK, BOOKE, { RS } }, +{ "mttcr", XSPR(31,467,986), XSPR_MASK, PPC403, { RS } }, +{ "mtivor0", XSPR(31,467,400), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor1", XSPR(31,467,401), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor2", XSPR(31,467,402), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor3", XSPR(31,467,403), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor4", XSPR(31,467,404), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor5", XSPR(31,467,405), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor6", XSPR(31,467,406), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor7", XSPR(31,467,407), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor8", XSPR(31,467,408), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor9", XSPR(31,467,409), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor10", XSPR(31,467,410), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor11", XSPR(31,467,411), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor12", XSPR(31,467,412), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor13", XSPR(31,467,413), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor14", XSPR(31,467,414), XSPR_MASK, BOOKE, { RS } }, +{ "mtivor15", XSPR(31,467,415), XSPR_MASK, BOOKE, { RS } }, +{ "mtspefscr", XSPR(31,467,512), XSPR_MASK, PPCSPE, { RS } }, +{ "mtbbear", XSPR(31,467,513), XSPR_MASK, PPCBRLK, { RS } }, +{ "mtbbtar", XSPR(31,467,514), XSPR_MASK, PPCBRLK, { RS } }, +{ "mtibatu", XSPR(31,467,528), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, +{ "mtibatl", XSPR(31,467,529), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, +{ "mtdbatu", XSPR(31,467,536), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, +{ "mtdbatl", XSPR(31,467,537), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, +{ "mtmcsrr0", XSPR(31,467,570), XSPR_MASK, PPCRFMCI, { RS } }, +{ "mtmcsrr1", XSPR(31,467,571), XSPR_MASK, PPCRFMCI, { RS } }, +{ "mtmcsr", XSPR(31,467,572), XSPR_MASK, PPCRFMCI, { RS } }, +{ "mtummcr0", XSPR(31,467,936), XSPR_MASK, PPC750, { RS } }, +{ "mtupmc1", XSPR(31,467,937), XSPR_MASK, PPC750, { RS } }, +{ "mtupmc2", XSPR(31,467,938), XSPR_MASK, PPC750, { RS } }, +{ "mtusia", XSPR(31,467,939), XSPR_MASK, PPC750, { RS } }, +{ "mtummcr1", XSPR(31,467,940), XSPR_MASK, PPC750, { RS } }, +{ "mtupmc3", XSPR(31,467,941), XSPR_MASK, PPC750, { RS } }, +{ "mtupmc4", XSPR(31,467,942), XSPR_MASK, PPC750, { RS } }, +{ "mtzpr", XSPR(31,467,944), XSPR_MASK, PPC403, { RS } }, +{ "mtccr0", XSPR(31,467,947), XSPR_MASK, PPC405, { RS } }, +{ "mtmmcr0", XSPR(31,467,952), XSPR_MASK, PPC750, { RS } }, +{ "mtsgr", XSPR(31,467,953), XSPR_MASK, PPC403, { RS } }, +{ "mtpmc1", XSPR(31,467,953), XSPR_MASK, PPC750, { RS } }, +{ "mtdcwr", XSPR(31,467,954), XSPR_MASK, PPC403, { RS } }, +{ "mtpmc2", XSPR(31,467,954), XSPR_MASK, PPC750, { RS } }, +{ "mtsler", XSPR(31,467,955), XSPR_MASK, PPC405, { RS } }, +{ "mtsia", XSPR(31,467,955), XSPR_MASK, PPC750, { RS } }, +{ "mtsu0r", XSPR(31,467,956), XSPR_MASK, PPC405, { RS } }, +{ "mtmmcr1", XSPR(31,467,956), XSPR_MASK, PPC750, { RS } }, +{ "mtpmc3", XSPR(31,467,957), XSPR_MASK, PPC750, { RS } }, +{ "mtpmc4", XSPR(31,467,958), XSPR_MASK, PPC750, { RS } }, +{ "mticdbdr", XSPR(31,467,979), XSPR_MASK, PPC403, { RS } }, +{ "mtevpr", XSPR(31,467,982), XSPR_MASK, PPC403, { RS } }, +{ "mtcdbcr", XSPR(31,467,983), XSPR_MASK, PPC403, { RS } }, +{ "mtpit", XSPR(31,467,987), XSPR_MASK, PPC403, { RS } }, +{ "mttbhi", XSPR(31,467,988), XSPR_MASK, PPC403, { RS } }, +{ "mttblo", XSPR(31,467,989), XSPR_MASK, PPC403, { RS } }, +{ "mtsrr2", XSPR(31,467,990), XSPR_MASK, PPC403, { RS } }, +{ "mtsrr3", XSPR(31,467,991), XSPR_MASK, PPC403, { RS } }, +{ "mtl2cr", XSPR(31,467,1017), XSPR_MASK, PPC750, { RS } }, +{ "mtdccr", XSPR(31,467,1018), XSPR_MASK, PPC403, { RS } }, +{ "mticcr", XSPR(31,467,1019), XSPR_MASK, PPC403, { RS } }, +{ "mtictc", XSPR(31,467,1019), XSPR_MASK, PPC750, { RS } }, +{ "mtpbl1", XSPR(31,467,1020), XSPR_MASK, PPC403, { RS } }, +{ "mtthrm1", XSPR(31,467,1020), XSPR_MASK, PPC750, { RS } }, +{ "mtpbu1", XSPR(31,467,1021), XSPR_MASK, PPC403, { RS } }, +{ "mtthrm2", XSPR(31,467,1021), XSPR_MASK, PPC750, { RS } }, +{ "mtpbl2", XSPR(31,467,1022), XSPR_MASK, PPC403, { RS } }, +{ "mtthrm3", XSPR(31,467,1022), XSPR_MASK, PPC750, { RS } }, +{ "mtpbu2", XSPR(31,467,1023), XSPR_MASK, PPC403, { RS } }, +{ "mtspr", X(31,467), X_MASK, COM, { SPR, RS } }, + +{ "dcbi", X(31,470), XRT_MASK, PPC, { RA, RB } }, + +{ "nand", XRC(31,476,0), X_MASK, COM, { RA, RS, RB } }, +{ "nand.", XRC(31,476,1), X_MASK, COM, { RA, RS, RB } }, + +{ "dcbie", X(31,478), XRT_MASK, BOOKE64, { RA, RB } }, + +{ "dcread", X(31,486), X_MASK, PPC403|PPC440, { RT, RA, RB }}, + +{ "mtpmr", X(31,462), X_MASK, PPCPMR, { PMR, RS }}, + +{ "icbtls", X(31,486), X_MASK, PPCCHLK, { CT, RA, RB }}, + +{ "nabs", XO(31,488,0,0), XORB_MASK, M601, { RT, RA } }, +{ "subfme64",XO(31,488,0,0), XORB_MASK, BOOKE64, { RT, RA } }, +{ "nabs.", XO(31,488,0,1), XORB_MASK, M601, { RT, RA } }, +{ "nabso", XO(31,488,1,0), XORB_MASK, M601, { RT, RA } }, +{ "subfme64o",XO(31,488,1,0), XORB_MASK, BOOKE64, { RT, RA } }, +{ "nabso.", XO(31,488,1,1), XORB_MASK, M601, { RT, RA } }, + +{ "divd", XO(31,489,0,0), XO_MASK, PPC64, { RT, RA, RB } }, +{ "divd.", XO(31,489,0,1), XO_MASK, PPC64, { RT, RA, RB } }, +{ "divdo", XO(31,489,1,0), XO_MASK, PPC64, { RT, RA, RB } }, +{ "divdo.", XO(31,489,1,1), XO_MASK, PPC64, { RT, RA, RB } }, + +{ "addme64", XO(31,490,0,0), XORB_MASK, BOOKE64, { RT, RA } }, +{ "addme64o",XO(31,490,1,0), XORB_MASK, BOOKE64, { RT, RA } }, + +{ "divw", XO(31,491,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "divw.", XO(31,491,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{ "divwo", XO(31,491,1,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "divwo.", XO(31,491,1,1), XO_MASK, PPC, { RT, RA, RB } }, + +{ "icbtlse", X(31,494), X_MASK, PPCCHLK64, { CT, RA, RB }}, + +{ "slbia", X(31,498), 0xffffffff, PPC64, { 0 } }, + +{ "cli", X(31,502), XRB_MASK, POWER, { RT, RA } }, + +{ "stdcxe.", XRC(31,511,1), X_MASK, BOOKE64, { RS, RA, RB } }, + +{ "mcrxr", X(31,512), XRARB_MASK|(3<<21), COM, { BF } }, + +{ "bblels", X(31,518), X_MASK, PPCBRLK, { 0 }}, +{ "mcrxr64", X(31,544), XRARB_MASK|(3<<21), BOOKE64, { BF } }, + +{ "clcs", X(31,531), XRB_MASK, M601, { RT, RA } }, + +{ "lswx", X(31,533), X_MASK, PPCCOM, { RT, RA, RB } }, +{ "lsx", X(31,533), X_MASK, PWRCOM, { RT, RA, RB } }, + +{ "lwbrx", X(31,534), X_MASK, PPCCOM, { RT, RA, RB } }, +{ "lbrx", X(31,534), X_MASK, PWRCOM, { RT, RA, RB } }, + +{ "lfsx", X(31,535), X_MASK, COM, { FRT, RA, RB } }, + +{ "srw", XRC(31,536,0), X_MASK, PPCCOM, { RA, RS, RB } }, +{ "sr", XRC(31,536,0), X_MASK, PWRCOM, { RA, RS, RB } }, +{ "srw.", XRC(31,536,1), X_MASK, PPCCOM, { RA, RS, RB } }, +{ "sr.", XRC(31,536,1), X_MASK, PWRCOM, { RA, RS, RB } }, + +{ "rrib", XRC(31,537,0), X_MASK, M601, { RA, RS, RB } }, +{ "rrib.", XRC(31,537,1), X_MASK, M601, { RA, RS, RB } }, + +{ "srd", XRC(31,539,0), X_MASK, PPC64, { RA, RS, RB } }, +{ "srd.", XRC(31,539,1), X_MASK, PPC64, { RA, RS, RB } }, + +{ "maskir", XRC(31,541,0), X_MASK, M601, { RA, RS, RB } }, +{ "maskir.", XRC(31,541,1), X_MASK, M601, { RA, RS, RB } }, + +{ "lwbrxe", X(31,542), X_MASK, BOOKE64, { RT, RA, RB } }, + +{ "lfsxe", X(31,543), X_MASK, BOOKE64, { FRT, RA, RB } }, + +{ "bbelr", X(31,550), X_MASK, PPCBRLK, { 0 }}, +{ "tlbsync", X(31,566), 0xffffffff, PPC, { 0 } }, + +{ "lfsux", X(31,567), X_MASK, COM, { FRT, RAS, RB } }, + +{ "lfsuxe", X(31,575), X_MASK, BOOKE64, { FRT, RAS, RB } }, + +{ "mfsr", X(31,595), XRB_MASK|(1<<20), COM32, { RT, SR } }, + +{ "lswi", X(31,597), X_MASK, PPCCOM, { RT, RA, NB } }, +{ "lsi", X(31,597), X_MASK, PWRCOM, { RT, RA, NB } }, + +{ "lwsync", XSYNC(31,598,1), 0xffffffff, PPC, { 0 } }, +{ "ptesync", XSYNC(31,598,2), 0xffffffff, PPC64, { 0 } }, +{ "msync", X(31,598), 0xffffffff, BOOKE, { 0 } }, +{ "sync", X(31,598), XSYNC_MASK, PPCCOM, { LS } }, +{ "dcs", X(31,598), 0xffffffff, PWRCOM, { 0 } }, + +{ "lfdx", X(31,599), X_MASK, COM, { FRT, RA, RB } }, + +{ "lfdxe", X(31,607), X_MASK, BOOKE64, { FRT, RA, RB } }, + +{ "mfsri", X(31,627), X_MASK, PWRCOM, { RT, RA, RB } }, + +{ "dclst", X(31,630), XRB_MASK, PWRCOM, { RS, RA } }, + +{ "lfdux", X(31,631), X_MASK, COM, { FRT, RAS, RB } }, + +{ "lfduxe", X(31,639), X_MASK, BOOKE64, { FRT, RAS, RB } }, + +{ "mfsrin", X(31,659), XRA_MASK, PPC32, { RT, RB } }, + +{ "stswx", X(31,661), X_MASK, PPCCOM, { RS, RA, RB } }, +{ "stsx", X(31,661), X_MASK, PWRCOM, { RS, RA, RB } }, + +{ "stwbrx", X(31,662), X_MASK, PPCCOM, { RS, RA, RB } }, +{ "stbrx", X(31,662), X_MASK, PWRCOM, { RS, RA, RB } }, + +{ "stfsx", X(31,663), X_MASK, COM, { FRS, RA, RB } }, + +{ "srq", XRC(31,664,0), X_MASK, M601, { RA, RS, RB } }, +{ "srq.", XRC(31,664,1), X_MASK, M601, { RA, RS, RB } }, + +{ "sre", XRC(31,665,0), X_MASK, M601, { RA, RS, RB } }, +{ "sre.", XRC(31,665,1), X_MASK, M601, { RA, RS, RB } }, + +{ "stwbrxe", X(31,670), X_MASK, BOOKE64, { RS, RA, RB } }, + +{ "stfsxe", X(31,671), X_MASK, BOOKE64, { FRS, RA, RB } }, + +{ "stfsux", X(31,695), X_MASK, COM, { FRS, RAS, RB } }, + +{ "sriq", XRC(31,696,0), X_MASK, M601, { RA, RS, SH } }, +{ "sriq.", XRC(31,696,1), X_MASK, M601, { RA, RS, SH } }, + +{ "stfsuxe", X(31,703), X_MASK, BOOKE64, { FRS, RAS, RB } }, + +{ "stswi", X(31,725), X_MASK, PPCCOM, { RS, RA, NB } }, +{ "stsi", X(31,725), X_MASK, PWRCOM, { RS, RA, NB } }, + +{ "stfdx", X(31,727), X_MASK, COM, { FRS, RA, RB } }, + +{ "srlq", XRC(31,728,0), X_MASK, M601, { RA, RS, RB } }, +{ "srlq.", XRC(31,728,1), X_MASK, M601, { RA, RS, RB } }, + +{ "sreq", XRC(31,729,0), X_MASK, M601, { RA, RS, RB } }, +{ "sreq.", XRC(31,729,1), X_MASK, M601, { RA, RS, RB } }, + +{ "stfdxe", X(31,735), X_MASK, BOOKE64, { FRS, RA, RB } }, + +{ "dcba", X(31,758), XRT_MASK, PPC405 | BOOKE, { RA, RB } }, + +{ "stfdux", X(31,759), X_MASK, COM, { FRS, RAS, RB } }, + +{ "srliq", XRC(31,760,0), X_MASK, M601, { RA, RS, SH } }, +{ "srliq.", XRC(31,760,1), X_MASK, M601, { RA, RS, SH } }, + +{ "dcbae", X(31,766), XRT_MASK, BOOKE64, { RA, RB } }, + +{ "stfduxe", X(31,767), X_MASK, BOOKE64, { FRS, RAS, RB } }, + +{ "tlbivax", X(31,786), XRT_MASK, BOOKE, { RA, RB } }, +{ "tlbivaxe",X(31,787), XRT_MASK, BOOKE64, { RA, RB } }, + +{ "lhbrx", X(31,790), X_MASK, COM, { RT, RA, RB } }, + +{ "sraw", XRC(31,792,0), X_MASK, PPCCOM, { RA, RS, RB } }, +{ "sra", XRC(31,792,0), X_MASK, PWRCOM, { RA, RS, RB } }, +{ "sraw.", XRC(31,792,1), X_MASK, PPCCOM, { RA, RS, RB } }, +{ "sra.", XRC(31,792,1), X_MASK, PWRCOM, { RA, RS, RB } }, + +{ "srad", XRC(31,794,0), X_MASK, PPC64, { RA, RS, RB } }, +{ "srad.", XRC(31,794,1), X_MASK, PPC64, { RA, RS, RB } }, + +{ "lhbrxe", X(31,798), X_MASK, BOOKE64, { RT, RA, RB } }, + +{ "ldxe", X(31,799), X_MASK, BOOKE64, { RT, RA, RB } }, +{ "lduxe", X(31,831), X_MASK, BOOKE64, { RT, RA, RB } }, + +{ "rac", X(31,818), X_MASK, PWRCOM, { RT, RA, RB } }, + +{ "dss", XDSS(31,822,0), XDSS_MASK, PPCVEC, { STRM } }, +{ "dssall", XDSS(31,822,1), XDSS_MASK, PPCVEC, { 0 } }, + +{ "srawi", XRC(31,824,0), X_MASK, PPCCOM, { RA, RS, SH } }, +{ "srai", XRC(31,824,0), X_MASK, PWRCOM, { RA, RS, SH } }, +{ "srawi.", XRC(31,824,1), X_MASK, PPCCOM, { RA, RS, SH } }, +{ "srai.", XRC(31,824,1), X_MASK, PWRCOM, { RA, RS, SH } }, + +{ "slbmfev", X(31,851), XRA_MASK, PPC64, { RT, RB } }, + +{ "mbar", X(31,854), X_MASK, BOOKE, { MO } }, +{ "eieio", X(31,854), 0xffffffff, PPC, { 0 } }, + +{ "tlbsx", XRC(31,914,0), X_MASK, BOOKE, { RA, RB } }, +{ "tlbsx", XRC(31,914,0), X_MASK, PPC403, { RT, RA, RB } }, +{ "tlbsx.", XRC(31,914,1), X_MASK, BOOKE, { RA, RB } }, +{ "tlbsx.", XRC(31,914,1), X_MASK, PPC403, { RT, RA, RB } }, +{ "tlbsxe", XRC(31,915,0), X_MASK, BOOKE64, { RA, RB } }, +{ "tlbsxe.", XRC(31,915,1), X_MASK, BOOKE64, { RA, RB } }, + +{ "slbmfee", X(31,915), XRA_MASK, PPC64, { RT, RB } }, + +{ "sthbrx", X(31,918), X_MASK, COM, { RS, RA, RB } }, + +{ "sraq", XRC(31,920,0), X_MASK, M601, { RA, RS, RB } }, +{ "sraq.", XRC(31,920,1), X_MASK, M601, { RA, RS, RB } }, + +{ "srea", XRC(31,921,0), X_MASK, M601, { RA, RS, RB } }, +{ "srea.", XRC(31,921,1), X_MASK, M601, { RA, RS, RB } }, + +{ "extsh", XRC(31,922,0), XRB_MASK, PPCCOM, { RA, RS } }, +{ "exts", XRC(31,922,0), XRB_MASK, PWRCOM, { RA, RS } }, +{ "extsh.", XRC(31,922,1), XRB_MASK, PPCCOM, { RA, RS } }, +{ "exts.", XRC(31,922,1), XRB_MASK, PWRCOM, { RA, RS } }, + +{ "sthbrxe", X(31,926), X_MASK, BOOKE64, { RS, RA, RB } }, + +{ "stdxe", X(31,927), X_MASK, BOOKE64, { RS, RA, RB } }, + +{ "tlbrehi", XTLB(31,946,0), XTLB_MASK, PPC403, { RT, RA } }, +{ "tlbrelo", XTLB(31,946,1), XTLB_MASK, PPC403, { RT, RA } }, +{ "tlbre", X(31,946), X_MASK, BOOKE, { 0 } }, +{ "tlbre", X(31,946), X_MASK, PPC403, { RS, RA, SH } }, + +{ "sraiq", XRC(31,952,0), X_MASK, M601, { RA, RS, SH } }, +{ "sraiq.", XRC(31,952,1), X_MASK, M601, { RA, RS, SH } }, + +{ "extsb", XRC(31,954,0), XRB_MASK, PPC, { RA, RS} }, +{ "extsb.", XRC(31,954,1), XRB_MASK, PPC, { RA, RS} }, + +{ "stduxe", X(31,959), X_MASK, BOOKE64, { RS, RAS, RB } }, + +{ "iccci", X(31,966), XRT_MASK, PPC403|PPC440, { RA, RB } }, + +{ "tlbwehi", XTLB(31,978,0), XTLB_MASK, PPC403, { RT, RA } }, +{ "tlbwelo", XTLB(31,978,1), XTLB_MASK, PPC403, { RT, RA } }, +{ "tlbwe", X(31,978), X_MASK, BOOKE, { 0 } }, +{ "tlbwe", X(31,978), X_MASK, PPC403, { RS, RA, SH } }, +{ "tlbld", X(31,978), XRTRA_MASK, PPC, { RB } }, + +{ "icbi", X(31,982), XRT_MASK, PPC, { RA, RB } }, + +{ "stfiwx", X(31,983), X_MASK, PPC, { FRS, RA, RB } }, + +{ "extsw", XRC(31,986,0), XRB_MASK, PPC64 | BOOKE64,{ RA, RS } }, +{ "extsw.", XRC(31,986,1), XRB_MASK, PPC64, { RA, RS } }, + +{ "icread", X(31,998), XRT_MASK, PPC403|PPC440, { RA, RB } }, + +{ "icbie", X(31,990), XRT_MASK, BOOKE64, { RA, RB } }, +{ "stfiwxe", X(31,991), X_MASK, BOOKE64, { FRS, RA, RB } }, + +{ "tlbli", X(31,1010), XRTRA_MASK, PPC, { RB } }, + +{ "dcbz", X(31,1014), XRT_MASK, PPC, { RA, RB } }, +{ "dclz", X(31,1014), XRT_MASK, PPC, { RA, RB } }, + +{ "dcbze", X(31,1022), XRT_MASK, BOOKE64, { RA, RB } }, + +{ "lvebx", X(31, 7), X_MASK, PPCVEC, { VD, RA, RB } }, +{ "lvehx", X(31, 39), X_MASK, PPCVEC, { VD, RA, RB } }, +{ "lvewx", X(31, 71), X_MASK, PPCVEC, { VD, RA, RB } }, +{ "lvsl", X(31, 6), X_MASK, PPCVEC, { VD, RA, RB } }, +{ "lvsr", X(31, 38), X_MASK, PPCVEC, { VD, RA, RB } }, +{ "lvx", X(31, 103), X_MASK, PPCVEC, { VD, RA, RB } }, +{ "lvxl", X(31, 359), X_MASK, PPCVEC, { VD, RA, RB } }, +{ "stvebx", X(31, 135), X_MASK, PPCVEC, { VS, RA, RB } }, +{ "stvehx", X(31, 167), X_MASK, PPCVEC, { VS, RA, RB } }, +{ "stvewx", X(31, 199), X_MASK, PPCVEC, { VS, RA, RB } }, +{ "stvx", X(31, 231), X_MASK, PPCVEC, { VS, RA, RB } }, +{ "stvxl", X(31, 487), X_MASK, PPCVEC, { VS, RA, RB } }, + +{ "lwz", OP(32), OP_MASK, PPCCOM, { RT, D, RA } }, +{ "l", OP(32), OP_MASK, PWRCOM, { RT, D, RA } }, + +{ "lwzu", OP(33), OP_MASK, PPCCOM, { RT, D, RAL } }, +{ "lu", OP(33), OP_MASK, PWRCOM, { RT, D, RA } }, + +{ "lbz", OP(34), OP_MASK, COM, { RT, D, RA } }, + +{ "lbzu", OP(35), OP_MASK, COM, { RT, D, RAL } }, + +{ "stw", OP(36), OP_MASK, PPCCOM, { RS, D, RA } }, +{ "st", OP(36), OP_MASK, PWRCOM, { RS, D, RA } }, + +{ "stwu", OP(37), OP_MASK, PPCCOM, { RS, D, RAS } }, +{ "stu", OP(37), OP_MASK, PWRCOM, { RS, D, RA } }, + +{ "stb", OP(38), OP_MASK, COM, { RS, D, RA } }, + +{ "stbu", OP(39), OP_MASK, COM, { RS, D, RAS } }, + +{ "lhz", OP(40), OP_MASK, COM, { RT, D, RA } }, + +{ "lhzu", OP(41), OP_MASK, COM, { RT, D, RAL } }, + +{ "lha", OP(42), OP_MASK, COM, { RT, D, RA } }, + +{ "lhau", OP(43), OP_MASK, COM, { RT, D, RAL } }, + +{ "sth", OP(44), OP_MASK, COM, { RS, D, RA } }, + +{ "sthu", OP(45), OP_MASK, COM, { RS, D, RAS } }, + +{ "lmw", OP(46), OP_MASK, PPCCOM, { RT, D, RAM } }, +{ "lm", OP(46), OP_MASK, PWRCOM, { RT, D, RA } }, + +{ "stmw", OP(47), OP_MASK, PPCCOM, { RS, D, RA } }, +{ "stm", OP(47), OP_MASK, PWRCOM, { RS, D, RA } }, + +{ "lfs", OP(48), OP_MASK, COM, { FRT, D, RA } }, + +{ "lfsu", OP(49), OP_MASK, COM, { FRT, D, RAS } }, + +{ "lfd", OP(50), OP_MASK, COM, { FRT, D, RA } }, + +{ "lfdu", OP(51), OP_MASK, COM, { FRT, D, RAS } }, + +{ "stfs", OP(52), OP_MASK, COM, { FRS, D, RA } }, + +{ "stfsu", OP(53), OP_MASK, COM, { FRS, D, RAS } }, + +{ "stfd", OP(54), OP_MASK, COM, { FRS, D, RA } }, + +{ "stfdu", OP(55), OP_MASK, COM, { FRS, D, RAS } }, + +{ "lq", OP(56), OP_MASK, POWER4, { RTQ, DQ, RAQ } }, + +{ "lfq", OP(56), OP_MASK, POWER2, { FRT, D, RA } }, + +{ "lfqu", OP(57), OP_MASK, POWER2, { FRT, D, RA } }, + +{ "lbze", DEO(58,0), DE_MASK, BOOKE64, { RT, DE, RA } }, +{ "lbzue", DEO(58,1), DE_MASK, BOOKE64, { RT, DE, RAL } }, +{ "lhze", DEO(58,2), DE_MASK, BOOKE64, { RT, DE, RA } }, +{ "lhzue", DEO(58,3), DE_MASK, BOOKE64, { RT, DE, RAL } }, +{ "lhae", DEO(58,4), DE_MASK, BOOKE64, { RT, DE, RA } }, +{ "lhaue", DEO(58,5), DE_MASK, BOOKE64, { RT, DE, RAL } }, +{ "lwze", DEO(58,6), DE_MASK, BOOKE64, { RT, DE, RA } }, +{ "lwzue", DEO(58,7), DE_MASK, BOOKE64, { RT, DE, RAL } }, +{ "stbe", DEO(58,8), DE_MASK, BOOKE64, { RS, DE, RA } }, +{ "stbue", DEO(58,9), DE_MASK, BOOKE64, { RS, DE, RAS } }, +{ "sthe", DEO(58,10), DE_MASK, BOOKE64, { RS, DE, RA } }, +{ "sthue", DEO(58,11), DE_MASK, BOOKE64, { RS, DE, RAS } }, +{ "stwe", DEO(58,14), DE_MASK, BOOKE64, { RS, DE, RA } }, +{ "stwue", DEO(58,15), DE_MASK, BOOKE64, { RS, DE, RAS } }, + +{ "ld", DSO(58,0), DS_MASK, PPC64, { RT, DS, RA } }, + +{ "ldu", DSO(58,1), DS_MASK, PPC64, { RT, DS, RAL } }, + +{ "lwa", DSO(58,2), DS_MASK, PPC64, { RT, DS, RA } }, + +{ "fdivs", A(59,18,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fdivs.", A(59,18,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, + +{ "fsubs", A(59,20,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fsubs.", A(59,20,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, + +{ "fadds", A(59,21,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fadds.", A(59,21,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, + +{ "fsqrts", A(59,22,0), AFRAFRC_MASK, PPC, { FRT, FRB } }, +{ "fsqrts.", A(59,22,1), AFRAFRC_MASK, PPC, { FRT, FRB } }, + +{ "fres", A(59,24,0), AFRAFRC_MASK, PPC, { FRT, FRB } }, +{ "fres.", A(59,24,1), AFRAFRC_MASK, PPC, { FRT, FRB } }, + +{ "fmuls", A(59,25,0), AFRB_MASK, PPC, { FRT, FRA, FRC } }, +{ "fmuls.", A(59,25,1), AFRB_MASK, PPC, { FRT, FRA, FRC } }, + +{ "fmsubs", A(59,28,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fmsubs.", A(59,28,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, + +{ "fmadds", A(59,29,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fmadds.", A(59,29,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, + +{ "fnmsubs", A(59,30,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fnmsubs.",A(59,30,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, + +{ "fnmadds", A(59,31,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fnmadds.",A(59,31,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, + +{ "stfq", OP(60), OP_MASK, POWER2, { FRS, D, RA } }, + +{ "stfqu", OP(61), OP_MASK, POWER2, { FRS, D, RA } }, + +{ "lde", DEO(62,0), DE_MASK, BOOKE64, { RT, DES, RA } }, +{ "ldue", DEO(62,1), DE_MASK, BOOKE64, { RT, DES, RA } }, +{ "lfse", DEO(62,4), DE_MASK, BOOKE64, { FRT, DES, RA } }, +{ "lfsue", DEO(62,5), DE_MASK, BOOKE64, { FRT, DES, RAS } }, +{ "lfde", DEO(62,6), DE_MASK, BOOKE64, { FRT, DES, RA } }, +{ "lfdue", DEO(62,7), DE_MASK, BOOKE64, { FRT, DES, RAS } }, +{ "stde", DEO(62,8), DE_MASK, BOOKE64, { RS, DES, RA } }, +{ "stdue", DEO(62,9), DE_MASK, BOOKE64, { RS, DES, RAS } }, +{ "stfse", DEO(62,12), DE_MASK, BOOKE64, { FRS, DES, RA } }, +{ "stfsue", DEO(62,13), DE_MASK, BOOKE64, { FRS, DES, RAS } }, +{ "stfde", DEO(62,14), DE_MASK, BOOKE64, { FRS, DES, RA } }, +{ "stfdue", DEO(62,15), DE_MASK, BOOKE64, { FRS, DES, RAS } }, + +{ "std", DSO(62,0), DS_MASK, PPC64, { RS, DS, RA } }, + +{ "stdu", DSO(62,1), DS_MASK, PPC64, { RS, DS, RAS } }, + +{ "stq", DSO(62,2), DS_MASK, POWER4, { RSQ, DS, RA } }, + +{ "fcmpu", X(63,0), X_MASK|(3<<21), COM, { BF, FRA, FRB } }, + +{ "frsp", XRC(63,12,0), XRA_MASK, COM, { FRT, FRB } }, +{ "frsp.", XRC(63,12,1), XRA_MASK, COM, { FRT, FRB } }, + +{ "fctiw", XRC(63,14,0), XRA_MASK, PPCCOM, { FRT, FRB } }, +{ "fcir", XRC(63,14,0), XRA_MASK, POWER2, { FRT, FRB } }, +{ "fctiw.", XRC(63,14,1), XRA_MASK, PPCCOM, { FRT, FRB } }, +{ "fcir.", XRC(63,14,1), XRA_MASK, POWER2, { FRT, FRB } }, + +{ "fctiwz", XRC(63,15,0), XRA_MASK, PPCCOM, { FRT, FRB } }, +{ "fcirz", XRC(63,15,0), XRA_MASK, POWER2, { FRT, FRB } }, +{ "fctiwz.", XRC(63,15,1), XRA_MASK, PPCCOM, { FRT, FRB } }, +{ "fcirz.", XRC(63,15,1), XRA_MASK, POWER2, { FRT, FRB } }, + +{ "fdiv", A(63,18,0), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, +{ "fd", A(63,18,0), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, +{ "fdiv.", A(63,18,1), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, +{ "fd.", A(63,18,1), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, + +{ "fsub", A(63,20,0), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, +{ "fs", A(63,20,0), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, +{ "fsub.", A(63,20,1), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, +{ "fs.", A(63,20,1), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, + +{ "fadd", A(63,21,0), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, +{ "fa", A(63,21,0), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, +{ "fadd.", A(63,21,1), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, +{ "fa.", A(63,21,1), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, + +{ "fsqrt", A(63,22,0), AFRAFRC_MASK, PPCPWR2, { FRT, FRB } }, +{ "fsqrt.", A(63,22,1), AFRAFRC_MASK, PPCPWR2, { FRT, FRB } }, + +{ "fsel", A(63,23,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{ "fsel.", A(63,23,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, + +{ "fmul", A(63,25,0), AFRB_MASK, PPCCOM, { FRT, FRA, FRC } }, +{ "fm", A(63,25,0), AFRB_MASK, PWRCOM, { FRT, FRA, FRC } }, +{ "fmul.", A(63,25,1), AFRB_MASK, PPCCOM, { FRT, FRA, FRC } }, +{ "fm.", A(63,25,1), AFRB_MASK, PWRCOM, { FRT, FRA, FRC } }, + +{ "frsqrte", A(63,26,0), AFRAFRC_MASK, PPC, { FRT, FRB } }, +{ "frsqrte.",A(63,26,1), AFRAFRC_MASK, PPC, { FRT, FRB } }, + +{ "fmsub", A(63,28,0), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, +{ "fms", A(63,28,0), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, +{ "fmsub.", A(63,28,1), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, +{ "fms.", A(63,28,1), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, + +{ "fmadd", A(63,29,0), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, +{ "fma", A(63,29,0), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, +{ "fmadd.", A(63,29,1), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, +{ "fma.", A(63,29,1), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, + +{ "fnmsub", A(63,30,0), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, +{ "fnms", A(63,30,0), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, +{ "fnmsub.", A(63,30,1), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, +{ "fnms.", A(63,30,1), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, + +{ "fnmadd", A(63,31,0), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, +{ "fnma", A(63,31,0), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, +{ "fnmadd.", A(63,31,1), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, +{ "fnma.", A(63,31,1), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, + +{ "fcmpo", X(63,32), X_MASK|(3<<21), COM, { BF, FRA, FRB } }, + +{ "mtfsb1", XRC(63,38,0), XRARB_MASK, COM, { BT } }, +{ "mtfsb1.", XRC(63,38,1), XRARB_MASK, COM, { BT } }, + +{ "fneg", XRC(63,40,0), XRA_MASK, COM, { FRT, FRB } }, +{ "fneg.", XRC(63,40,1), XRA_MASK, COM, { FRT, FRB } }, + +{ "mcrfs", X(63,64), XRB_MASK|(3<<21)|(3<<16), COM, { BF, BFA } }, + +{ "mtfsb0", XRC(63,70,0), XRARB_MASK, COM, { BT } }, +{ "mtfsb0.", XRC(63,70,1), XRARB_MASK, COM, { BT } }, + +{ "fmr", XRC(63,72,0), XRA_MASK, COM, { FRT, FRB } }, +{ "fmr.", XRC(63,72,1), XRA_MASK, COM, { FRT, FRB } }, + +{ "mtfsfi", XRC(63,134,0), XRA_MASK|(3<<21)|(1<<11), COM, { BF, U } }, +{ "mtfsfi.", XRC(63,134,1), XRA_MASK|(3<<21)|(1<<11), COM, { BF, U } }, + +{ "fnabs", XRC(63,136,0), XRA_MASK, COM, { FRT, FRB } }, +{ "fnabs.", XRC(63,136,1), XRA_MASK, COM, { FRT, FRB } }, + +{ "fabs", XRC(63,264,0), XRA_MASK, COM, { FRT, FRB } }, +{ "fabs.", XRC(63,264,1), XRA_MASK, COM, { FRT, FRB } }, + +{ "mffs", XRC(63,583,0), XRARB_MASK, COM, { FRT } }, +{ "mffs.", XRC(63,583,1), XRARB_MASK, COM, { FRT } }, + +{ "mtfsf", XFL(63,711,0), XFL_MASK, COM, { FLM, FRB } }, +{ "mtfsf.", XFL(63,711,1), XFL_MASK, COM, { FLM, FRB } }, + +{ "fctid", XRC(63,814,0), XRA_MASK, PPC64, { FRT, FRB } }, +{ "fctid.", XRC(63,814,1), XRA_MASK, PPC64, { FRT, FRB } }, + +{ "fctidz", XRC(63,815,0), XRA_MASK, PPC64, { FRT, FRB } }, +{ "fctidz.", XRC(63,815,1), XRA_MASK, PPC64, { FRT, FRB } }, + +{ "fcfid", XRC(63,846,0), XRA_MASK, PPC64, { FRT, FRB } }, +{ "fcfid.", XRC(63,846,1), XRA_MASK, PPC64, { FRT, FRB } }, + +}; + +const int powerpc_num_opcodes = + sizeof (powerpc_opcodes) / sizeof (powerpc_opcodes[0]); + +/* The macro table. This is only used by the assembler. */ + +/* The expressions of the form (-x ! 31) & (x | 31) have the value 0 + when x=0; 32-x when x is between 1 and 31; are negative if x is + negative; and are 32 or more otherwise. This is what you want + when, for instance, you are emulating a right shift by a + rotate-left-and-mask, because the underlying instructions support + shifts of size 0 but not shifts of size 32. By comparison, when + extracting x bits from some word you want to use just 32-x, because + the underlying instructions don't support extracting 0 bits but do + support extracting the whole word (32 bits in this case). */ + +const struct powerpc_macro powerpc_macros[] = { +{ "extldi", 4, PPC64, "rldicr %0,%1,%3,(%2)-1" }, +{ "extldi.", 4, PPC64, "rldicr. %0,%1,%3,(%2)-1" }, +{ "extrdi", 4, PPC64, "rldicl %0,%1,(%2)+(%3),64-(%2)" }, +{ "extrdi.", 4, PPC64, "rldicl. %0,%1,(%2)+(%3),64-(%2)" }, +{ "insrdi", 4, PPC64, "rldimi %0,%1,64-((%2)+(%3)),%3" }, +{ "insrdi.", 4, PPC64, "rldimi. %0,%1,64-((%2)+(%3)),%3" }, +{ "rotrdi", 3, PPC64, "rldicl %0,%1,(-(%2)!63)&((%2)|63),0" }, +{ "rotrdi.", 3, PPC64, "rldicl. %0,%1,(-(%2)!63)&((%2)|63),0" }, +{ "sldi", 3, PPC64, "rldicr %0,%1,%2,63-(%2)" }, +{ "sldi.", 3, PPC64, "rldicr. %0,%1,%2,63-(%2)" }, +{ "srdi", 3, PPC64, "rldicl %0,%1,(-(%2)!63)&((%2)|63),%2" }, +{ "srdi.", 3, PPC64, "rldicl. %0,%1,(-(%2)!63)&((%2)|63),%2" }, +{ "clrrdi", 3, PPC64, "rldicr %0,%1,0,63-(%2)" }, +{ "clrrdi.", 3, PPC64, "rldicr. %0,%1,0,63-(%2)" }, +{ "clrlsldi",4, PPC64, "rldic %0,%1,%3,(%2)-(%3)" }, +{ "clrlsldi.",4, PPC64, "rldic. %0,%1,%3,(%2)-(%3)" }, + +{ "extlwi", 4, PPCCOM, "rlwinm %0,%1,%3,0,(%2)-1" }, +{ "extlwi.", 4, PPCCOM, "rlwinm. %0,%1,%3,0,(%2)-1" }, +{ "extrwi", 4, PPCCOM, "rlwinm %0,%1,((%2)+(%3))&((%2)+(%3)<>32),32-(%2),31" }, +{ "extrwi.", 4, PPCCOM, "rlwinm. %0,%1,((%2)+(%3))&((%2)+(%3)<>32),32-(%2),31" }, +{ "inslwi", 4, PPCCOM, "rlwimi %0,%1,(-(%3)!31)&((%3)|31),%3,(%2)+(%3)-1" }, +{ "inslwi.", 4, PPCCOM, "rlwimi. %0,%1,(-(%3)!31)&((%3)|31),%3,(%2)+(%3)-1"}, +{ "insrwi", 4, PPCCOM, "rlwimi %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1" }, +{ "insrwi.", 4, PPCCOM, "rlwimi. %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1"}, +{ "rotrwi", 3, PPCCOM, "rlwinm %0,%1,(-(%2)!31)&((%2)|31),0,31" }, +{ "rotrwi.", 3, PPCCOM, "rlwinm. %0,%1,(-(%2)!31)&((%2)|31),0,31" }, +{ "slwi", 3, PPCCOM, "rlwinm %0,%1,%2,0,31-(%2)" }, +{ "sli", 3, PWRCOM, "rlinm %0,%1,%2,0,31-(%2)" }, +{ "slwi.", 3, PPCCOM, "rlwinm. %0,%1,%2,0,31-(%2)" }, +{ "sli.", 3, PWRCOM, "rlinm. %0,%1,%2,0,31-(%2)" }, +{ "srwi", 3, PPCCOM, "rlwinm %0,%1,(-(%2)!31)&((%2)|31),%2,31" }, +{ "sri", 3, PWRCOM, "rlinm %0,%1,(-(%2)!31)&((%2)|31),%2,31" }, +{ "srwi.", 3, PPCCOM, "rlwinm. %0,%1,(-(%2)!31)&((%2)|31),%2,31" }, +{ "sri.", 3, PWRCOM, "rlinm. %0,%1,(-(%2)!31)&((%2)|31),%2,31" }, +{ "clrrwi", 3, PPCCOM, "rlwinm %0,%1,0,0,31-(%2)" }, +{ "clrrwi.", 3, PPCCOM, "rlwinm. %0,%1,0,0,31-(%2)" }, +{ "clrlslwi",4, PPCCOM, "rlwinm %0,%1,%3,(%2)-(%3),31-(%3)" }, +{ "clrlslwi.",4, PPCCOM, "rlwinm. %0,%1,%3,(%2)-(%3),31-(%3)" }, +}; + +const int powerpc_num_macros = + sizeof (powerpc_macros) / sizeof (powerpc_macros[0]); diff --git a/arch/ppc64/xmon/ppc.h b/arch/ppc64/xmon/ppc.h new file mode 100644 index 00000000000..342237e8dd6 --- /dev/null +++ b/arch/ppc64/xmon/ppc.h @@ -0,0 +1,307 @@ +/* ppc.h -- Header file for PowerPC opcode table + Copyright 1994, 1995, 1999, 2000, 2001, 2002, 2003 + Free Software Foundation, Inc. + Written by Ian Lance Taylor, Cygnus Support + +This file is part of GDB, GAS, and the GNU binutils. + +GDB, GAS, and the GNU binutils are free software; you can redistribute +them and/or modify them under the terms of the GNU General Public +License as published by the Free Software Foundation; either version +1, or (at your option) any later version. + +GDB, GAS, and the GNU binutils are distributed in the hope that they +will be useful, but WITHOUT ANY WARRANTY; without even the implied +warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See +the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this file; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifndef PPC_H +#define PPC_H + +/* The opcode table is an array of struct powerpc_opcode. */ + +struct powerpc_opcode +{ + /* The opcode name. */ + const char *name; + + /* The opcode itself. Those bits which will be filled in with + operands are zeroes. */ + unsigned long opcode; + + /* The opcode mask. This is used by the disassembler. This is a + mask containing ones indicating those bits which must match the + opcode field, and zeroes indicating those bits which need not + match (and are presumably filled in by operands). */ + unsigned long mask; + + /* One bit flags for the opcode. These are used to indicate which + specific processors support the instructions. The defined values + are listed below. */ + unsigned long flags; + + /* An array of operand codes. Each code is an index into the + operand table. They appear in the order which the operands must + appear in assembly code, and are terminated by a zero. */ + unsigned char operands[8]; +}; + +/* The table itself is sorted by major opcode number, and is otherwise + in the order in which the disassembler should consider + instructions. */ +extern const struct powerpc_opcode powerpc_opcodes[]; +extern const int powerpc_num_opcodes; + +/* Values defined for the flags field of a struct powerpc_opcode. */ + +/* Opcode is defined for the PowerPC architecture. */ +#define PPC_OPCODE_PPC 1 + +/* Opcode is defined for the POWER (RS/6000) architecture. */ +#define PPC_OPCODE_POWER 2 + +/* Opcode is defined for the POWER2 (Rios 2) architecture. */ +#define PPC_OPCODE_POWER2 4 + +/* Opcode is only defined on 32 bit architectures. */ +#define PPC_OPCODE_32 8 + +/* Opcode is only defined on 64 bit architectures. */ +#define PPC_OPCODE_64 0x10 + +/* Opcode is supported by the Motorola PowerPC 601 processor. The 601 + is assumed to support all PowerPC (PPC_OPCODE_PPC) instructions, + but it also supports many additional POWER instructions. */ +#define PPC_OPCODE_601 0x20 + +/* Opcode is supported in both the Power and PowerPC architectures + (ie, compiler's -mcpu=common or assembler's -mcom). */ +#define PPC_OPCODE_COMMON 0x40 + +/* Opcode is supported for any Power or PowerPC platform (this is + for the assembler's -many option, and it eliminates duplicates). */ +#define PPC_OPCODE_ANY 0x80 + +/* Opcode is supported as part of the 64-bit bridge. */ +#define PPC_OPCODE_64_BRIDGE 0x100 + +/* Opcode is supported by Altivec Vector Unit */ +#define PPC_OPCODE_ALTIVEC 0x200 + +/* Opcode is supported by PowerPC 403 processor. */ +#define PPC_OPCODE_403 0x400 + +/* Opcode is supported by PowerPC BookE processor. */ +#define PPC_OPCODE_BOOKE 0x800 + +/* Opcode is only supported by 64-bit PowerPC BookE processor. */ +#define PPC_OPCODE_BOOKE64 0x1000 + +/* Opcode is supported by PowerPC 440 processor. */ +#define PPC_OPCODE_440 0x2000 + +/* Opcode is only supported by Power4 architecture. */ +#define PPC_OPCODE_POWER4 0x4000 + +/* Opcode isn't supported by Power4 architecture. */ +#define PPC_OPCODE_NOPOWER4 0x8000 + +/* Opcode is only supported by POWERPC Classic architecture. */ +#define PPC_OPCODE_CLASSIC 0x10000 + +/* Opcode is only supported by e500x2 Core. */ +#define PPC_OPCODE_SPE 0x20000 + +/* Opcode is supported by e500x2 Integer select APU. */ +#define PPC_OPCODE_ISEL 0x40000 + +/* Opcode is an e500 SPE floating point instruction. */ +#define PPC_OPCODE_EFS 0x80000 + +/* Opcode is supported by branch locking APU. */ +#define PPC_OPCODE_BRLOCK 0x100000 + +/* Opcode is supported by performance monitor APU. */ +#define PPC_OPCODE_PMR 0x200000 + +/* Opcode is supported by cache locking APU. */ +#define PPC_OPCODE_CACHELCK 0x400000 + +/* Opcode is supported by machine check APU. */ +#define PPC_OPCODE_RFMCI 0x800000 + +/* A macro to extract the major opcode from an instruction. */ +#define PPC_OP(i) (((i) >> 26) & 0x3f) + +/* The operands table is an array of struct powerpc_operand. */ + +struct powerpc_operand +{ + /* The number of bits in the operand. */ + int bits; + + /* How far the operand is left shifted in the instruction. */ + int shift; + + /* Insertion function. This is used by the assembler. To insert an + operand value into an instruction, check this field. + + If it is NULL, execute + i |= (op & ((1 << o->bits) - 1)) << o->shift; + (i is the instruction which we are filling in, o is a pointer to + this structure, and op is the opcode value; this assumes twos + complement arithmetic). + + If this field is not NULL, then simply call it with the + instruction and the operand value. It will return the new value + of the instruction. If the ERRMSG argument is not NULL, then if + the operand value is illegal, *ERRMSG will be set to a warning + string (the operand will be inserted in any case). If the + operand value is legal, *ERRMSG will be unchanged (most operands + can accept any value). */ + unsigned long (*insert) + (unsigned long instruction, long op, int dialect, const char **errmsg); + + /* Extraction function. This is used by the disassembler. To + extract this operand type from an instruction, check this field. + + If it is NULL, compute + op = ((i) >> o->shift) & ((1 << o->bits) - 1); + if ((o->flags & PPC_OPERAND_SIGNED) != 0 + && (op & (1 << (o->bits - 1))) != 0) + op -= 1 << o->bits; + (i is the instruction, o is a pointer to this structure, and op + is the result; this assumes twos complement arithmetic). + + If this field is not NULL, then simply call it with the + instruction value. It will return the value of the operand. If + the INVALID argument is not NULL, *INVALID will be set to + non-zero if this operand type can not actually be extracted from + this operand (i.e., the instruction does not match). If the + operand is valid, *INVALID will not be changed. */ + long (*extract) (unsigned long instruction, int dialect, int *invalid); + + /* One bit syntax flags. */ + unsigned long flags; +}; + +/* Elements in the table are retrieved by indexing with values from + the operands field of the powerpc_opcodes table. */ + +extern const struct powerpc_operand powerpc_operands[]; + +/* Values defined for the flags field of a struct powerpc_operand. */ + +/* This operand takes signed values. */ +#define PPC_OPERAND_SIGNED (01) + +/* This operand takes signed values, but also accepts a full positive + range of values when running in 32 bit mode. That is, if bits is + 16, it takes any value from -0x8000 to 0xffff. In 64 bit mode, + this flag is ignored. */ +#define PPC_OPERAND_SIGNOPT (02) + +/* This operand does not actually exist in the assembler input. This + is used to support extended mnemonics such as mr, for which two + operands fields are identical. The assembler should call the + insert function with any op value. The disassembler should call + the extract function, ignore the return value, and check the value + placed in the valid argument. */ +#define PPC_OPERAND_FAKE (04) + +/* The next operand should be wrapped in parentheses rather than + separated from this one by a comma. This is used for the load and + store instructions which want their operands to look like + reg,displacement(reg) + */ +#define PPC_OPERAND_PARENS (010) + +/* This operand may use the symbolic names for the CR fields, which + are + lt 0 gt 1 eq 2 so 3 un 3 + cr0 0 cr1 1 cr2 2 cr3 3 + cr4 4 cr5 5 cr6 6 cr7 7 + These may be combined arithmetically, as in cr2*4+gt. These are + only supported on the PowerPC, not the POWER. */ +#define PPC_OPERAND_CR (020) + +/* This operand names a register. The disassembler uses this to print + register names with a leading 'r'. */ +#define PPC_OPERAND_GPR (040) + +/* This operand names a floating point register. The disassembler + prints these with a leading 'f'. */ +#define PPC_OPERAND_FPR (0100) + +/* This operand is a relative branch displacement. The disassembler + prints these symbolically if possible. */ +#define PPC_OPERAND_RELATIVE (0200) + +/* This operand is an absolute branch address. The disassembler + prints these symbolically if possible. */ +#define PPC_OPERAND_ABSOLUTE (0400) + +/* This operand is optional, and is zero if omitted. This is used for + the optional BF and L fields in the comparison instructions. The + assembler must count the number of operands remaining on the line, + and the number of operands remaining for the opcode, and decide + whether this operand is present or not. The disassembler should + print this operand out only if it is not zero. */ +#define PPC_OPERAND_OPTIONAL (01000) + +/* This flag is only used with PPC_OPERAND_OPTIONAL. If this operand + is omitted, then for the next operand use this operand value plus + 1, ignoring the next operand field for the opcode. This wretched + hack is needed because the Power rotate instructions can take + either 4 or 5 operands. The disassembler should print this operand + out regardless of the PPC_OPERAND_OPTIONAL field. */ +#define PPC_OPERAND_NEXT (02000) + +/* This operand should be regarded as a negative number for the + purposes of overflow checking (i.e., the normal most negative + number is disallowed and one more than the normal most positive + number is allowed). This flag will only be set for a signed + operand. */ +#define PPC_OPERAND_NEGATIVE (04000) + +/* This operand names a vector unit register. The disassembler + prints these with a leading 'v'. */ +#define PPC_OPERAND_VR (010000) + +/* This operand is for the DS field in a DS form instruction. */ +#define PPC_OPERAND_DS (020000) + +/* This operand is for the DQ field in a DQ form instruction. */ +#define PPC_OPERAND_DQ (040000) + +/* The POWER and PowerPC assemblers use a few macros. We keep them + with the operands table for simplicity. The macro table is an + array of struct powerpc_macro. */ + +struct powerpc_macro +{ + /* The macro name. */ + const char *name; + + /* The number of operands the macro takes. */ + unsigned int operands; + + /* One bit flags for the opcode. These are used to indicate which + specific processors support the instructions. The values are the + same as those for the struct powerpc_opcode flags field. */ + unsigned long flags; + + /* A format string to turn the macro into a normal instruction. + Each %N in the string is replaced with operand number N (zero + based). */ + const char *format; +}; + +extern const struct powerpc_macro powerpc_macros[]; +extern const int powerpc_num_macros; + +#endif /* PPC_H */ diff --git a/arch/ppc64/xmon/privinst.h b/arch/ppc64/xmon/privinst.h new file mode 100644 index 00000000000..183c3e40025 --- /dev/null +++ b/arch/ppc64/xmon/privinst.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define GETREG(reg) \ + static inline unsigned long get_ ## reg (void) \ + { unsigned long ret; asm volatile ("mf" #reg " %0" : "=r" (ret) :); return ret; } + +#define SETREG(reg) \ + static inline void set_ ## reg (unsigned long val) \ + { asm volatile ("mt" #reg " %0" : : "r" (val)); } + +GETREG(msr) +SETREG(msrd) +GETREG(cr) + +#define GSETSPR(n, name) \ + static inline long get_ ## name (void) \ + { long ret; asm volatile ("mfspr %0," #n : "=r" (ret) : ); return ret; } \ + static inline void set_ ## name (long val) \ + { asm volatile ("mtspr " #n ",%0" : : "r" (val)); } + +GSETSPR(0, mq) +GSETSPR(1, xer) +GSETSPR(4, rtcu) +GSETSPR(5, rtcl) +GSETSPR(8, lr) +GSETSPR(9, ctr) +GSETSPR(18, dsisr) +GSETSPR(19, dar) +GSETSPR(22, dec) +GSETSPR(25, sdr1) +GSETSPR(26, srr0) +GSETSPR(27, srr1) +GSETSPR(272, sprg0) +GSETSPR(273, sprg1) +GSETSPR(274, sprg2) +GSETSPR(275, sprg3) +GSETSPR(282, ear) +GSETSPR(287, pvr) +GSETSPR(1008, hid0) +GSETSPR(1009, hid1) +GSETSPR(1010, iabr) +GSETSPR(1013, dabr) +GSETSPR(1023, pir) + +static inline void store_inst(void *p) +{ + asm volatile ("dcbst 0,%0; sync; icbi 0,%0; isync" : : "r" (p)); +} + +static inline void cflush(void *p) +{ + asm volatile ("dcbf 0,%0; icbi 0,%0" : : "r" (p)); +} + +static inline void cinval(void *p) +{ + asm volatile ("dcbi 0,%0; icbi 0,%0" : : "r" (p)); +} diff --git a/arch/ppc64/xmon/setjmp.S b/arch/ppc64/xmon/setjmp.S new file mode 100644 index 00000000000..30ee643d557 --- /dev/null +++ b/arch/ppc64/xmon/setjmp.S @@ -0,0 +1,73 @@ +/* + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE: assert(sizeof(buf) > 184) + */ +#include +#include + +_GLOBAL(xmon_setjmp) + mflr r0 + std r0,0(r3) + std r1,8(r3) + std r2,16(r3) + mfcr r0 + std r0,24(r3) + std r13,32(r3) + std r14,40(r3) + std r15,48(r3) + std r16,56(r3) + std r17,64(r3) + std r18,72(r3) + std r19,80(r3) + std r20,88(r3) + std r21,96(r3) + std r22,104(r3) + std r23,112(r3) + std r24,120(r3) + std r25,128(r3) + std r26,136(r3) + std r27,144(r3) + std r28,152(r3) + std r29,160(r3) + std r30,168(r3) + std r31,176(r3) + li r3,0 + blr + +_GLOBAL(xmon_longjmp) + cmpdi r4,0 + bne 1f + li r4,1 +1: ld r13,32(r3) + ld r14,40(r3) + ld r15,48(r3) + ld r16,56(r3) + ld r17,64(r3) + ld r18,72(r3) + ld r19,80(r3) + ld r20,88(r3) + ld r21,96(r3) + ld r22,104(r3) + ld r23,112(r3) + ld r24,120(r3) + ld r25,128(r3) + ld r26,136(r3) + ld r27,144(r3) + ld r28,152(r3) + ld r29,160(r3) + ld r30,168(r3) + ld r31,176(r3) + ld r0,24(r3) + mtcrf 56,r0 + ld r0,0(r3) + ld r1,8(r3) + ld r2,16(r3) + mtlr r0 + mr r3,r4 + blr diff --git a/arch/ppc64/xmon/start.c b/arch/ppc64/xmon/start.c new file mode 100644 index 00000000000..a9265bcc79b --- /dev/null +++ b/arch/ppc64/xmon/start.c @@ -0,0 +1,185 @@ +/* + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nonstdio.h" + +#ifdef CONFIG_MAGIC_SYSRQ + +static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + /* ensure xmon is enabled */ + xmon_init(); + debugger(pt_regs); +} + +static struct sysrq_key_op sysrq_xmon_op = +{ + .handler = sysrq_handle_xmon, + .help_msg = "Xmon", + .action_msg = "Entering xmon", +}; + +static int __init setup_xmon_sysrq(void) +{ + register_sysrq_key('x', &sysrq_xmon_op); + return 0; +} +__initcall(setup_xmon_sysrq); +#endif /* CONFIG_MAGIC_SYSRQ */ + +int +xmon_write(void *handle, void *ptr, int nb) +{ + return udbg_write(ptr, nb); +} + +int +xmon_read(void *handle, void *ptr, int nb) +{ + return udbg_read(ptr, nb); +} + +int +xmon_read_poll(void) +{ + return udbg_getc_poll(); +} + +FILE *xmon_stdin; +FILE *xmon_stdout; + +int +xmon_putc(int c, void *f) +{ + char ch = c; + + if (c == '\n') + xmon_putc('\r', f); + return xmon_write(f, &ch, 1) == 1? c: -1; +} + +int +xmon_putchar(int c) +{ + return xmon_putc(c, xmon_stdout); +} + +int +xmon_fputs(char *str, void *f) +{ + int n = strlen(str); + + return xmon_write(f, str, n) == n? 0: -1; +} + +int +xmon_readchar(void) +{ + char ch; + + for (;;) { + switch (xmon_read(xmon_stdin, &ch, 1)) { + case 1: + return ch; + case -1: + xmon_printf("read(stdin) returned -1\r\n", 0, 0); + return -1; + } + } +} + +static char line[256]; +static char *lineptr; +static int lineleft; + +int +xmon_getchar(void) +{ + int c; + + if (lineleft == 0) { + lineptr = line; + for (;;) { + c = xmon_readchar(); + if (c == -1 || c == 4) + break; + if (c == '\r' || c == '\n') { + *lineptr++ = '\n'; + xmon_putchar('\n'); + break; + } + switch (c) { + case 0177: + case '\b': + if (lineptr > line) { + xmon_putchar('\b'); + xmon_putchar(' '); + xmon_putchar('\b'); + --lineptr; + } + break; + case 'U' & 0x1F: + while (lineptr > line) { + xmon_putchar('\b'); + xmon_putchar(' '); + xmon_putchar('\b'); + --lineptr; + } + break; + default: + if (lineptr >= &line[sizeof(line) - 1]) + xmon_putchar('\a'); + else { + xmon_putchar(c); + *lineptr++ = c; + } + } + } + lineleft = lineptr - line; + lineptr = line; + } + if (lineleft == 0) + return -1; + --lineleft; + return *lineptr++; +} + +char * +xmon_fgets(char *str, int nb, void *f) +{ + char *p; + int c; + + for (p = str; p < str + nb - 1; ) { + c = xmon_getchar(); + if (c == -1) { + if (p == str) + return NULL; + break; + } + *p++ = c; + if (c == '\n') + break; + } + *p = 0; + return str; +} diff --git a/arch/ppc64/xmon/subr_prf.c b/arch/ppc64/xmon/subr_prf.c new file mode 100644 index 00000000000..5242bd7d095 --- /dev/null +++ b/arch/ppc64/xmon/subr_prf.c @@ -0,0 +1,55 @@ +/* + * Written by Cort Dougan to replace the version originally used + * by Paul Mackerras, which came from NetBSD and thus had copyright + * conflicts with Linux. + * + * This file makes liberal use of the standard linux utility + * routines to reduce the size of the binary. We assume we can + * trust some parts of Linux inside the debugger. + * -- Cort (cort@cs.nmt.edu) + * + * Copyright (C) 1999 Cort Dougan. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include "nonstdio.h" + +extern int xmon_write(void *, void *, int); + +void +xmon_vfprintf(void *f, const char *fmt, va_list ap) +{ + static char xmon_buf[2048]; + int n; + + n = vsprintf(xmon_buf, fmt, ap); + xmon_write(f, xmon_buf, n); +} + +void +xmon_printf(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xmon_vfprintf(stdout, fmt, ap); + va_end(ap); +} + +void +xmon_fprintf(void *f, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xmon_vfprintf(f, fmt, ap); + va_end(ap); +} + diff --git a/arch/ppc64/xmon/xmon.c b/arch/ppc64/xmon/xmon.c new file mode 100644 index 00000000000..3c0ccb2623a --- /dev/null +++ b/arch/ppc64/xmon/xmon.c @@ -0,0 +1,2506 @@ +/* + * Routines providing a simple monitor for use on the PowerMac. + * + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nonstdio.h" +#include "privinst.h" + +#define scanhex xmon_scanhex +#define skipbl xmon_skipbl + +#ifdef CONFIG_SMP +cpumask_t cpus_in_xmon = CPU_MASK_NONE; +static unsigned long xmon_taken = 1; +static int xmon_owner; +static int xmon_gate; +#endif /* CONFIG_SMP */ + +static unsigned long in_xmon = 0; + +static unsigned long adrs; +static int size = 1; +#define MAX_DUMP (128 * 1024) +static unsigned long ndump = 64; +static unsigned long nidump = 16; +static unsigned long ncsum = 4096; +static int termch; +static char tmpstr[128]; + +#define JMP_BUF_LEN (184/sizeof(long)) +static long bus_error_jmp[JMP_BUF_LEN]; +static int catch_memory_errors; +static long *xmon_fault_jmp[NR_CPUS]; +#define setjmp xmon_setjmp +#define longjmp xmon_longjmp + +/* Breakpoint stuff */ +struct bpt { + unsigned long address; + unsigned int instr[2]; + atomic_t ref_count; + int enabled; + unsigned long pad; +}; + +/* Bits in bpt.enabled */ +#define BP_IABR_TE 1 /* IABR translation enabled */ +#define BP_IABR 2 +#define BP_TRAP 8 +#define BP_DABR 0x10 + +#define NBPTS 256 +static struct bpt bpts[NBPTS]; +static struct bpt dabr; +static struct bpt *iabr; +static unsigned bpinstr = 0x7fe00008; /* trap */ + +#define BP_NUM(bp) ((bp) - bpts + 1) + +/* Prototypes */ +static int cmds(struct pt_regs *); +static int mread(unsigned long, void *, int); +static int mwrite(unsigned long, void *, int); +static int handle_fault(struct pt_regs *); +static void byterev(unsigned char *, int); +static void memex(void); +static int bsesc(void); +static void dump(void); +static void prdump(unsigned long, long); +static int ppc_inst_dump(unsigned long, long, int); +void print_address(unsigned long); +static void backtrace(struct pt_regs *); +static void excprint(struct pt_regs *); +static void prregs(struct pt_regs *); +static void memops(int); +static void memlocate(void); +static void memzcan(void); +static void memdiffs(unsigned char *, unsigned char *, unsigned, unsigned); +int skipbl(void); +int scanhex(unsigned long *valp); +static void scannl(void); +static int hexdigit(int); +void getstring(char *, int); +static void flush_input(void); +static int inchar(void); +static void take_input(char *); +static unsigned long read_spr(int); +static void write_spr(int, unsigned long); +static void super_regs(void); +static void remove_bpts(void); +static void insert_bpts(void); +static void remove_cpu_bpts(void); +static void insert_cpu_bpts(void); +static struct bpt *at_breakpoint(unsigned long pc); +static struct bpt *in_breakpoint_table(unsigned long pc, unsigned long *offp); +static int do_step(struct pt_regs *); +static void bpt_cmds(void); +static void cacheflush(void); +static int cpu_cmd(void); +static void csum(void); +static void bootcmds(void); +void dump_segments(void); +static void symbol_lookup(void); +static void xmon_print_symbol(unsigned long address, const char *mid, + const char *after); +static const char *getvecname(unsigned long vec); + +static void debug_trace(void); + +extern int print_insn_powerpc(unsigned long, unsigned long, int); +extern void printf(const char *fmt, ...); +extern void xmon_vfprintf(void *f, const char *fmt, va_list ap); +extern int xmon_putc(int c, void *f); +extern int putchar(int ch); +extern int xmon_read_poll(void); +extern int setjmp(long *); +extern void longjmp(long *, int); +extern unsigned long _ASR; + +#define GETWORD(v) (((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3]) + +#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ + || ('a' <= (c) && (c) <= 'f') \ + || ('A' <= (c) && (c) <= 'F')) +#define isalnum(c) (('0' <= (c) && (c) <= '9') \ + || ('a' <= (c) && (c) <= 'z') \ + || ('A' <= (c) && (c) <= 'Z')) +#define isspace(c) (c == ' ' || c == '\t' || c == 10 || c == 13 || c == 0) + +static char *help_string = "\ +Commands:\n\ + b show breakpoints\n\ + bd set data breakpoint\n\ + bi set instruction breakpoint\n\ + bc clear breakpoint\n" +#ifdef CONFIG_SMP + "\ + c print cpus stopped in xmon\n\ + c# try to switch to cpu number h (in hex)\n" +#endif + "\ + C checksum\n\ + d dump bytes\n\ + di dump instructions\n\ + df dump float values\n\ + dd dump double values\n\ + e print exception information\n\ + f flush cache\n\ + la lookup symbol+offset of specified address\n\ + ls lookup address of specified symbol\n\ + m examine/change memory\n\ + mm move a block of memory\n\ + ms set a block of memory\n\ + md compare two blocks of memory\n\ + ml locate a block of memory\n\ + mz zero a block of memory\n\ + mi show information about memory allocation\n\ + p show the task list\n\ + r print registers\n\ + s single step\n\ + S print special registers\n\ + t print backtrace\n\ + T Enable/Disable PPCDBG flags\n\ + x exit monitor and recover\n\ + X exit monitor and dont recover\n\ + u dump segment table or SLB\n\ + ? help\n" + "\ + zr reboot\n\ + zh halt\n" +; + +static struct pt_regs *xmon_regs; + +extern inline void sync(void) +{ + asm volatile("sync; isync"); +} + +/* (Ref: 64-bit PowerPC ELF ABI Spplement; Ian Lance Taylor, Zembu Labs). + A PPC stack frame looks like this: + + High Address + Back Chain + FP reg save area + GP reg save area + Local var space + Parameter save area (SP+48) + TOC save area (SP+40) + link editor doubleword (SP+32) + compiler doubleword (SP+24) + LR save (SP+16) + CR save (SP+8) + Back Chain (SP+0) + + Note that the LR (ret addr) may not be saved in the current frame if + no functions have been called from the current function. + */ + +/* + * Disable surveillance (the service processor watchdog function) + * while we are in xmon. + * XXX we should re-enable it when we leave. :) + */ +#define SURVEILLANCE_TOKEN 9000 + +static inline void disable_surveillance(void) +{ +#ifdef CONFIG_PPC_PSERIES + /* Since this can't be a module, args should end up below 4GB. */ + static struct rtas_args args; + + /* + * At this point we have got all the cpus we can into + * xmon, so there is hopefully no other cpu calling RTAS + * at the moment, even though we don't take rtas.lock. + * If we did try to take rtas.lock there would be a + * real possibility of deadlock. + */ + args.token = rtas_token("set-indicator"); + if (args.token == RTAS_UNKNOWN_SERVICE) + return; + args.nargs = 3; + args.nret = 1; + args.rets = &args.args[3]; + args.args[0] = SURVEILLANCE_TOKEN; + args.args[1] = 0; + args.args[2] = 0; + enter_rtas(__pa(&args)); +#endif /* CONFIG_PPC_PSERIES */ +} + +#ifdef CONFIG_SMP +static int xmon_speaker; + +static void get_output_lock(void) +{ + int me = smp_processor_id() + 0x100; + int last_speaker = 0, prev; + long timeout; + + if (xmon_speaker == me) + return; + for (;;) { + if (xmon_speaker == 0) { + last_speaker = cmpxchg(&xmon_speaker, 0, me); + if (last_speaker == 0) + return; + } + timeout = 10000000; + while (xmon_speaker == last_speaker) { + if (--timeout > 0) + continue; + /* hostile takeover */ + prev = cmpxchg(&xmon_speaker, last_speaker, me); + if (prev == last_speaker) + return; + break; + } + } +} + +static void release_output_lock(void) +{ + xmon_speaker = 0; +} +#endif + +int xmon_core(struct pt_regs *regs, int fromipi) +{ + int cmd = 0; + unsigned long msr; + struct bpt *bp; + long recurse_jmp[JMP_BUF_LEN]; + unsigned long offset; +#ifdef CONFIG_SMP + int cpu; + int secondary; + unsigned long timeout; +#endif + + msr = get_msr(); + set_msrd(msr & ~MSR_EE); /* disable interrupts */ + + bp = in_breakpoint_table(regs->nip, &offset); + if (bp != NULL) { + regs->nip = bp->address + offset; + atomic_dec(&bp->ref_count); + } + + remove_cpu_bpts(); + +#ifdef CONFIG_SMP + cpu = smp_processor_id(); + if (cpu_isset(cpu, cpus_in_xmon)) { + get_output_lock(); + excprint(regs); + printf("cpu 0x%x: Exception %lx %s in xmon, " + "returning to main loop\n", + cpu, regs->trap, getvecname(TRAP(regs))); + longjmp(xmon_fault_jmp[cpu], 1); + } + + if (setjmp(recurse_jmp) != 0) { + if (!in_xmon || !xmon_gate) { + printf("xmon: WARNING: bad recursive fault " + "on cpu 0x%x\n", cpu); + goto waiting; + } + secondary = !(xmon_taken && cpu == xmon_owner); + goto cmdloop; + } + + xmon_fault_jmp[cpu] = recurse_jmp; + cpu_set(cpu, cpus_in_xmon); + + bp = NULL; + if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) + bp = at_breakpoint(regs->nip); + if (bp || (regs->msr & MSR_RI) == 0) + fromipi = 0; + + if (!fromipi) { + get_output_lock(); + excprint(regs); + if (bp) { + printf("cpu 0x%x stopped at breakpoint 0x%x (", + cpu, BP_NUM(bp)); + xmon_print_symbol(regs->nip, " ", ")\n"); + } + if ((regs->msr & MSR_RI) == 0) + printf("WARNING: exception is not recoverable, " + "can't continue\n"); + release_output_lock(); + } + + waiting: + secondary = 1; + while (secondary && !xmon_gate) { + if (in_xmon == 0) { + if (fromipi) + goto leave; + secondary = test_and_set_bit(0, &in_xmon); + } + barrier(); + } + + if (!secondary && !xmon_gate) { + /* we are the first cpu to come in */ + /* interrupt other cpu(s) */ + int ncpus = num_online_cpus(); + + xmon_owner = cpu; + mb(); + if (ncpus > 1) { + smp_send_debugger_break(MSG_ALL_BUT_SELF); + /* wait for other cpus to come in */ + for (timeout = 100000000; timeout != 0; --timeout) { + if (cpus_weight(cpus_in_xmon) >= ncpus) + break; + barrier(); + } + } + remove_bpts(); + disable_surveillance(); + /* for breakpoint or single step, print the current instr. */ + if (bp || TRAP(regs) == 0xd00) + ppc_inst_dump(regs->nip, 1, 0); + printf("enter ? for help\n"); + mb(); + xmon_gate = 1; + barrier(); + } + + cmdloop: + while (in_xmon) { + if (secondary) { + if (cpu == xmon_owner) { + if (!test_and_set_bit(0, &xmon_taken)) { + secondary = 0; + continue; + } + /* missed it */ + while (cpu == xmon_owner) + barrier(); + } + barrier(); + } else { + cmd = cmds(regs); + if (cmd != 0) { + /* exiting xmon */ + insert_bpts(); + xmon_gate = 0; + wmb(); + in_xmon = 0; + break; + } + /* have switched to some other cpu */ + secondary = 1; + } + } + leave: + cpu_clear(cpu, cpus_in_xmon); + xmon_fault_jmp[cpu] = NULL; + +#else + /* UP is simple... */ + if (in_xmon) { + printf("Exception %lx %s in xmon, returning to main loop\n", + regs->trap, getvecname(TRAP(regs))); + longjmp(xmon_fault_jmp[0], 1); + } + if (setjmp(recurse_jmp) == 0) { + xmon_fault_jmp[0] = recurse_jmp; + in_xmon = 1; + + excprint(regs); + bp = at_breakpoint(regs->nip); + if (bp) { + printf("Stopped at breakpoint %x (", BP_NUM(bp)); + xmon_print_symbol(regs->nip, " ", ")\n"); + } + if ((regs->msr & MSR_RI) == 0) + printf("WARNING: exception is not recoverable, " + "can't continue\n"); + remove_bpts(); + disable_surveillance(); + /* for breakpoint or single step, print the current instr. */ + if (bp || TRAP(regs) == 0xd00) + ppc_inst_dump(regs->nip, 1, 0); + printf("enter ? for help\n"); + } + + cmd = cmds(regs); + + insert_bpts(); + in_xmon = 0; +#endif + + if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) { + bp = at_breakpoint(regs->nip); + if (bp != NULL) { + int stepped = emulate_step(regs, bp->instr[0]); + if (stepped == 0) { + regs->nip = (unsigned long) &bp->instr[0]; + atomic_inc(&bp->ref_count); + } else if (stepped < 0) { + printf("Couldn't single-step %s instruction\n", + (IS_RFID(bp->instr[0])? "rfid": "mtmsrd")); + } + } + } + + insert_cpu_bpts(); + + set_msrd(msr); /* restore interrupt enable */ + + return cmd != 'X'; +} + +int xmon(struct pt_regs *excp) +{ + struct pt_regs regs; + + if (excp == NULL) { + /* Ok, grab regs as they are now. + This won't do a particularily good job because the + prologue has already been executed. + ToDo: We could reach back into the callers save + area to do a better job of representing the + caller's state. + */ + asm volatile ("std 0,0(%0)\n\ + std 1,8(%0)\n\ + std 2,16(%0)\n\ + std 3,24(%0)\n\ + std 4,32(%0)\n\ + std 5,40(%0)\n\ + std 6,48(%0)\n\ + std 7,56(%0)\n\ + std 8,64(%0)\n\ + std 9,72(%0)\n\ + std 10,80(%0)\n\ + std 11,88(%0)\n\ + std 12,96(%0)\n\ + std 13,104(%0)\n\ + std 14,112(%0)\n\ + std 15,120(%0)\n\ + std 16,128(%0)\n\ + std 17,136(%0)\n\ + std 18,144(%0)\n\ + std 19,152(%0)\n\ + std 20,160(%0)\n\ + std 21,168(%0)\n\ + std 22,176(%0)\n\ + std 23,184(%0)\n\ + std 24,192(%0)\n\ + std 25,200(%0)\n\ + std 26,208(%0)\n\ + std 27,216(%0)\n\ + std 28,224(%0)\n\ + std 29,232(%0)\n\ + std 30,240(%0)\n\ + std 31,248(%0)" : : "b" (®s)); + + regs.nip = regs.link = ((unsigned long *)(regs.gpr[1]))[2]; + regs.msr = get_msr(); + regs.ctr = get_ctr(); + regs.xer = get_xer(); + regs.ccr = get_cr(); + regs.trap = 0; + excp = ®s; + } + return xmon_core(excp, 0); +} + +int xmon_bpt(struct pt_regs *regs) +{ + struct bpt *bp; + unsigned long offset; + + if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF)) + return 0; + + /* Are we at the trap at bp->instr[1] for some bp? */ + bp = in_breakpoint_table(regs->nip, &offset); + if (bp != NULL && offset == 4) { + regs->nip = bp->address + 4; + atomic_dec(&bp->ref_count); + return 1; + } + + /* Are we at a breakpoint? */ + bp = at_breakpoint(regs->nip); + if (!bp) + return 0; + + xmon_core(regs, 0); + + return 1; +} + +int xmon_sstep(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + xmon_core(regs, 0); + return 1; +} + +int xmon_dabr_match(struct pt_regs *regs) +{ + if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF)) + return 0; + xmon_core(regs, 0); + return 1; +} + +int xmon_iabr_match(struct pt_regs *regs) +{ + if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF)) + return 0; + if (iabr == 0) + return 0; + xmon_core(regs, 0); + return 1; +} + +int xmon_ipi(struct pt_regs *regs) +{ +#ifdef CONFIG_SMP + if (in_xmon && !cpu_isset(smp_processor_id(), cpus_in_xmon)) + xmon_core(regs, 1); +#endif + return 0; +} + +int xmon_fault_handler(struct pt_regs *regs) +{ + struct bpt *bp; + unsigned long offset; + + if (in_xmon && catch_memory_errors) + handle_fault(regs); /* doesn't return */ + + if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) { + bp = in_breakpoint_table(regs->nip, &offset); + if (bp != NULL) { + regs->nip = bp->address + offset; + atomic_dec(&bp->ref_count); + } + } + + return 0; +} + +/* On systems with a hypervisor, we can't set the DABR + (data address breakpoint register) directly. */ +static void set_controlled_dabr(unsigned long val) +{ +#ifdef CONFIG_PPC_PSERIES + if (systemcfg->platform == PLATFORM_PSERIES_LPAR) { + int rc = plpar_hcall_norets(H_SET_DABR, val); + if (rc != H_Success) + xmon_printf("Warning: setting DABR failed (%d)\n", rc); + } else +#endif + set_dabr(val); +} + +static struct bpt *at_breakpoint(unsigned long pc) +{ + int i; + struct bpt *bp; + + bp = bpts; + for (i = 0; i < NBPTS; ++i, ++bp) + if (bp->enabled && pc == bp->address) + return bp; + return NULL; +} + +static struct bpt *in_breakpoint_table(unsigned long nip, unsigned long *offp) +{ + unsigned long off; + + off = nip - (unsigned long) bpts; + if (off >= sizeof(bpts)) + return NULL; + off %= sizeof(struct bpt); + if (off != offsetof(struct bpt, instr[0]) + && off != offsetof(struct bpt, instr[1])) + return NULL; + *offp = off - offsetof(struct bpt, instr[0]); + return (struct bpt *) (nip - off); +} + +static struct bpt *new_breakpoint(unsigned long a) +{ + struct bpt *bp; + + a &= ~3UL; + bp = at_breakpoint(a); + if (bp) + return bp; + + for (bp = bpts; bp < &bpts[NBPTS]; ++bp) { + if (!bp->enabled && atomic_read(&bp->ref_count) == 0) { + bp->address = a; + bp->instr[1] = bpinstr; + store_inst(&bp->instr[1]); + return bp; + } + } + + printf("Sorry, no free breakpoints. Please clear one first.\n"); + return NULL; +} + +static void insert_bpts(void) +{ + int i; + struct bpt *bp; + + bp = bpts; + for (i = 0; i < NBPTS; ++i, ++bp) { + if ((bp->enabled & (BP_TRAP|BP_IABR)) == 0) + continue; + if (mread(bp->address, &bp->instr[0], 4) != 4) { + printf("Couldn't read instruction at %lx, " + "disabling breakpoint there\n", bp->address); + bp->enabled = 0; + continue; + } + if (IS_MTMSRD(bp->instr[0]) || IS_RFID(bp->instr[0])) { + printf("Breakpoint at %lx is on an mtmsrd or rfid " + "instruction, disabling it\n", bp->address); + bp->enabled = 0; + continue; + } + store_inst(&bp->instr[0]); + if (bp->enabled & BP_IABR) + continue; + if (mwrite(bp->address, &bpinstr, 4) != 4) { + printf("Couldn't write instruction at %lx, " + "disabling breakpoint there\n", bp->address); + bp->enabled &= ~BP_TRAP; + continue; + } + store_inst((void *)bp->address); + } +} + +static void insert_cpu_bpts(void) +{ + if (dabr.enabled) + set_controlled_dabr(dabr.address | (dabr.enabled & 7)); + if (iabr && cpu_has_feature(CPU_FTR_IABR)) + set_iabr(iabr->address + | (iabr->enabled & (BP_IABR|BP_IABR_TE))); +} + +static void remove_bpts(void) +{ + int i; + struct bpt *bp; + unsigned instr; + + bp = bpts; + for (i = 0; i < NBPTS; ++i, ++bp) { + if ((bp->enabled & (BP_TRAP|BP_IABR)) != BP_TRAP) + continue; + if (mread(bp->address, &instr, 4) == 4 + && instr == bpinstr + && mwrite(bp->address, &bp->instr, 4) != 4) + printf("Couldn't remove breakpoint at %lx\n", + bp->address); + else + store_inst((void *)bp->address); + } +} + +static void remove_cpu_bpts(void) +{ + set_controlled_dabr(0); + if (cpu_has_feature(CPU_FTR_IABR)) + set_iabr(0); +} + +/* Command interpreting routine */ +static char *last_cmd; + +static int +cmds(struct pt_regs *excp) +{ + int cmd = 0; + + last_cmd = NULL; + xmon_regs = excp; + for(;;) { +#ifdef CONFIG_SMP + printf("%x:", smp_processor_id()); +#endif /* CONFIG_SMP */ + printf("mon> "); + fflush(stdout); + flush_input(); + termch = 0; + cmd = skipbl(); + if( cmd == '\n' ) { + if (last_cmd == NULL) + continue; + take_input(last_cmd); + last_cmd = NULL; + cmd = inchar(); + } + switch (cmd) { + case 'm': + cmd = inchar(); + switch (cmd) { + case 'm': + case 's': + case 'd': + memops(cmd); + break; + case 'l': + memlocate(); + break; + case 'z': + memzcan(); + break; + case 'i': + show_mem(); + break; + default: + termch = cmd; + memex(); + } + break; + case 'd': + dump(); + break; + case 'l': + symbol_lookup(); + break; + case 'r': + prregs(excp); /* print regs */ + break; + case 'e': + excprint(excp); + break; + case 'S': + super_regs(); + break; + case 't': + backtrace(excp); + break; + case 'f': + cacheflush(); + break; + case 's': + if (do_step(excp)) + return cmd; + break; + case 'x': + case 'X': + case EOF: + return cmd; + case '?': + printf(help_string); + break; + case 'p': + show_state(); + break; + case 'b': + bpt_cmds(); + break; + case 'C': + csum(); + break; + case 'c': + if (cpu_cmd()) + return 0; + break; + case 'z': + bootcmds(); + break; + case 'T': + debug_trace(); + break; + case 'u': + dump_segments(); + break; + default: + printf("Unrecognized command: "); + do { + if (' ' < cmd && cmd <= '~') + putchar(cmd); + else + printf("\\x%x", cmd); + cmd = inchar(); + } while (cmd != '\n'); + printf(" (type ? for help)\n"); + break; + } + } +} + +/* + * Step a single instruction. + * Some instructions we emulate, others we execute with MSR_SE set. + */ +static int do_step(struct pt_regs *regs) +{ + unsigned int instr; + int stepped; + + /* check we are in 64-bit kernel mode, translation enabled */ + if ((regs->msr & (MSR_SF|MSR_PR|MSR_IR)) == (MSR_SF|MSR_IR)) { + if (mread(regs->nip, &instr, 4) == 4) { + stepped = emulate_step(regs, instr); + if (stepped < 0) { + printf("Couldn't single-step %s instruction\n", + (IS_RFID(instr)? "rfid": "mtmsrd")); + return 0; + } + if (stepped > 0) { + regs->trap = 0xd00 | (regs->trap & 1); + printf("stepped to "); + xmon_print_symbol(regs->nip, " ", "\n"); + ppc_inst_dump(regs->nip, 1, 0); + return 0; + } + } + } + regs->msr |= MSR_SE; + return 1; +} + +static void bootcmds(void) +{ + int cmd; + + cmd = inchar(); + if (cmd == 'r') + ppc_md.restart(NULL); + else if (cmd == 'h') + ppc_md.halt(); + else if (cmd == 'p') + ppc_md.power_off(); +} + +static int cpu_cmd(void) +{ +#ifdef CONFIG_SMP + unsigned long cpu; + int timeout; + int count; + + if (!scanhex(&cpu)) { + /* print cpus waiting or in xmon */ + printf("cpus stopped:"); + count = 0; + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + if (cpu_isset(cpu, cpus_in_xmon)) { + if (count == 0) + printf(" %x", cpu); + ++count; + } else { + if (count > 1) + printf("-%x", cpu - 1); + count = 0; + } + } + if (count > 1) + printf("-%x", NR_CPUS - 1); + printf("\n"); + return 0; + } + /* try to switch to cpu specified */ + if (!cpu_isset(cpu, cpus_in_xmon)) { + printf("cpu 0x%x isn't in xmon\n", cpu); + return 0; + } + xmon_taken = 0; + mb(); + xmon_owner = cpu; + timeout = 10000000; + while (!xmon_taken) { + if (--timeout == 0) { + if (test_and_set_bit(0, &xmon_taken)) + break; + /* take control back */ + mb(); + xmon_owner = smp_processor_id(); + printf("cpu %u didn't take control\n", cpu); + return 0; + } + barrier(); + } + return 1; +#else + return 0; +#endif /* CONFIG_SMP */ +} + +static unsigned short fcstab[256] = { + 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, + 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, + 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, + 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, + 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, + 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, + 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, + 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, + 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, + 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, + 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, + 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, + 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, + 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, + 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, + 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, + 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, + 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, + 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, + 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, + 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, + 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, + 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, + 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, + 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, + 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, + 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, + 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, + 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, + 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, + 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, + 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 +}; + +#define FCS(fcs, c) (((fcs) >> 8) ^ fcstab[((fcs) ^ (c)) & 0xff]) + +static void +csum(void) +{ + unsigned int i; + unsigned short fcs; + unsigned char v; + + if (!scanhex(&adrs)) + return; + if (!scanhex(&ncsum)) + return; + fcs = 0xffff; + for (i = 0; i < ncsum; ++i) { + if (mread(adrs+i, &v, 1) == 0) { + printf("csum stopped at %x\n", adrs+i); + break; + } + fcs = FCS(fcs, v); + } + printf("%x\n", fcs); +} + +/* + * Check if this is a suitable place to put a breakpoint. + */ +static long check_bp_loc(unsigned long addr) +{ + unsigned int instr; + + addr &= ~3; + if (addr < KERNELBASE) { + printf("Breakpoints may only be placed at kernel addresses\n"); + return 0; + } + if (!mread(addr, &instr, sizeof(instr))) { + printf("Can't read instruction at address %lx\n", addr); + return 0; + } + if (IS_MTMSRD(instr) || IS_RFID(instr)) { + printf("Breakpoints may not be placed on mtmsrd or rfid " + "instructions\n"); + return 0; + } + return 1; +} + +static char *breakpoint_help_string = + "Breakpoint command usage:\n" + "b show breakpoints\n" + "b [cnt] set breakpoint at given instr addr\n" + "bc clear all breakpoints\n" + "bc clear breakpoint number n or at addr\n" + "bi [cnt] set hardware instr breakpoint (POWER3/RS64 only)\n" + "bd [cnt] set hardware data breakpoint\n" + ""; + +static void +bpt_cmds(void) +{ + int cmd; + unsigned long a; + int mode, i; + struct bpt *bp; + const char badaddr[] = "Only kernel addresses are permitted " + "for breakpoints\n"; + + cmd = inchar(); + switch (cmd) { + case 'd': /* bd - hardware data breakpoint */ + mode = 7; + cmd = inchar(); + if (cmd == 'r') + mode = 5; + else if (cmd == 'w') + mode = 6; + else + termch = cmd; + dabr.address = 0; + dabr.enabled = 0; + if (scanhex(&dabr.address)) { + if (dabr.address < KERNELBASE) { + printf(badaddr); + break; + } + dabr.address &= ~7; + dabr.enabled = mode | BP_DABR; + } + break; + + case 'i': /* bi - hardware instr breakpoint */ + if (!cpu_has_feature(CPU_FTR_IABR)) { + printf("Hardware instruction breakpoint " + "not supported on this cpu\n"); + break; + } + if (iabr) { + iabr->enabled &= ~(BP_IABR | BP_IABR_TE); + iabr = NULL; + } + if (!scanhex(&a)) + break; + if (!check_bp_loc(a)) + break; + bp = new_breakpoint(a); + if (bp != NULL) { + bp->enabled |= BP_IABR | BP_IABR_TE; + iabr = bp; + } + break; + + case 'c': + if (!scanhex(&a)) { + /* clear all breakpoints */ + for (i = 0; i < NBPTS; ++i) + bpts[i].enabled = 0; + iabr = NULL; + dabr.enabled = 0; + printf("All breakpoints cleared\n"); + break; + } + + if (a <= NBPTS && a >= 1) { + /* assume a breakpoint number */ + bp = &bpts[a-1]; /* bp nums are 1 based */ + } else { + /* assume a breakpoint address */ + bp = at_breakpoint(a); + if (bp == 0) { + printf("No breakpoint at %x\n", a); + break; + } + } + + printf("Cleared breakpoint %x (", BP_NUM(bp)); + xmon_print_symbol(bp->address, " ", ")\n"); + bp->enabled = 0; + break; + + default: + termch = cmd; + cmd = skipbl(); + if (cmd == '?') { + printf(breakpoint_help_string); + break; + } + termch = cmd; + if (!scanhex(&a)) { + /* print all breakpoints */ + printf(" type address\n"); + if (dabr.enabled) { + printf(" data %.16lx [", dabr.address); + if (dabr.enabled & 1) + printf("r"); + if (dabr.enabled & 2) + printf("w"); + printf("]\n"); + } + for (bp = bpts; bp < &bpts[NBPTS]; ++bp) { + if (!bp->enabled) + continue; + printf("%2x %s ", BP_NUM(bp), + (bp->enabled & BP_IABR)? "inst": "trap"); + xmon_print_symbol(bp->address, " ", "\n"); + } + break; + } + + if (!check_bp_loc(a)) + break; + bp = new_breakpoint(a); + if (bp != NULL) + bp->enabled |= BP_TRAP; + break; + } +} + +/* Very cheap human name for vector lookup. */ +static +const char *getvecname(unsigned long vec) +{ + char *ret; + + switch (vec) { + case 0x100: ret = "(System Reset)"; break; + case 0x200: ret = "(Machine Check)"; break; + case 0x300: ret = "(Data Access)"; break; + case 0x380: ret = "(Data SLB Access)"; break; + case 0x400: ret = "(Instruction Access)"; break; + case 0x480: ret = "(Instruction SLB Access)"; break; + case 0x500: ret = "(Hardware Interrupt)"; break; + case 0x600: ret = "(Alignment)"; break; + case 0x700: ret = "(Program Check)"; break; + case 0x800: ret = "(FPU Unavailable)"; break; + case 0x900: ret = "(Decrementer)"; break; + case 0xc00: ret = "(System Call)"; break; + case 0xd00: ret = "(Single Step)"; break; + case 0xf00: ret = "(Performance Monitor)"; break; + case 0xf20: ret = "(Altivec Unavailable)"; break; + case 0x1300: ret = "(Instruction Breakpoint)"; break; + default: ret = ""; + } + return ret; +} + +static void get_function_bounds(unsigned long pc, unsigned long *startp, + unsigned long *endp) +{ + unsigned long size, offset; + const char *name; + char *modname; + + *startp = *endp = 0; + if (pc == 0) + return; + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + name = kallsyms_lookup(pc, &size, &offset, &modname, tmpstr); + if (name != NULL) { + *startp = pc - offset; + *endp = pc - offset + size; + } + sync(); + } + catch_memory_errors = 0; +} + +static int xmon_depth_to_print = 64; + +static void xmon_show_stack(unsigned long sp, unsigned long lr, + unsigned long pc) +{ + unsigned long ip; + unsigned long newsp; + unsigned long marker; + int count = 0; + struct pt_regs regs; + + do { + if (sp < PAGE_OFFSET) { + if (sp != 0) + printf("SP (%lx) is in userspace\n", sp); + break; + } + + if (!mread(sp + 16, &ip, sizeof(unsigned long)) + || !mread(sp, &newsp, sizeof(unsigned long))) { + printf("Couldn't read stack frame at %lx\n", sp); + break; + } + + /* + * For the first stack frame, try to work out if + * LR and/or the saved LR value in the bottommost + * stack frame are valid. + */ + if ((pc | lr) != 0) { + unsigned long fnstart, fnend; + unsigned long nextip; + int printip = 1; + + get_function_bounds(pc, &fnstart, &fnend); + nextip = 0; + if (newsp > sp) + mread(newsp + 16, &nextip, + sizeof(unsigned long)); + if (lr == ip) { + if (lr < PAGE_OFFSET + || (fnstart <= lr && lr < fnend)) + printip = 0; + } else if (lr == nextip) { + printip = 0; + } else if (lr >= PAGE_OFFSET + && !(fnstart <= lr && lr < fnend)) { + printf("[link register ] "); + xmon_print_symbol(lr, " ", "\n"); + } + if (printip) { + printf("[%.16lx] ", sp); + xmon_print_symbol(ip, " ", " (unreliable)\n"); + } + pc = lr = 0; + + } else { + printf("[%.16lx] ", sp); + xmon_print_symbol(ip, " ", "\n"); + } + + /* Look for "regshere" marker to see if this is + an exception frame. */ + if (mread(sp + 0x60, &marker, sizeof(unsigned long)) + && marker == 0x7265677368657265ul) { + if (mread(sp + 0x70, ®s, sizeof(regs)) + != sizeof(regs)) { + printf("Couldn't read registers at %lx\n", + sp + 0x70); + break; + } + printf("--- Exception: %lx %s at ", regs.trap, + getvecname(TRAP(®s))); + pc = regs.nip; + lr = regs.link; + xmon_print_symbol(pc, " ", "\n"); + } + + if (newsp == 0) + break; + + sp = newsp; + } while (count++ < xmon_depth_to_print); +} + +static void backtrace(struct pt_regs *excp) +{ + unsigned long sp; + + if (scanhex(&sp)) + xmon_show_stack(sp, 0, 0); + else + xmon_show_stack(excp->gpr[1], excp->link, excp->nip); + scannl(); +} + +static void print_bug_trap(struct pt_regs *regs) +{ + struct bug_entry *bug; + unsigned long addr; + + if (regs->msr & MSR_PR) + return; /* not in kernel */ + addr = regs->nip; /* address of trap instruction */ + if (addr < PAGE_OFFSET) + return; + bug = find_bug(regs->nip); + if (bug == NULL) + return; + if (bug->line & BUG_WARNING_TRAP) + return; + + printf("kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, (unsigned int)bug->line); +} + +void excprint(struct pt_regs *fp) +{ + unsigned long trap; + +#ifdef CONFIG_SMP + printf("cpu 0x%x: ", smp_processor_id()); +#endif /* CONFIG_SMP */ + + trap = TRAP(fp); + printf("Vector: %lx %s at [%lx]\n", fp->trap, getvecname(trap), fp); + printf(" pc: "); + xmon_print_symbol(fp->nip, ": ", "\n"); + + printf(" lr: ", fp->link); + xmon_print_symbol(fp->link, ": ", "\n"); + + printf(" sp: %lx\n", fp->gpr[1]); + printf(" msr: %lx\n", fp->msr); + + if (trap == 0x300 || trap == 0x380 || trap == 0x600) { + printf(" dar: %lx\n", fp->dar); + if (trap != 0x380) + printf(" dsisr: %lx\n", fp->dsisr); + } + + printf(" current = 0x%lx\n", current); + printf(" paca = 0x%lx\n", get_paca()); + if (current) { + printf(" pid = %ld, comm = %s\n", + current->pid, current->comm); + } + + if (trap == 0x700) + print_bug_trap(fp); +} + +void prregs(struct pt_regs *fp) +{ + int n; + unsigned long base; + struct pt_regs regs; + + if (scanhex(&base)) { + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + regs = *(struct pt_regs *)base; + sync(); + __delay(200); + } else { + catch_memory_errors = 0; + printf("*** Error reading registers from %.16lx\n", + base); + return; + } + catch_memory_errors = 0; + fp = ®s; + } + + if (FULL_REGS(fp)) { + for (n = 0; n < 16; ++n) + printf("R%.2ld = %.16lx R%.2ld = %.16lx\n", + n, fp->gpr[n], n+16, fp->gpr[n+16]); + } else { + for (n = 0; n < 7; ++n) + printf("R%.2ld = %.16lx R%.2ld = %.16lx\n", + n, fp->gpr[n], n+7, fp->gpr[n+7]); + } + printf("pc = "); + xmon_print_symbol(fp->nip, " ", "\n"); + printf("lr = "); + xmon_print_symbol(fp->link, " ", "\n"); + printf("msr = %.16lx cr = %.8lx\n", fp->msr, fp->ccr); + printf("ctr = %.16lx xer = %.16lx trap = %8lx\n", + fp->ctr, fp->xer, fp->trap); +} + +void cacheflush(void) +{ + int cmd; + unsigned long nflush; + + cmd = inchar(); + if (cmd != 'i') + termch = cmd; + scanhex((void *)&adrs); + if (termch != '\n') + termch = 0; + nflush = 1; + scanhex(&nflush); + nflush = (nflush + L1_CACHE_BYTES - 1) / L1_CACHE_BYTES; + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + + if (cmd != 'i') { + for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES) + cflush((void *) adrs); + } else { + for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES) + cinval((void *) adrs); + } + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + } + catch_memory_errors = 0; +} + +unsigned long +read_spr(int n) +{ + unsigned int instrs[2]; + unsigned long (*code)(void); + unsigned long opd[3]; + unsigned long ret = -1UL; + + instrs[0] = 0x7c6002a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6); + instrs[1] = 0x4e800020; + opd[0] = (unsigned long)instrs; + opd[1] = 0; + opd[2] = 0; + store_inst(instrs); + store_inst(instrs+1); + code = (unsigned long (*)(void)) opd; + + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + + ret = code(); + + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + n = size; + } + + return ret; +} + +void +write_spr(int n, unsigned long val) +{ + unsigned int instrs[2]; + unsigned long (*code)(unsigned long); + unsigned long opd[3]; + + instrs[0] = 0x7c6003a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6); + instrs[1] = 0x4e800020; + opd[0] = (unsigned long)instrs; + opd[1] = 0; + opd[2] = 0; + store_inst(instrs); + store_inst(instrs+1); + code = (unsigned long (*)(unsigned long)) opd; + + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + + code(val); + + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + n = size; + } +} + +static unsigned long regno; +extern char exc_prolog; +extern char dec_exc; + +void +super_regs(void) +{ + int cmd; + unsigned long val; +#ifdef CONFIG_PPC_ISERIES + struct paca_struct *ptrPaca = NULL; + struct lppaca *ptrLpPaca = NULL; + struct ItLpRegSave *ptrLpRegSave = NULL; +#endif + + cmd = skipbl(); + if (cmd == '\n') { + unsigned long sp, toc; + asm("mr %0,1" : "=r" (sp) :); + asm("mr %0,2" : "=r" (toc) :); + + printf("msr = %.16lx sprg0= %.16lx\n", get_msr(), get_sprg0()); + printf("pvr = %.16lx sprg1= %.16lx\n", get_pvr(), get_sprg1()); + printf("dec = %.16lx sprg2= %.16lx\n", get_dec(), get_sprg2()); + printf("sp = %.16lx sprg3= %.16lx\n", sp, get_sprg3()); + printf("toc = %.16lx dar = %.16lx\n", toc, get_dar()); + printf("srr0 = %.16lx srr1 = %.16lx\n", get_srr0(), get_srr1()); +#ifdef CONFIG_PPC_ISERIES + // Dump out relevant Paca data areas. + printf("Paca: \n"); + ptrPaca = get_paca(); + + printf(" Local Processor Control Area (LpPaca): \n"); + ptrLpPaca = ptrPaca->lppaca_ptr; + printf(" Saved Srr0=%.16lx Saved Srr1=%.16lx \n", + ptrLpPaca->saved_srr0, ptrLpPaca->saved_srr1); + printf(" Saved Gpr3=%.16lx Saved Gpr4=%.16lx \n", + ptrLpPaca->saved_gpr3, ptrLpPaca->saved_gpr4); + printf(" Saved Gpr5=%.16lx \n", ptrLpPaca->saved_gpr5); + + printf(" Local Processor Register Save Area (LpRegSave): \n"); + ptrLpRegSave = ptrPaca->reg_save_ptr; + printf(" Saved Sprg0=%.16lx Saved Sprg1=%.16lx \n", + ptrLpRegSave->xSPRG0, ptrLpRegSave->xSPRG0); + printf(" Saved Sprg2=%.16lx Saved Sprg3=%.16lx \n", + ptrLpRegSave->xSPRG2, ptrLpRegSave->xSPRG3); + printf(" Saved Msr =%.16lx Saved Nia =%.16lx \n", + ptrLpRegSave->xMSR, ptrLpRegSave->xNIA); +#endif + + return; + } + + scanhex(®no); + switch (cmd) { + case 'w': + val = read_spr(regno); + scanhex(&val); + write_spr(regno, val); + /* fall through */ + case 'r': + printf("spr %lx = %lx\n", regno, read_spr(regno)); + break; + case 'm': + val = get_msr(); + scanhex(&val); + set_msrd(val); + break; + } + scannl(); +} + +/* + * Stuff for reading and writing memory safely + */ +int +mread(unsigned long adrs, void *buf, int size) +{ + volatile int n; + char *p, *q; + + n = 0; + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + p = (char *)adrs; + q = (char *)buf; + switch (size) { + case 2: + *(short *)q = *(short *)p; + break; + case 4: + *(int *)q = *(int *)p; + break; + case 8: + *(long *)q = *(long *)p; + break; + default: + for( ; n < size; ++n) { + *q++ = *p++; + sync(); + } + } + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + n = size; + } + catch_memory_errors = 0; + return n; +} + +int +mwrite(unsigned long adrs, void *buf, int size) +{ + volatile int n; + char *p, *q; + + n = 0; + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + p = (char *) adrs; + q = (char *) buf; + switch (size) { + case 2: + *(short *)p = *(short *)q; + break; + case 4: + *(int *)p = *(int *)q; + break; + case 8: + *(long *)p = *(long *)q; + break; + default: + for ( ; n < size; ++n) { + *p++ = *q++; + sync(); + } + } + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + n = size; + } else { + printf("*** Error writing address %x\n", adrs + n); + } + catch_memory_errors = 0; + return n; +} + +static int fault_type; +static char *fault_chars[] = { "--", "**", "##" }; + +static int +handle_fault(struct pt_regs *regs) +{ + switch (TRAP(regs)) { + case 0x200: + fault_type = 0; + break; + case 0x300: + case 0x380: + fault_type = 1; + break; + default: + fault_type = 2; + } + + longjmp(bus_error_jmp, 1); + + return 0; +} + +#define SWAP(a, b, t) ((t) = (a), (a) = (b), (b) = (t)) + +void +byterev(unsigned char *val, int size) +{ + int t; + + switch (size) { + case 2: + SWAP(val[0], val[1], t); + break; + case 4: + SWAP(val[0], val[3], t); + SWAP(val[1], val[2], t); + break; + case 8: /* is there really any use for this? */ + SWAP(val[0], val[7], t); + SWAP(val[1], val[6], t); + SWAP(val[2], val[5], t); + SWAP(val[3], val[4], t); + break; + } +} + +static int brev; +static int mnoread; + +static char *memex_help_string = + "Memory examine command usage:\n" + "m [addr] [flags] examine/change memory\n" + " addr is optional. will start where left off.\n" + " flags may include chars from this set:\n" + " b modify by bytes (default)\n" + " w modify by words (2 byte)\n" + " l modify by longs (4 byte)\n" + " d modify by doubleword (8 byte)\n" + " r toggle reverse byte order mode\n" + " n do not read memory (for i/o spaces)\n" + " . ok to read (default)\n" + "NOTE: flags are saved as defaults\n" + ""; + +static char *memex_subcmd_help_string = + "Memory examine subcommands:\n" + " hexval write this val to current location\n" + " 'string' write chars from string to this location\n" + " ' increment address\n" + " ^ decrement address\n" + " / increment addr by 0x10. //=0x100, ///=0x1000, etc\n" + " \\ decrement addr by 0x10. \\\\=0x100, \\\\\\=0x1000, etc\n" + " ` clear no-read flag\n" + " ; stay at this addr\n" + " v change to byte mode\n" + " w change to word (2 byte) mode\n" + " l change to long (4 byte) mode\n" + " u change to doubleword (8 byte) mode\n" + " m addr change current addr\n" + " n toggle no-read flag\n" + " r toggle byte reverse flag\n" + " < count back up count bytes\n" + " > count skip forward count bytes\n" + " x exit this mode\n" + ""; + +void +memex(void) +{ + int cmd, inc, i, nslash; + unsigned long n; + unsigned char val[16]; + + scanhex((void *)&adrs); + cmd = skipbl(); + if (cmd == '?') { + printf(memex_help_string); + return; + } else { + termch = cmd; + } + last_cmd = "m\n"; + while ((cmd = skipbl()) != '\n') { + switch( cmd ){ + case 'b': size = 1; break; + case 'w': size = 2; break; + case 'l': size = 4; break; + case 'd': size = 8; break; + case 'r': brev = !brev; break; + case 'n': mnoread = 1; break; + case '.': mnoread = 0; break; + } + } + if( size <= 0 ) + size = 1; + else if( size > 8 ) + size = 8; + for(;;){ + if (!mnoread) + n = mread(adrs, val, size); + printf("%.16x%c", adrs, brev? 'r': ' '); + if (!mnoread) { + if (brev) + byterev(val, size); + putchar(' '); + for (i = 0; i < n; ++i) + printf("%.2x", val[i]); + for (; i < size; ++i) + printf("%s", fault_chars[fault_type]); + } + putchar(' '); + inc = size; + nslash = 0; + for(;;){ + if( scanhex(&n) ){ + for (i = 0; i < size; ++i) + val[i] = n >> (i * 8); + if (!brev) + byterev(val, size); + mwrite(adrs, val, size); + inc = size; + } + cmd = skipbl(); + if (cmd == '\n') + break; + inc = 0; + switch (cmd) { + case '\'': + for(;;){ + n = inchar(); + if( n == '\\' ) + n = bsesc(); + else if( n == '\'' ) + break; + for (i = 0; i < size; ++i) + val[i] = n >> (i * 8); + if (!brev) + byterev(val, size); + mwrite(adrs, val, size); + adrs += size; + } + adrs -= size; + inc = size; + break; + case ',': + adrs += size; + break; + case '.': + mnoread = 0; + break; + case ';': + break; + case 'x': + case EOF: + scannl(); + return; + case 'b': + case 'v': + size = 1; + break; + case 'w': + size = 2; + break; + case 'l': + size = 4; + break; + case 'u': + size = 8; + break; + case '^': + adrs -= size; + break; + break; + case '/': + if (nslash > 0) + adrs -= 1 << nslash; + else + nslash = 0; + nslash += 4; + adrs += 1 << nslash; + break; + case '\\': + if (nslash < 0) + adrs += 1 << -nslash; + else + nslash = 0; + nslash -= 4; + adrs -= 1 << -nslash; + break; + case 'm': + scanhex((void *)&adrs); + break; + case 'n': + mnoread = 1; + break; + case 'r': + brev = !brev; + break; + case '<': + n = size; + scanhex(&n); + adrs -= n; + break; + case '>': + n = size; + scanhex(&n); + adrs += n; + break; + case '?': + printf(memex_subcmd_help_string); + break; + } + } + adrs += inc; + } +} + +int +bsesc(void) +{ + int c; + + c = inchar(); + switch( c ){ + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 'b': c = '\b'; break; + case 't': c = '\t'; break; + } + return c; +} + +#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ + || ('a' <= (c) && (c) <= 'f') \ + || ('A' <= (c) && (c) <= 'F')) +void +dump(void) +{ + int c; + + c = inchar(); + if ((isxdigit(c) && c != 'f' && c != 'd') || c == '\n') + termch = c; + scanhex((void *)&adrs); + if (termch != '\n') + termch = 0; + if (c == 'i') { + scanhex(&nidump); + if (nidump == 0) + nidump = 16; + else if (nidump > MAX_DUMP) + nidump = MAX_DUMP; + adrs += ppc_inst_dump(adrs, nidump, 1); + last_cmd = "di\n"; + } else { + scanhex(&ndump); + if (ndump == 0) + ndump = 64; + else if (ndump > MAX_DUMP) + ndump = MAX_DUMP; + prdump(adrs, ndump); + adrs += ndump; + last_cmd = "d\n"; + } +} + +void +prdump(unsigned long adrs, long ndump) +{ + long n, m, c, r, nr; + unsigned char temp[16]; + + for (n = ndump; n > 0;) { + printf("%.16lx", adrs); + putchar(' '); + r = n < 16? n: 16; + nr = mread(adrs, temp, r); + adrs += nr; + for (m = 0; m < r; ++m) { + if ((m & 7) == 0 && m > 0) + putchar(' '); + if (m < nr) + printf("%.2x", temp[m]); + else + printf("%s", fault_chars[fault_type]); + } + if (m <= 8) + printf(" "); + for (; m < 16; ++m) + printf(" "); + printf(" |"); + for (m = 0; m < r; ++m) { + if (m < nr) { + c = temp[m]; + putchar(' ' <= c && c <= '~'? c: '.'); + } else + putchar(' '); + } + n -= r; + for (; m < 16; ++m) + putchar(' '); + printf("|\n"); + if (nr < r) + break; + } +} + +int +ppc_inst_dump(unsigned long adr, long count, int praddr) +{ + int nr, dotted; + unsigned long first_adr; + unsigned long inst, last_inst = 0; + unsigned char val[4]; + + dotted = 0; + for (first_adr = adr; count > 0; --count, adr += 4) { + nr = mread(adr, val, 4); + if (nr == 0) { + if (praddr) { + const char *x = fault_chars[fault_type]; + printf("%.16lx %s%s%s%s\n", adr, x, x, x, x); + } + break; + } + inst = GETWORD(val); + if (adr > first_adr && inst == last_inst) { + if (!dotted) { + printf(" ...\n"); + dotted = 1; + } + continue; + } + dotted = 0; + last_inst = inst; + if (praddr) + printf("%.16lx %.8x", adr, inst); + printf("\t"); + print_insn_powerpc(inst, adr, 0); /* always returns 4 */ + printf("\n"); + } + return adr - first_adr; +} + +void +print_address(unsigned long addr) +{ + xmon_print_symbol(addr, "\t# ", ""); +} + + +/* + * Memory operations - move, set, print differences + */ +static unsigned long mdest; /* destination address */ +static unsigned long msrc; /* source address */ +static unsigned long mval; /* byte value to set memory to */ +static unsigned long mcount; /* # bytes to affect */ +static unsigned long mdiffs; /* max # differences to print */ + +void +memops(int cmd) +{ + scanhex((void *)&mdest); + if( termch != '\n' ) + termch = 0; + scanhex((void *)(cmd == 's'? &mval: &msrc)); + if( termch != '\n' ) + termch = 0; + scanhex((void *)&mcount); + switch( cmd ){ + case 'm': + memmove((void *)mdest, (void *)msrc, mcount); + break; + case 's': + memset((void *)mdest, mval, mcount); + break; + case 'd': + if( termch != '\n' ) + termch = 0; + scanhex((void *)&mdiffs); + memdiffs((unsigned char *)mdest, (unsigned char *)msrc, mcount, mdiffs); + break; + } +} + +void +memdiffs(unsigned char *p1, unsigned char *p2, unsigned nb, unsigned maxpr) +{ + unsigned n, prt; + + prt = 0; + for( n = nb; n > 0; --n ) + if( *p1++ != *p2++ ) + if( ++prt <= maxpr ) + printf("%.16x %.2x # %.16x %.2x\n", p1 - 1, + p1[-1], p2 - 1, p2[-1]); + if( prt > maxpr ) + printf("Total of %d differences\n", prt); +} + +static unsigned mend; +static unsigned mask; + +void +memlocate(void) +{ + unsigned a, n; + unsigned char val[4]; + + last_cmd = "ml"; + scanhex((void *)&mdest); + if (termch != '\n') { + termch = 0; + scanhex((void *)&mend); + if (termch != '\n') { + termch = 0; + scanhex((void *)&mval); + mask = ~0; + if (termch != '\n') termch = 0; + scanhex((void *)&mask); + } + } + n = 0; + for (a = mdest; a < mend; a += 4) { + if (mread(a, val, 4) == 4 + && ((GETWORD(val) ^ mval) & mask) == 0) { + printf("%.16x: %.16x\n", a, GETWORD(val)); + if (++n >= 10) + break; + } + } +} + +static unsigned long mskip = 0x1000; +static unsigned long mlim = 0xffffffff; + +void +memzcan(void) +{ + unsigned char v; + unsigned a; + int ok, ook; + + scanhex(&mdest); + if (termch != '\n') termch = 0; + scanhex(&mskip); + if (termch != '\n') termch = 0; + scanhex(&mlim); + ook = 0; + for (a = mdest; a < mlim; a += mskip) { + ok = mread(a, &v, 1); + if (ok && !ook) { + printf("%.8x .. ", a); + fflush(stdout); + } else if (!ok && ook) + printf("%.8x\n", a - mskip); + ook = ok; + if (a + mskip < a) + break; + } + if (ook) + printf("%.8x\n", a - mskip); +} + +/* Input scanning routines */ +int +skipbl(void) +{ + int c; + + if( termch != 0 ){ + c = termch; + termch = 0; + } else + c = inchar(); + while( c == ' ' || c == '\t' ) + c = inchar(); + return c; +} + +#define N_PTREGS 44 +static char *regnames[N_PTREGS] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", + "pc", "msr", "or3", "ctr", "lr", "xer", "ccr", "softe", + "trap", "dar", "dsisr", "res" +}; + +int +scanhex(unsigned long *vp) +{ + int c, d; + unsigned long v; + + c = skipbl(); + if (c == '%') { + /* parse register name */ + char regname[8]; + int i; + + for (i = 0; i < sizeof(regname) - 1; ++i) { + c = inchar(); + if (!isalnum(c)) { + termch = c; + break; + } + regname[i] = c; + } + regname[i] = 0; + for (i = 0; i < N_PTREGS; ++i) { + if (strcmp(regnames[i], regname) == 0) { + if (xmon_regs == NULL) { + printf("regs not available\n"); + return 0; + } + *vp = ((unsigned long *)xmon_regs)[i]; + return 1; + } + } + printf("invalid register name '%%%s'\n", regname); + return 0; + } + + /* skip leading "0x" if any */ + + if (c == '0') { + c = inchar(); + if (c == 'x') { + c = inchar(); + } else { + d = hexdigit(c); + if (d == EOF) { + termch = c; + *vp = 0; + return 1; + } + } + } else if (c == '$') { + int i; + for (i=0; i<63; i++) { + c = inchar(); + if (isspace(c)) { + termch = c; + break; + } + tmpstr[i] = c; + } + tmpstr[i++] = 0; + *vp = kallsyms_lookup_name(tmpstr); + if (!(*vp)) { + printf("unknown symbol '%s'\n", tmpstr); + return 0; + } + return 1; + } + + d = hexdigit(c); + if (d == EOF) { + termch = c; + return 0; + } + v = 0; + do { + v = (v << 4) + d; + c = inchar(); + d = hexdigit(c); + } while (d != EOF); + termch = c; + *vp = v; + return 1; +} + +void +scannl(void) +{ + int c; + + c = termch; + termch = 0; + while( c != '\n' ) + c = inchar(); +} + +int +hexdigit(int c) +{ + if( '0' <= c && c <= '9' ) + return c - '0'; + if( 'A' <= c && c <= 'F' ) + return c - ('A' - 10); + if( 'a' <= c && c <= 'f' ) + return c - ('a' - 10); + return EOF; +} + +void +getstring(char *s, int size) +{ + int c; + + c = skipbl(); + do { + if( size > 1 ){ + *s++ = c; + --size; + } + c = inchar(); + } while( c != ' ' && c != '\t' && c != '\n' ); + termch = c; + *s = 0; +} + +static char line[256]; +static char *lineptr; + +void +flush_input(void) +{ + lineptr = NULL; +} + +int +inchar(void) +{ + if (lineptr == NULL || *lineptr == 0) { + if (fgets(line, sizeof(line), stdin) == NULL) { + lineptr = NULL; + return EOF; + } + lineptr = line; + } + return *lineptr++; +} + +void +take_input(char *str) +{ + lineptr = str; +} + + +static void +symbol_lookup(void) +{ + int type = inchar(); + unsigned long addr; + static char tmp[64]; + + switch (type) { + case 'a': + if (scanhex(&addr)) + xmon_print_symbol(addr, ": ", "\n"); + termch = 0; + break; + case 's': + getstring(tmp, 64); + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + addr = kallsyms_lookup_name(tmp); + if (addr) + printf("%s: %lx\n", tmp, addr); + else + printf("Symbol '%s' not found.\n", tmp); + sync(); + } + catch_memory_errors = 0; + termch = 0; + break; + } +} + + +/* Print an address in numeric and symbolic form (if possible) */ +static void xmon_print_symbol(unsigned long address, const char *mid, + const char *after) +{ + char *modname; + const char *name = NULL; + unsigned long offset, size; + + printf("%.16lx", address); + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + name = kallsyms_lookup(address, &size, &offset, &modname, + tmpstr); + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + } + + catch_memory_errors = 0; + + if (name) { + printf("%s%s+%#lx/%#lx", mid, name, offset, size); + if (modname) + printf(" [%s]", modname); + } + printf("%s", after); +} + +static void debug_trace(void) +{ + unsigned long val, cmd, on; + + cmd = skipbl(); + if (cmd == '\n') { + /* show current state */ + unsigned long i; + printf("ppc64_debug_switch = 0x%lx\n", ppc64_debug_switch); + for (i = 0; i < PPCDBG_NUM_FLAGS ;i++) { + on = PPCDBG_BITVAL(i) & ppc64_debug_switch; + printf("%02x %s %12s ", i, on ? "on " : "off", trace_names[i] ? trace_names[i] : ""); + if (((i+1) % 3) == 0) + printf("\n"); + } + printf("\n"); + return; + } + while (cmd != '\n') { + on = 1; /* default if no sign given */ + while (cmd == '+' || cmd == '-') { + on = (cmd == '+'); + cmd = inchar(); + if (cmd == ' ' || cmd == '\n') { /* Turn on or off based on + or - */ + ppc64_debug_switch = on ? PPCDBG_ALL:PPCDBG_NONE; + printf("Setting all values to %s...\n", on ? "on" : "off"); + if (cmd == '\n') return; + else cmd = skipbl(); + } + else + termch = cmd; + } + termch = cmd; /* not +/- ... let scanhex see it */ + scanhex((void *)&val); + if (val >= 64) { + printf("Value %x out of range:\n", val); + return; + } + if (on) { + ppc64_debug_switch |= PPCDBG_BITVAL(val); + printf("enable debug %x %s\n", val, trace_names[val] ? trace_names[val] : ""); + } else { + ppc64_debug_switch &= ~PPCDBG_BITVAL(val); + printf("disable debug %x %s\n", val, trace_names[val] ? trace_names[val] : ""); + } + cmd = skipbl(); + } +} + +static void dump_slb(void) +{ + int i; + unsigned long tmp; + + printf("SLB contents of cpu %x\n", smp_processor_id()); + + for (i = 0; i < SLB_NUM_ENTRIES; i++) { + asm volatile("slbmfee %0,%1" : "=r" (tmp) : "r" (i)); + printf("%02d %016lx ", i, tmp); + + asm volatile("slbmfev %0,%1" : "=r" (tmp) : "r" (i)); + printf("%016lx\n", tmp); + } +} + +static void dump_stab(void) +{ + int i; + unsigned long *tmp = (unsigned long *)get_paca()->stab_addr; + + printf("Segment table contents of cpu %x\n", smp_processor_id()); + + for (i = 0; i < PAGE_SIZE/16; i++) { + unsigned long a, b; + + a = *tmp++; + b = *tmp++; + + if (a || b) { + printf("%03d %016lx ", i, a); + printf("%016lx\n", b); + } + } +} + +void xmon_init(void) +{ + __debugger = xmon; + __debugger_ipi = xmon_ipi; + __debugger_bpt = xmon_bpt; + __debugger_sstep = xmon_sstep; + __debugger_iabr_match = xmon_iabr_match; + __debugger_dabr_match = xmon_dabr_match; + __debugger_fault_handler = xmon_fault_handler; +} + +void dump_segments(void) +{ + if (cpu_has_feature(CPU_FTR_SLB)) + dump_slb(); + else + dump_stab(); +} -- cgit v1.2.3-70-g09d2